1/*
2*******************************************************************************
3* Copyright (C) 1997-2014, International Business Machines Corporation
4* and others. All Rights Reserved.
5*******************************************************************************
6*/
7
8#include "unicode/utypes.h"
9#include "utypeinfo.h"  // for 'typeid' to work
10
11#include "unicode/rbnf.h"
12
13#if U_HAVE_RBNF
14
15#include "unicode/normlzr.h"
16#include "unicode/tblcoll.h"
17#include "unicode/uchar.h"
18#include "unicode/ucol.h"
19#include "unicode/uloc.h"
20#include "unicode/unum.h"
21#include "unicode/ures.h"
22#include "unicode/ustring.h"
23#include "unicode/utf16.h"
24#include "unicode/udata.h"
25#include "unicode/udisplaycontext.h"
26#include "unicode/brkiter.h"
27#include "nfrs.h"
28
29#include "cmemory.h"
30#include "cstring.h"
31#include "patternprops.h"
32#include "uresimp.h"
33
34// debugging
35// #define DEBUG
36
37#ifdef DEBUG
38#include "stdio.h"
39#endif
40
41#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
42
// UTF-16 spelling of "%%" (two U+0025 PERCENT SIGN code units), NUL-terminated.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// UTF-16 spelling of the "%%lenient-parse:" marker, NUL-terminated.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// U+003B SEMICOLON as a single code unit.
static const UChar gSemiColon = 0x003B;
// UTF-16 spelling of ";%" (semicolon followed by percent), NUL-terminated.
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble evaluates to 2^22 as a double; kMaxDouble is its square, 2^44.
// NOTE(review): kHalfMaxDouble expands to an unparenthesized cast expression;
// it is only combined with '*' here, which is safe.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
63
64U_NAMESPACE_BEGIN
65
66UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
67
68/*
69This is a utility class. It does not use ICU's RTTI.
70If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
71Please make sure that intltest passes on Windows in Release mode,
72since the string pooling per compilation unit will mess up how RTTI works.
73The RTTI code was also removed due to lack of code coverage.
74*/
75class LocalizationInfo : public UMemory {
76protected:
77    virtual ~LocalizationInfo();
78    uint32_t refcount;
79
80public:
81    LocalizationInfo() : refcount(0) {}
82
83    LocalizationInfo* ref(void) {
84        ++refcount;
85        return this;
86    }
87
88    LocalizationInfo* unref(void) {
89        if (refcount && --refcount == 0) {
90            delete this;
91        }
92        return NULL;
93    }
94
95    virtual UBool operator==(const LocalizationInfo* rhs) const;
96    inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
97
98    virtual int32_t getNumberOfRuleSets(void) const = 0;
99    virtual const UChar* getRuleSetName(int32_t index) const = 0;
100    virtual int32_t getNumberOfDisplayLocales(void) const = 0;
101    virtual const UChar* getLocaleName(int32_t index) const = 0;
102    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
103
104    virtual int32_t indexForLocale(const UChar* locale) const;
105    virtual int32_t indexForRuleSet(const UChar* ruleset) const;
106
107//    virtual UClassID getDynamicClassID() const = 0;
108//    static UClassID getStaticClassID(void);
109};
110
111LocalizationInfo::~LocalizationInfo() {}
112
113//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
114
115// if both strings are NULL, this returns TRUE
116static UBool
117streq(const UChar* lhs, const UChar* rhs) {
118    if (rhs == lhs) {
119        return TRUE;
120    }
121    if (lhs && rhs) {
122        return u_strcmp(lhs, rhs) == 0;
123    }
124    return FALSE;
125}
126
127UBool
128LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
129    if (rhs) {
130        if (this == rhs) {
131            return TRUE;
132        }
133
134        int32_t rsc = getNumberOfRuleSets();
135        if (rsc == rhs->getNumberOfRuleSets()) {
136            for (int i = 0; i < rsc; ++i) {
137                if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
138                    return FALSE;
139                }
140            }
141            int32_t dlc = getNumberOfDisplayLocales();
142            if (dlc == rhs->getNumberOfDisplayLocales()) {
143                for (int i = 0; i < dlc; ++i) {
144                    const UChar* locale = getLocaleName(i);
145                    int32_t ix = rhs->indexForLocale(locale);
146                    // if no locale, ix is -1, getLocaleName returns null, so streq returns false
147                    if (!streq(locale, rhs->getLocaleName(ix))) {
148                        return FALSE;
149                    }
150                    for (int j = 0; j < rsc; ++j) {
151                        if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
152                            return FALSE;
153                        }
154                    }
155                }
156                return TRUE;
157            }
158        }
159    }
160    return FALSE;
161}
162
163int32_t
164LocalizationInfo::indexForLocale(const UChar* locale) const {
165    for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
166        if (streq(locale, getLocaleName(i))) {
167            return i;
168        }
169    }
170    return -1;
171}
172
173int32_t
174LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
175    if (ruleset) {
176        for (int i = 0; i < getNumberOfRuleSets(); ++i) {
177            if (streq(ruleset, getRuleSetName(i))) {
178                return i;
179            }
180        }
181    }
182    return -1;
183}
184
185
186typedef void (*Fn_Deleter)(void*);
187
188class VArray {
189    void** buf;
190    int32_t cap;
191    int32_t size;
192    Fn_Deleter deleter;
193public:
194    VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
195
196    VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
197
198    ~VArray() {
199        if (deleter) {
200            for (int i = 0; i < size; ++i) {
201                (*deleter)(buf[i]);
202            }
203        }
204        uprv_free(buf);
205    }
206
207    int32_t length() {
208        return size;
209    }
210
211    void add(void* elem, UErrorCode& status) {
212        if (U_SUCCESS(status)) {
213            if (size == cap) {
214                if (cap == 0) {
215                    cap = 1;
216                } else if (cap < 256) {
217                    cap *= 2;
218                } else {
219                    cap += 256;
220                }
221                if (buf == NULL) {
222                    buf = (void**)uprv_malloc(cap * sizeof(void*));
223                } else {
224                    buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
225                }
226                if (buf == NULL) {
227                    // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
228                    status = U_MEMORY_ALLOCATION_ERROR;
229                    return;
230                }
231                void* start = &buf[size];
232                size_t count = (cap - size) * sizeof(void*);
233                uprv_memset(start, 0, count); // fill with nulls, just because
234            }
235            buf[size++] = elem;
236        }
237    }
238
239    void** release(void) {
240        void** result = buf;
241        buf = NULL;
242        cap = 0;
243        size = 0;
244        return result;
245    }
246};
247
248class LocDataParser;
249
250class StringLocalizationInfo : public LocalizationInfo {
251    UChar* info;
252    UChar*** data;
253    int32_t numRuleSets;
254    int32_t numLocales;
255
256friend class LocDataParser;
257
258    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
259        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
260    {
261    }
262
263public:
264    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
265
266    virtual ~StringLocalizationInfo();
267    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
268    virtual const UChar* getRuleSetName(int32_t index) const;
269    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
270    virtual const UChar* getLocaleName(int32_t index) const;
271    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
272
273//    virtual UClassID getDynamicClassID() const;
274//    static UClassID getStaticClassID(void);
275
276private:
277    void init(UErrorCode& status) const;
278};
279
280
// Code points of the punctuation recognized by the localization-data parser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' (single quote) */
    QUOTE = 0x0022, /* '"' (double quote) */
    SPACE = 0x0020 /* ' ' */
};
289
290/**
291 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
292 */
293class LocDataParser {
294    UChar* data;
295    const UChar* e;
296    UChar* p;
297    UChar ch;
298    UParseError& pe;
299    UErrorCode& ec;
300
301public:
302    LocDataParser(UParseError& parseError, UErrorCode& status)
303        : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
304    ~LocDataParser() {}
305
306    /*
307    * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
308    * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
309    */
310    StringLocalizationInfo* parse(UChar* data, int32_t len);
311
312private:
313
314    void inc(void) { ++p; ch = 0xffff; }
315    UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
316    UBool check(UChar c) { return p < e && (ch == c || *p == c); }
317    void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
318    UBool inList(UChar c, const UChar* list) const {
319        if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
320        while (*list && *list != c) ++list; return *list == c;
321    }
322    void parseError(const char* msg);
323
324    StringLocalizationInfo* doParse(void);
325
326    UChar** nextArray(int32_t& requiredLength);
327    UChar*  nextString(void);
328};
329
// ERROR(msg) records a parse error through parseError() and returns NULL from
// the enclosing function. The message text is forwarded only in DEBUG builds;
// otherwise parseError() receives NULL and its parameter is left unnamed
// (EXPLANATION_ARG expands to nothing).
// NOTE(review): ERROR expands to two statements, so it must only be used
// inside a braced block; an unbraced `if (x) ERROR(...);` would return
// unconditionally.
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
337
338
// Terminator set for a string opened with a double quote: only '"' ends it.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator set for a string opened with a single quote: only '\'' ends it.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminator set for an unquoted string. The leading SPACE makes
// LocDataParser::inList treat any pattern white space as a terminator too.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
350
351static void
352DeleteFn(void* p) {
353  uprv_free(p);
354}
355
356StringLocalizationInfo*
357LocDataParser::parse(UChar* _data, int32_t len) {
358    if (U_FAILURE(ec)) {
359        if (_data) uprv_free(_data);
360        return NULL;
361    }
362
363    pe.line = 0;
364    pe.offset = -1;
365    pe.postContext[0] = 0;
366    pe.preContext[0] = 0;
367
368    if (_data == NULL) {
369        ec = U_ILLEGAL_ARGUMENT_ERROR;
370        return NULL;
371    }
372
373    if (len <= 0) {
374        ec = U_ILLEGAL_ARGUMENT_ERROR;
375        uprv_free(_data);
376        return NULL;
377    }
378
379    data = _data;
380    e = data + len;
381    p = _data;
382    ch = 0xffff;
383
384    return doParse();
385}
386
387
388StringLocalizationInfo*
389LocDataParser::doParse(void) {
390    skipWhitespace();
391    if (!checkInc(OPEN_ANGLE)) {
392        ERROR("Missing open angle");
393    } else {
394        VArray array(DeleteFn);
395        UBool mightHaveNext = TRUE;
396        int32_t requiredLength = -1;
397        while (mightHaveNext) {
398            mightHaveNext = FALSE;
399            UChar** elem = nextArray(requiredLength);
400            skipWhitespace();
401            UBool haveComma = check(COMMA);
402            if (elem) {
403                array.add(elem, ec);
404                if (haveComma) {
405                    inc();
406                    mightHaveNext = TRUE;
407                }
408            } else if (haveComma) {
409                ERROR("Unexpected character");
410            }
411        }
412
413        skipWhitespace();
414        if (!checkInc(CLOSE_ANGLE)) {
415            if (check(OPEN_ANGLE)) {
416                ERROR("Missing comma in outer array");
417            } else {
418                ERROR("Missing close angle bracket in outer array");
419            }
420        }
421
422        skipWhitespace();
423        if (p != e) {
424            ERROR("Extra text after close of localization data");
425        }
426
427        array.add(NULL, ec);
428        if (U_SUCCESS(ec)) {
429            int32_t numLocs = array.length() - 2; // subtract first, NULL
430            UChar*** result = (UChar***)array.release();
431
432            return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
433        }
434    }
435
436    ERROR("Unknown error");
437}
438
439UChar**
440LocDataParser::nextArray(int32_t& requiredLength) {
441    if (U_FAILURE(ec)) {
442        return NULL;
443    }
444
445    skipWhitespace();
446    if (!checkInc(OPEN_ANGLE)) {
447        ERROR("Missing open angle");
448    }
449
450    VArray array;
451    UBool mightHaveNext = TRUE;
452    while (mightHaveNext) {
453        mightHaveNext = FALSE;
454        UChar* elem = nextString();
455        skipWhitespace();
456        UBool haveComma = check(COMMA);
457        if (elem) {
458            array.add(elem, ec);
459            if (haveComma) {
460                inc();
461                mightHaveNext = TRUE;
462            }
463        } else if (haveComma) {
464            ERROR("Unexpected comma");
465        }
466    }
467    skipWhitespace();
468    if (!checkInc(CLOSE_ANGLE)) {
469        if (check(OPEN_ANGLE)) {
470            ERROR("Missing close angle bracket in inner array");
471        } else {
472            ERROR("Missing comma in inner array");
473        }
474    }
475
476    array.add(NULL, ec);
477    if (U_SUCCESS(ec)) {
478        if (requiredLength == -1) {
479            requiredLength = array.length() + 1;
480        } else if (array.length() != requiredLength) {
481            ec = U_ILLEGAL_ARGUMENT_ERROR;
482            ERROR("Array not of required length");
483        }
484
485        return (UChar**)array.release();
486    }
487    ERROR("Unknown Error");
488}
489
// Parses one string at the current position and returns a pointer to it
// inside the text buffer, or NULL if there is no string (or on error).
// A string is either quoted with matching '"' or '\'' characters, or unquoted
// and ended by any character in NOQUOTE_STOPLIST. The string is terminated
// in place by overwriting the character that follows it with NUL.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            inc(); // step past the opening quote
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // scan to the first terminator or to the end of the buffer
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminator that stopped the scan
        if (p > start) {
            ch = x; // remember the character about to be overwritten
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            inc(); // consume the closing quote (also clears ch)
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            // these characters may not directly follow an unquoted string
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
532
533void LocDataParser::parseError(const char* EXPLANATION_ARG)
534{
535    if (!data) {
536        return;
537    }
538
539    const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
540    if (start < data) {
541        start = data;
542    }
543    for (UChar* x = p; --x >= start;) {
544        if (!*x) {
545            start = x+1;
546            break;
547        }
548    }
549    const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
550    if (limit > e) {
551        limit = e;
552    }
553    u_strncpy(pe.preContext, start, (int32_t)(p-start));
554    pe.preContext[p-start] = 0;
555    u_strncpy(pe.postContext, p, (int32_t)(limit-p));
556    pe.postContext[limit-p] = 0;
557    pe.offset = (int32_t)(p - data);
558
559#ifdef DEBUG
560    fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
561
562    UnicodeString msg;
563    msg.append(start, p - start);
564    msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
565    msg.append(p, limit-p);
566    msg.append(UNICODE_STRING_SIMPLE("'"));
567
568    char buf[128];
569    int32_t len = msg.extract(0, msg.length(), buf, 128);
570    if (len >= 128) {
571        buf[127] = 0;
572    } else {
573        buf[len] = 0;
574    }
575    fprintf(stderr, "%s\n", buf);
576    fflush(stderr);
577#endif
578
579    uprv_free(data);
580    data = NULL;
581    p = NULL;
582    e = NULL;
583
584    if (U_SUCCESS(ec)) {
585        ec = U_PARSE_ERROR;
586    }
587}
588
589//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
590
591StringLocalizationInfo*
592StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
593    if (U_FAILURE(status)) {
594        return NULL;
595    }
596
597    int32_t len = info.length();
598    if (len == 0) {
599        return NULL; // no error;
600    }
601
602    UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
603    if (!p) {
604        status = U_MEMORY_ALLOCATION_ERROR;
605        return NULL;
606    }
607    info.extract(p, len, status);
608    if (!U_FAILURE(status)) {
609        status = U_ZERO_ERROR; // clear warning about non-termination
610    }
611
612    LocDataParser parser(perror, status);
613    return parser.parse(p, len);
614}
615
616StringLocalizationInfo::~StringLocalizationInfo() {
617    for (UChar*** p = (UChar***)data; *p; ++p) {
618        // remaining data is simply pointer into our unicode string data.
619        if (*p) uprv_free(*p);
620    }
621    if (data) uprv_free(data);
622    if (info) uprv_free(info);
623}
624
625
626const UChar*
627StringLocalizationInfo::getRuleSetName(int32_t index) const {
628    if (index >= 0 && index < getNumberOfRuleSets()) {
629        return data[0][index];
630    }
631    return NULL;
632}
633
634const UChar*
635StringLocalizationInfo::getLocaleName(int32_t index) const {
636    if (index >= 0 && index < getNumberOfDisplayLocales()) {
637        return data[index+1][0];
638    }
639    return NULL;
640}
641
642const UChar*
643StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
644    if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
645        ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
646        return data[localeIndex+1][ruleIndex+1];
647    }
648    return NULL;
649}
650
651// ----------
652
653RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
654                                             const UnicodeString& locs,
655                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
656  : ruleSets(NULL)
657  , ruleSetDescriptions(NULL)
658  , numRuleSets(0)
659  , defaultRuleSet(NULL)
660  , locale(alocale)
661  , collator(NULL)
662  , decimalFormatSymbols(NULL)
663  , lenient(FALSE)
664  , lenientParseRules(NULL)
665  , localizations(NULL)
666  , capitalizationInfoSet(FALSE)
667  , capitalizationForUIListMenu(FALSE)
668  , capitalizationForStandAlone(FALSE)
669  , capitalizationBrkIter(NULL)
670{
671  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
672  init(description, locinfo, perror, status);
673}
674
675RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
676                                             const UnicodeString& locs,
677                                             UParseError& perror, UErrorCode& status)
678  : ruleSets(NULL)
679  , ruleSetDescriptions(NULL)
680  , numRuleSets(0)
681  , defaultRuleSet(NULL)
682  , locale(Locale::getDefault())
683  , collator(NULL)
684  , decimalFormatSymbols(NULL)
685  , lenient(FALSE)
686  , lenientParseRules(NULL)
687  , localizations(NULL)
688  , capitalizationInfoSet(FALSE)
689  , capitalizationForUIListMenu(FALSE)
690  , capitalizationForStandAlone(FALSE)
691  , capitalizationBrkIter(NULL)
692{
693  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
694  init(description, locinfo, perror, status);
695}
696
697RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
698                                             LocalizationInfo* info,
699                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
700  : ruleSets(NULL)
701  , ruleSetDescriptions(NULL)
702  , numRuleSets(0)
703  , defaultRuleSet(NULL)
704  , locale(alocale)
705  , collator(NULL)
706  , decimalFormatSymbols(NULL)
707  , lenient(FALSE)
708  , lenientParseRules(NULL)
709  , localizations(NULL)
710  , capitalizationInfoSet(FALSE)
711  , capitalizationForUIListMenu(FALSE)
712  , capitalizationForStandAlone(FALSE)
713  , capitalizationBrkIter(NULL)
714{
715  init(description, info, perror, status);
716}
717
718RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
719                         UParseError& perror,
720                         UErrorCode& status)
721  : ruleSets(NULL)
722  , ruleSetDescriptions(NULL)
723  , numRuleSets(0)
724  , defaultRuleSet(NULL)
725  , locale(Locale::getDefault())
726  , collator(NULL)
727  , decimalFormatSymbols(NULL)
728  , lenient(FALSE)
729  , lenientParseRules(NULL)
730  , localizations(NULL)
731  , capitalizationInfoSet(FALSE)
732  , capitalizationForUIListMenu(FALSE)
733  , capitalizationForStandAlone(FALSE)
734  , capitalizationBrkIter(NULL)
735{
736    init(description, NULL, perror, status);
737}
738
739RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
740                         const Locale& aLocale,
741                         UParseError& perror,
742                         UErrorCode& status)
743  : ruleSets(NULL)
744  , ruleSetDescriptions(NULL)
745  , numRuleSets(0)
746  , defaultRuleSet(NULL)
747  , locale(aLocale)
748  , collator(NULL)
749  , decimalFormatSymbols(NULL)
750  , lenient(FALSE)
751  , lenientParseRules(NULL)
752  , localizations(NULL)
753  , capitalizationInfoSet(FALSE)
754  , capitalizationForUIListMenu(FALSE)
755  , capitalizationForStandAlone(FALSE)
756  , capitalizationBrkIter(NULL)
757{
758    init(description, NULL, perror, status);
759}
760
761RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
762  : ruleSets(NULL)
763  , ruleSetDescriptions(NULL)
764  , numRuleSets(0)
765  , defaultRuleSet(NULL)
766  , locale(alocale)
767  , collator(NULL)
768  , decimalFormatSymbols(NULL)
769  , lenient(FALSE)
770  , lenientParseRules(NULL)
771  , localizations(NULL)
772  , capitalizationInfoSet(FALSE)
773  , capitalizationForUIListMenu(FALSE)
774  , capitalizationForStandAlone(FALSE)
775  , capitalizationBrkIter(NULL)
776{
777    if (U_FAILURE(status)) {
778        return;
779    }
780
781    const char* rules_tag = "RBNFRules";
782    const char* fmt_tag = "";
783    switch (tag) {
784    case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
785    case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
786    case URBNF_DURATION: fmt_tag = "DurationRules"; break;
787    case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
788    default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
789    }
790
791    // TODO: read localization info from resource
792    LocalizationInfo* locinfo = NULL;
793
794    UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
795    if (U_SUCCESS(status)) {
796        setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
797                     ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
798
799        UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
800        if (U_FAILURE(status)) {
801            ures_close(nfrb);
802        }
803        UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
804        if (U_FAILURE(status)) {
805            ures_close(rbnfRules);
806            ures_close(nfrb);
807            return;
808        }
809
810        UnicodeString desc;
811        while (ures_hasNext(ruleSets)) {
812           desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
813        }
814        UParseError perror;
815
816        init (desc, locinfo, perror, status);
817
818        ures_close(ruleSets);
819        ures_close(rbnfRules);
820    }
821    ures_close(nfrb);
822}
823
824RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
825  : NumberFormat(rhs)
826  , ruleSets(NULL)
827  , ruleSetDescriptions(NULL)
828  , numRuleSets(0)
829  , defaultRuleSet(NULL)
830  , locale(rhs.locale)
831  , collator(NULL)
832  , decimalFormatSymbols(NULL)
833  , lenient(FALSE)
834  , lenientParseRules(NULL)
835  , localizations(NULL)
836  , capitalizationInfoSet(FALSE)
837  , capitalizationForUIListMenu(FALSE)
838  , capitalizationForStandAlone(FALSE)
839  , capitalizationBrkIter(NULL)
840{
841    this->operator=(rhs);
842}
843
844// --------
845
846RuleBasedNumberFormat&
847RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
848{
849    if (this == &rhs) {
850        return *this;
851    }
852    NumberFormat::operator=(rhs);
853    UErrorCode status = U_ZERO_ERROR;
854    dispose();
855    locale = rhs.locale;
856    lenient = rhs.lenient;
857
858    UParseError perror;
859    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
860    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
861    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
862
863    capitalizationInfoSet = rhs.capitalizationInfoSet;
864    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
865    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
866#if !UCONFIG_NO_BREAK_ITERATION
867    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
868#endif
869
870    return *this;
871}
872
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
877
// Returns a heap-allocated copy of this formatter made with the copy
// constructor.
Format*
RuleBasedNumberFormat::clone(void) const
{
    return new RuleBasedNumberFormat(*this);
}
883
884UBool
885RuleBasedNumberFormat::operator==(const Format& other) const
886{
887    if (this == &other) {
888        return TRUE;
889    }
890
891    if (typeid(*this) == typeid(other)) {
892        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
893        // test for capitalization info equality is adequately handled
894        // by the NumberFormat test for fCapitalizationContext equality;
895        // the info here is just derived from that.
896        if (locale == rhs.locale &&
897            lenient == rhs.lenient &&
898            (localizations == NULL
899                ? rhs.localizations == NULL
900                : (rhs.localizations == NULL
901                    ? FALSE
902                    : *localizations == rhs.localizations))) {
903
904            NFRuleSet** p = ruleSets;
905            NFRuleSet** q = rhs.ruleSets;
906            if (p == NULL) {
907                return q == NULL;
908            } else if (q == NULL) {
909                return FALSE;
910            }
911            while (*p && *q && (**p == **q)) {
912                ++p;
913                ++q;
914            }
915            return *q == NULL && *p == NULL;
916        }
917    }
918
919    return FALSE;
920}
921
922UnicodeString
923RuleBasedNumberFormat::getRules() const
924{
925    UnicodeString result;
926    if (ruleSets != NULL) {
927        for (NFRuleSet** p = ruleSets; *p; ++p) {
928            (*p)->appendRules(result);
929        }
930    }
931    return result;
932}
933
934UnicodeString
935RuleBasedNumberFormat::getRuleSetName(int32_t index) const
936{
937    if (localizations) {
938      UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
939      return string;
940    } else if (ruleSets) {
941        UnicodeString result;
942        for (NFRuleSet** p = ruleSets; *p; ++p) {
943            NFRuleSet* rs = *p;
944            if (rs->isPublic()) {
945                if (--index == -1) {
946                    rs->getName(result);
947                    return result;
948                }
949            }
950        }
951    }
952    UnicodeString empty;
953    return empty;
954}
955
956int32_t
957RuleBasedNumberFormat::getNumberOfRuleSetNames() const
958{
959    int32_t result = 0;
960    if (localizations) {
961      result = localizations->getNumberOfRuleSets();
962    } else if (ruleSets) {
963        for (NFRuleSet** p = ruleSets; *p; ++p) {
964            if ((**p).isPublic()) {
965                ++result;
966            }
967        }
968    }
969    return result;
970}
971
972int32_t
973RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
974    if (localizations) {
975        return localizations->getNumberOfDisplayLocales();
976    }
977    return 0;
978}
979
980Locale
981RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
982    if (U_FAILURE(status)) {
983        return Locale("");
984    }
985    if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
986        UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
987        char buffer[64];
988        int32_t cap = name.length() + 1;
989        char* bp = buffer;
990        if (cap > 64) {
991            bp = (char *)uprv_malloc(cap);
992            if (bp == NULL) {
993                status = U_MEMORY_ALLOCATION_ERROR;
994                return Locale("");
995            }
996        }
997        name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
998        Locale retLocale(bp);
999        if (bp != buffer) {
1000            uprv_free(bp);
1001        }
1002        return retLocale;
1003    }
1004    status = U_ILLEGAL_ARGUMENT_ERROR;
1005    Locale retLocale;
1006    return retLocale;
1007}
1008
1009UnicodeString
1010RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1011    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1012        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1013        int32_t len = localeName.length();
1014        UChar* localeStr = localeName.getBuffer(len + 1);
1015        while (len >= 0) {
1016            localeStr[len] = 0;
1017            int32_t ix = localizations->indexForLocale(localeStr);
1018            if (ix >= 0) {
1019                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1020                return name;
1021            }
1022
1023            // trim trailing portion, skipping over ommitted sections
1024            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1025            while (len > 0 && localeStr[len-1] == 0x005F) --len;
1026        }
1027        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1028        return name;
1029    }
1030    UnicodeString bogus;
1031    bogus.setToBogus();
1032    return bogus;
1033}
1034
1035UnicodeString
1036RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1037    if (localizations) {
1038        UnicodeString rsn(ruleSetName);
1039        int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1040        return getRuleSetDisplayName(ix, localeParam);
1041    }
1042    UnicodeString bogus;
1043    bogus.setToBogus();
1044    return bogus;
1045}
1046
1047NFRuleSet*
1048RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1049{
1050    if (U_SUCCESS(status) && ruleSets) {
1051        for (NFRuleSet** p = ruleSets; *p; ++p) {
1052            NFRuleSet* rs = *p;
1053            if (rs->isNamed(name)) {
1054                return rs;
1055            }
1056        }
1057        status = U_ILLEGAL_ARGUMENT_ERROR;
1058    }
1059    return NULL;
1060}
1061
1062UnicodeString&
1063RuleBasedNumberFormat::format(int32_t number,
1064                              UnicodeString& toAppendTo,
1065                              FieldPosition& /* pos */) const
1066{
1067    if (defaultRuleSet) {
1068        int32_t startPos = toAppendTo.length();
1069        defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1070        adjustForCapitalizationContext(startPos, toAppendTo);
1071    }
1072    return toAppendTo;
1073}
1074
1075
1076UnicodeString&
1077RuleBasedNumberFormat::format(int64_t number,
1078                              UnicodeString& toAppendTo,
1079                              FieldPosition& /* pos */) const
1080{
1081    if (defaultRuleSet) {
1082        int32_t startPos = toAppendTo.length();
1083        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1084        adjustForCapitalizationContext(startPos, toAppendTo);
1085    }
1086    return toAppendTo;
1087}
1088
1089
1090UnicodeString&
1091RuleBasedNumberFormat::format(double number,
1092                              UnicodeString& toAppendTo,
1093                              FieldPosition& /* pos */) const
1094{
1095    int32_t startPos = toAppendTo.length();
1096    // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1097    if (uprv_isNaN(number)) {
1098        DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1099        if (decFmtSyms) {
1100            toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1101        }
1102    } else if (defaultRuleSet) {
1103        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1104    }
1105    return adjustForCapitalizationContext(startPos, toAppendTo);
1106}
1107
1108
1109UnicodeString&
1110RuleBasedNumberFormat::format(int32_t number,
1111                              const UnicodeString& ruleSetName,
1112                              UnicodeString& toAppendTo,
1113                              FieldPosition& /* pos */,
1114                              UErrorCode& status) const
1115{
1116    // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1117    if (U_SUCCESS(status)) {
1118        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1119            // throw new IllegalArgumentException("Can't use internal rule set");
1120            status = U_ILLEGAL_ARGUMENT_ERROR;
1121        } else {
1122            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1123            if (rs) {
1124                int32_t startPos = toAppendTo.length();
1125                rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1126                adjustForCapitalizationContext(startPos, toAppendTo);
1127            }
1128        }
1129    }
1130    return toAppendTo;
1131}
1132
1133
1134UnicodeString&
1135RuleBasedNumberFormat::format(int64_t number,
1136                              const UnicodeString& ruleSetName,
1137                              UnicodeString& toAppendTo,
1138                              FieldPosition& /* pos */,
1139                              UErrorCode& status) const
1140{
1141    if (U_SUCCESS(status)) {
1142        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1143            // throw new IllegalArgumentException("Can't use internal rule set");
1144            status = U_ILLEGAL_ARGUMENT_ERROR;
1145        } else {
1146            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1147            if (rs) {
1148                int32_t startPos = toAppendTo.length();
1149                rs->format(number, toAppendTo, toAppendTo.length());
1150                adjustForCapitalizationContext(startPos, toAppendTo);
1151            }
1152        }
1153    }
1154    return toAppendTo;
1155}
1156
1157
1158UnicodeString&
1159RuleBasedNumberFormat::format(double number,
1160                              const UnicodeString& ruleSetName,
1161                              UnicodeString& toAppendTo,
1162                              FieldPosition& /* pos */,
1163                              UErrorCode& status) const
1164{
1165    if (U_SUCCESS(status)) {
1166        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1167            // throw new IllegalArgumentException("Can't use internal rule set");
1168            status = U_ILLEGAL_ARGUMENT_ERROR;
1169        } else {
1170            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1171            if (rs) {
1172                int32_t startPos = toAppendTo.length();
1173                rs->format(number, toAppendTo, toAppendTo.length());
1174                adjustForCapitalizationContext(startPos, toAppendTo);
1175            }
1176        }
1177    }
1178    return toAppendTo;
1179}
1180
1181UnicodeString&
1182RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1183                                                      UnicodeString& currentResult) const
1184{
1185#if !UCONFIG_NO_BREAK_ITERATION
1186    if (startPos==0 && currentResult.length() > 0) {
1187        // capitalize currentResult according to context
1188        UChar32 ch = currentResult.char32At(0);
1189        UErrorCode status = U_ZERO_ERROR;
1190        UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1191        if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1192              ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1193                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1194                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1195            // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1196            // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1197            currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1198        }
1199    }
1200#endif
1201    return currentResult;
1202}
1203
1204
1205void
1206RuleBasedNumberFormat::parse(const UnicodeString& text,
1207                             Formattable& result,
1208                             ParsePosition& parsePosition) const
1209{
1210    if (!ruleSets) {
1211        parsePosition.setErrorIndex(0);
1212        return;
1213    }
1214
1215    UnicodeString workingText(text, parsePosition.getIndex());
1216    ParsePosition workingPos(0);
1217
1218    ParsePosition high_pp(0);
1219    Formattable high_result;
1220
1221    for (NFRuleSet** p = ruleSets; *p; ++p) {
1222        NFRuleSet *rp = *p;
1223        if (rp->isPublic() && rp->isParseable()) {
1224            ParsePosition working_pp(0);
1225            Formattable working_result;
1226
1227            rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
1228            if (working_pp.getIndex() > high_pp.getIndex()) {
1229                high_pp = working_pp;
1230                high_result = working_result;
1231
1232                if (high_pp.getIndex() == workingText.length()) {
1233                    break;
1234                }
1235            }
1236        }
1237    }
1238
1239    int32_t startIndex = parsePosition.getIndex();
1240    parsePosition.setIndex(startIndex + high_pp.getIndex());
1241    if (high_pp.getIndex() > 0) {
1242        parsePosition.setErrorIndex(-1);
1243    } else {
1244        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1245        parsePosition.setErrorIndex(startIndex + errorIndex);
1246    }
1247    result = high_result;
1248    if (result.getType() == Formattable::kDouble) {
1249        int32_t r = (int32_t)result.getDouble();
1250        if ((double)r == result.getDouble()) {
1251            result.setLong(r);
1252        }
1253    }
1254}
1255
1256#if !UCONFIG_NO_COLLATION
1257
1258void
1259RuleBasedNumberFormat::setLenient(UBool enabled)
1260{
1261    lenient = enabled;
1262    if (!enabled && collator) {
1263        delete collator;
1264        collator = NULL;
1265    }
1266}
1267
1268#endif
1269
1270void
1271RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1272    if (U_SUCCESS(status)) {
1273        if (ruleSetName.isEmpty()) {
1274          if (localizations) {
1275              UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1276              defaultRuleSet = findRuleSet(name, status);
1277          } else {
1278            initDefaultRuleSet();
1279          }
1280        } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1281            status = U_ILLEGAL_ARGUMENT_ERROR;
1282        } else {
1283            NFRuleSet* result = findRuleSet(ruleSetName, status);
1284            if (result != NULL) {
1285                defaultRuleSet = result;
1286            }
1287        }
1288    }
1289}
1290
1291UnicodeString
1292RuleBasedNumberFormat::getDefaultRuleSetName() const {
1293  UnicodeString result;
1294  if (defaultRuleSet && defaultRuleSet->isPublic()) {
1295    defaultRuleSet->getName(result);
1296  } else {
1297    result.setToBogus();
1298  }
1299  return result;
1300}
1301
1302void
1303RuleBasedNumberFormat::initDefaultRuleSet()
1304{
1305    defaultRuleSet = NULL;
1306    if (!ruleSets) {
1307      return;
1308    }
1309
1310    const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1311    const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1312    const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1313
1314    NFRuleSet**p = &ruleSets[0];
1315    while (*p) {
1316        if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1317            defaultRuleSet = *p;
1318            return;
1319        } else {
1320            ++p;
1321        }
1322    }
1323
1324    defaultRuleSet = *--p;
1325    if (!defaultRuleSet->isPublic()) {
1326        while (p != ruleSets) {
1327            if ((*--p)->isPublic()) {
1328                defaultRuleSet = *p;
1329                break;
1330            }
1331        }
1332    }
1333}
1334
1335
1336void
1337RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1338                            UParseError& pErr, UErrorCode& status)
1339{
1340    // TODO: implement UParseError
1341    uprv_memset(&pErr, 0, sizeof(UParseError));
1342    // Note: this can leave ruleSets == NULL, so remaining code should check
1343    if (U_FAILURE(status)) {
1344        return;
1345    }
1346
1347    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1348
1349    UnicodeString description(rules);
1350    if (!description.length()) {
1351        status = U_MEMORY_ALLOCATION_ERROR;
1352        return;
1353    }
1354
1355    // start by stripping the trailing whitespace from all the rules
1356    // (this is all the whitespace follwing each semicolon in the
1357    // description).  This allows us to look for rule-set boundaries
1358    // by searching for ";%" without having to worry about whitespace
1359    // between the ; and the %
1360    stripWhitespace(description);
1361
1362    // check to see if there's a set of lenient-parse rules.  If there
1363    // is, pull them out into our temporary holding place for them,
1364    // and delete them from the description before the real desciption-
1365    // parsing code sees them
1366    int32_t lp = description.indexOf(gLenientParse, -1, 0);
1367    if (lp != -1) {
1368        // we've got to make sure we're not in the middle of a rule
1369        // (where "%%lenient-parse" would actually get treated as
1370        // rule text)
1371        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1372            // locate the beginning and end of the actual collation
1373            // rules (there may be whitespace between the name and
1374            // the first token in the description)
1375            int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1376
1377            if (lpEnd == -1) {
1378                lpEnd = description.length() - 1;
1379            }
1380            int lpStart = lp + u_strlen(gLenientParse);
1381            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1382                ++lpStart;
1383            }
1384
1385            // copy out the lenient-parse rules and delete them
1386            // from the description
1387            lenientParseRules = new UnicodeString();
1388            /* test for NULL */
1389            if (lenientParseRules == 0) {
1390                status = U_MEMORY_ALLOCATION_ERROR;
1391                return;
1392            }
1393            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1394
1395            description.remove(lp, lpEnd + 1 - lp);
1396        }
1397    }
1398
1399    // pre-flight parsing the description and count the number of
1400    // rule sets (";%" marks the end of one rule set and the beginning
1401    // of the next)
1402    numRuleSets = 0;
1403    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1404        ++numRuleSets;
1405        ++p;
1406    }
1407    ++numRuleSets;
1408
1409    // our rule list is an array of the appropriate size
1410    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1411    /* test for NULL */
1412    if (ruleSets == 0) {
1413        status = U_MEMORY_ALLOCATION_ERROR;
1414        return;
1415    }
1416
1417    for (int i = 0; i <= numRuleSets; ++i) {
1418        ruleSets[i] = NULL;
1419    }
1420
1421    // divide up the descriptions into individual rule-set descriptions
1422    // and store them in a temporary array.  At each step, we also
1423    // new up a rule set, but all this does is initialize its name
1424    // and remove it from its description.  We can't actually parse
1425    // the rest of the descriptions and finish initializing everything
1426    // because we have to know the names and locations of all the rule
1427    // sets before we can actually set everything up
1428    if(!numRuleSets) {
1429        status = U_ILLEGAL_ARGUMENT_ERROR;
1430        return;
1431    }
1432
1433    ruleSetDescriptions = new UnicodeString[numRuleSets];
1434    if (ruleSetDescriptions == 0) {
1435        status = U_MEMORY_ALLOCATION_ERROR;
1436        return;
1437    }
1438
1439    {
1440        int curRuleSet = 0;
1441        int32_t start = 0;
1442        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1443            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1444            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1445            if (ruleSets[curRuleSet] == 0) {
1446                status = U_MEMORY_ALLOCATION_ERROR;
1447                return;
1448            }
1449            ++curRuleSet;
1450            start = p + 1;
1451        }
1452        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1453        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1454        if (ruleSets[curRuleSet] == 0) {
1455            status = U_MEMORY_ALLOCATION_ERROR;
1456            return;
1457        }
1458    }
1459
1460    // now we can take note of the formatter's default rule set, which
1461    // is the last public rule set in the description (it's the last
1462    // rather than the first so that a user can create a new formatter
1463    // from an existing formatter and change its default behavior just
1464    // by appending more rule sets to the end)
1465
1466    // {dlf} Initialization of a fraction rule set requires the default rule
1467    // set to be known.  For purposes of initialization, this is always the
1468    // last public rule set, no matter what the localization data says.
1469    initDefaultRuleSet();
1470
1471    // finally, we can go back through the temporary descriptions
1472    // list and finish seting up the substructure (and we throw
1473    // away the temporary descriptions as we go)
1474    {
1475        for (int i = 0; i < numRuleSets; i++) {
1476            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1477        }
1478    }
1479
1480    // Now that the rules are initialized, the 'real' default rule
1481    // set can be adjusted by the localization data.
1482
1483    // The C code keeps the localization array as is, rather than building
1484    // a separate array of the public rule set names, so we have less work
1485    // to do here-- but we still need to check the names.
1486
1487    if (localizationInfos) {
1488        // confirm the names, if any aren't in the rules, that's an error
1489        // it is ok if the rules contain public rule sets that are not in this list
1490        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1491            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1492            NFRuleSet* rs = findRuleSet(name, status);
1493            if (rs == NULL) {
1494                break; // error
1495            }
1496            if (i == 0) {
1497                defaultRuleSet = rs;
1498            }
1499        }
1500    } else {
1501        defaultRuleSet = getDefaultRuleSet();
1502    }
1503    originalDescription = rules;
1504}
1505
1506// override the NumberFormat implementation in order to
1507// lazily initialize relevant items
1508void
1509RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1510{
1511    NumberFormat::setContext(value, status);
1512    if (U_SUCCESS(status)) {
1513    	if (!capitalizationInfoSet &&
1514    	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1515    	    initCapitalizationContextInfo(locale);
1516    	    capitalizationInfoSet = TRUE;
1517        }
1518#if !UCONFIG_NO_BREAK_ITERATION
1519        if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1520                (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1521                (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1522            UErrorCode status = U_ZERO_ERROR;
1523            capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1524            if (U_FAILURE(status)) {
1525                delete capitalizationBrkIter;
1526                capitalizationBrkIter = NULL;
1527            }
1528        }
1529#endif
1530    }
1531}
1532
1533void
1534RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1535{
1536#if !UCONFIG_NO_BREAK_ITERATION
1537    const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1538    UErrorCode status = U_ZERO_ERROR;
1539    UResourceBundle *rb = ures_open(NULL, localeID, &status);
1540    rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1541    rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1542    if (U_SUCCESS(status) && rb != NULL) {
1543        int32_t len = 0;
1544        const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1545        if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1546            capitalizationForUIListMenu = intVector[0];
1547            capitalizationForStandAlone = intVector[1];
1548        }
1549    }
1550    ures_close(rb);
1551#endif
1552}
1553
1554void
1555RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1556{
1557    // iterate through the characters...
1558    UnicodeString result;
1559
1560    int start = 0;
1561    while (start != -1 && start < description.length()) {
1562        // seek to the first non-whitespace character...
1563        while (start < description.length()
1564            && PatternProps::isWhiteSpace(description.charAt(start))) {
1565            ++start;
1566        }
1567
1568        // locate the next semicolon in the text and copy the text from
1569        // our current position up to that semicolon into the result
1570        int32_t p = description.indexOf(gSemiColon, start);
1571        if (p == -1) {
1572            // or if we don't find a semicolon, just copy the rest of
1573            // the string into the result
1574            result.append(description, start, description.length() - start);
1575            start = -1;
1576        }
1577        else if (p < description.length()) {
1578            result.append(description, start, p + 1 - start);
1579            start = p + 1;
1580        }
1581
1582        // when we get here, we've seeked off the end of the sring, and
1583        // we terminate the loop (we continue until *start* is -1 rather
1584        // than until *p* is -1, because otherwise we'd miss the last
1585        // rule in the description)
1586        else {
1587            start = -1;
1588        }
1589    }
1590
1591    description.setTo(result);
1592}
1593
1594
1595void
1596RuleBasedNumberFormat::dispose()
1597{
1598    if (ruleSets) {
1599        for (NFRuleSet** p = ruleSets; *p; ++p) {
1600            delete *p;
1601        }
1602        uprv_free(ruleSets);
1603        ruleSets = NULL;
1604    }
1605
1606    if (ruleSetDescriptions) {
1607        delete [] ruleSetDescriptions;
1608    }
1609
1610#if !UCONFIG_NO_COLLATION
1611    delete collator;
1612#endif
1613    collator = NULL;
1614
1615    delete decimalFormatSymbols;
1616    decimalFormatSymbols = NULL;
1617
1618    delete lenientParseRules;
1619    lenientParseRules = NULL;
1620
1621#if !UCONFIG_NO_BREAK_ITERATION
1622   delete capitalizationBrkIter;
1623   capitalizationBrkIter = NULL;
1624#endif
1625
1626    if (localizations) localizations = localizations->unref();
1627}
1628
1629
1630//-----------------------------------------------------------------------
1631// package-internal API
1632//-----------------------------------------------------------------------
1633
1634/**
1635 * Returns the collator to use for lenient parsing.  The collator is lazily created:
1636 * this function creates it the first time it's called.
1637 * @return The collator to use for lenient parsing, or null if lenient parsing
1638 * is turned off.
1639*/
1640const RuleBasedCollator*
1641RuleBasedNumberFormat::getCollator() const
1642{
1643#if !UCONFIG_NO_COLLATION
1644    if (!ruleSets) {
1645        return NULL;
1646    }
1647
1648    // lazy-evaluate the collator
1649    if (collator == NULL && lenient) {
1650        // create a default collator based on the formatter's locale,
1651        // then pull out that collator's rules, append any additional
1652        // rules specified in the description, and create a _new_
1653        // collator based on the combinaiton of those rules
1654
1655        UErrorCode status = U_ZERO_ERROR;
1656
1657        Collator* temp = Collator::createInstance(locale, status);
1658        RuleBasedCollator* newCollator;
1659        if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1660            if (lenientParseRules) {
1661                UnicodeString rules(newCollator->getRules());
1662                rules.append(*lenientParseRules);
1663
1664                newCollator = new RuleBasedCollator(rules, status);
1665                // Exit if newCollator could not be created.
1666                if (newCollator == NULL) {
1667                    return NULL;
1668                }
1669            } else {
1670                temp = NULL;
1671            }
1672            if (U_SUCCESS(status)) {
1673                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1674                // cast away const
1675                ((RuleBasedNumberFormat*)this)->collator = newCollator;
1676            } else {
1677                delete newCollator;
1678            }
1679        }
1680        delete temp;
1681    }
1682#endif
1683
1684    // if lenient-parse mode is off, this will be null
1685    // (see setLenientParseMode())
1686    return collator;
1687}
1688
1689
1690/**
1691 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1692 * instances owned by this formatter.  This object is lazily created: this function
1693 * creates it the first time it's called.
1694 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1695 * instances owned by this formatter.
1696*/
1697DecimalFormatSymbols*
1698RuleBasedNumberFormat::getDecimalFormatSymbols() const
1699{
1700    // lazy-evaluate the DecimalFormatSymbols object.  This object
1701    // is shared by all DecimalFormat instances belonging to this
1702    // formatter
1703    if (decimalFormatSymbols == NULL) {
1704        UErrorCode status = U_ZERO_ERROR;
1705        DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1706        if (U_SUCCESS(status)) {
1707            ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1708        } else {
1709            delete temp;
1710        }
1711    }
1712    return decimalFormatSymbols;
1713}
1714
1715// De-owning the current localized symbols and adopt the new symbols.
1716void
1717RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1718{
1719    if (symbolsToAdopt == NULL) {
1720        return; // do not allow caller to set decimalFormatSymbols to NULL
1721    }
1722
1723    if (decimalFormatSymbols != NULL) {
1724        delete decimalFormatSymbols;
1725    }
1726
1727    decimalFormatSymbols = symbolsToAdopt;
1728
1729    {
1730        // Apply the new decimalFormatSymbols by reparsing the rulesets
1731        UErrorCode status = U_ZERO_ERROR;
1732
1733        for (int32_t i = 0; i < numRuleSets; i++) {
1734            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1735        }
1736    }
1737}
1738
1739// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1740void
1741RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1742{
1743    adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1744}
1745
1746U_NAMESPACE_END
1747
1748/* U_HAVE_RBNF */
1749#endif
1750