1/*
2*******************************************************************************
3* Copyright (C) 2007-2012, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File plurrule.cpp
8*/
9
10#include "unicode/utypes.h"
11#include "unicode/localpointer.h"
12#include "unicode/plurrule.h"
13#include "unicode/upluralrules.h"
14#include "unicode/ures.h"
15#include "cmemory.h"
16#include "cstring.h"
17#include "hash.h"
18#include "mutex.h"
19#include "patternprops.h"
20#include "plurrule_impl.h"
21#include "putilimp.h"
22#include "ucln_in.h"
23#include "ustrfmt.h"
24#include "locutil.h"
25#include "uassert.h"
26
27#if !UCONFIG_NO_FORMATTING
28
29U_NAMESPACE_BEGIN
30
// Guards the lazy construction of the sample tables in
// PluralRules::initSamples(); one mutex is shared by all instances.
static UMutex pluralMutex = U_MUTEX_INITIALIZER;

// Element count of a statically sized array, as an int32_t.
#define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])

// The keyword "other" (NUL-terminated; callers pass the length 5 explicitly).
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
// The rule description "other: n", used when no locale-specific rules apply.
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Words of the rule syntax, each as a NUL-terminated UChar string.
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};

// ICU run-time type identification boilerplate for the two UObject subclasses.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
49
50PluralRules::PluralRules(UErrorCode& status)
51:   UObject(),
52    mRules(NULL),
53    mParser(NULL),
54    mSamples(NULL),
55    mSampleInfo(NULL),
56    mSampleInfoCount(0)
57{
58    if (U_FAILURE(status)) {
59        return;
60    }
61    mParser = new RuleParser();
62    if (mParser==NULL) {
63        status = U_MEMORY_ALLOCATION_ERROR;
64    }
65}
66
67PluralRules::PluralRules(const PluralRules& other)
68: UObject(other),
69    mRules(NULL),
70  mParser(NULL),
71  mSamples(NULL),
72  mSampleInfo(NULL),
73  mSampleInfoCount(0)
74{
75    *this=other;
76}
77
78PluralRules::~PluralRules() {
79    delete mRules;
80    delete mParser;
81    uprv_free(mSamples);
82    uprv_free(mSampleInfo);
83}
84
85PluralRules*
86PluralRules::clone() const {
87    return new PluralRules(*this);
88}
89
90PluralRules&
91PluralRules::operator=(const PluralRules& other) {
92    if (this != &other) {
93        delete mRules;
94        if (other.mRules==NULL) {
95            mRules = NULL;
96        }
97        else {
98            mRules = new RuleChain(*other.mRules);
99        }
100        delete mParser;
101        mParser = new RuleParser();
102
103        uprv_free(mSamples);
104        mSamples = NULL;
105
106        uprv_free(mSampleInfo);
107        mSampleInfo = NULL;
108        mSampleInfoCount = 0;
109    }
110
111    return *this;
112}
113
114PluralRules* U_EXPORT2
115PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
116    RuleChain   rules;
117
118    if (U_FAILURE(status)) {
119        return NULL;
120    }
121    PluralRules *newRules = new PluralRules(status);
122    if ( (newRules != NULL)&& U_SUCCESS(status) ) {
123        newRules->parseDescription((UnicodeString &)description, rules, status);
124        if (U_SUCCESS(status)) {
125            newRules->addRules(rules);
126        }
127    }
128    if (U_FAILURE(status)) {
129        delete newRules;
130        return NULL;
131    }
132    else {
133        return newRules;
134    }
135}
136
137PluralRules* U_EXPORT2
138PluralRules::createDefaultRules(UErrorCode& status) {
139    return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
140}
141
142PluralRules* U_EXPORT2
143PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
144    return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
145}
146
147PluralRules* U_EXPORT2
148PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
149    RuleChain   rChain;
150    if (U_FAILURE(status)) {
151        return NULL;
152    }
153    if (type >= UPLURAL_TYPE_COUNT) {
154        status = U_ILLEGAL_ARGUMENT_ERROR;
155        return NULL;
156    }
157    PluralRules *newObj = new PluralRules(status);
158    if (newObj==NULL || U_FAILURE(status)) {
159        delete newObj;
160        return NULL;
161    }
162    UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
163    if ((locRule.length() != 0) && U_SUCCESS(status)) {
164        newObj->parseDescription(locRule, rChain, status);
165        if (U_SUCCESS(status)) {
166            newObj->addRules(rChain);
167        }
168    }
169    if (U_FAILURE(status)||(locRule.length() == 0)) {
170        // use default plural rule
171        status = U_ZERO_ERROR;
172        UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
173        newObj->parseDescription(defRule, rChain, status);
174        newObj->addRules(rChain);
175    }
176
177    return newObj;
178}
179
180UnicodeString
181PluralRules::select(int32_t number) const {
182    if (mRules == NULL) {
183        return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
184    }
185    else {
186        return mRules->select(number);
187    }
188}
189
190UnicodeString
191PluralRules::select(double number) const {
192    if (mRules == NULL) {
193        return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
194    }
195    else {
196        return mRules->select(number);
197    }
198}
199
200StringEnumeration*
201PluralRules::getKeywords(UErrorCode& status) const {
202    if (U_FAILURE(status))  return NULL;
203    StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
204    if (U_FAILURE(status)) {
205      delete nameEnumerator;
206      return NULL;
207    }
208
209    return nameEnumerator;
210}
211
212double
213PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
214  double val = 0.0;
215  UErrorCode status = U_ZERO_ERROR;
216  int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
217  return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
218}
219
220int32_t
221PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
222                                 int32_t destCapacity, UErrorCode& error) {
223    return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
224}
225
226int32_t
227PluralRules::getSamples(const UnicodeString &keyword, double *dest,
228                        int32_t destCapacity, UErrorCode& status) {
229    return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
230}
231
232int32_t
233PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
234                                int32_t destCapacity, UBool includeUnlimited,
235                                UErrorCode& status) {
236    initSamples(status);
237    if (U_FAILURE(status)) {
238        return -1;
239    }
240    if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
241        status = U_ILLEGAL_ARGUMENT_ERROR;
242        return -1;
243    }
244
245    int32_t index = getKeywordIndex(keyword, status);
246    if (index == -1) {
247        return 0;
248    }
249
250    const int32_t LIMIT_MASK = 0x1 << 31;
251
252    if (!includeUnlimited) {
253        if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
254            return -1;
255        }
256    }
257
258    int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
259    int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
260    int32_t len = limit - start;
261    if (len <= destCapacity) {
262        destCapacity = len;
263    } else if (includeUnlimited) {
264        len = destCapacity;  // no overflow, and don't report more than we copy
265    } else {
266        status = U_BUFFER_OVERFLOW_ERROR;
267        return len;
268    }
269    for (int32_t i = 0; i < destCapacity; ++i, ++start) {
270        dest[i] = mSamples[start];
271    }
272    return len;
273}
274
275
276UBool
277PluralRules::isKeyword(const UnicodeString& keyword) const {
278    if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
279        return true;
280    }
281    else {
282        if (mRules==NULL) {
283            return false;
284        }
285        else {
286            return mRules->isKeyword(keyword);
287        }
288    }
289}
290
291UnicodeString
292PluralRules::getKeywordOther() const {
293    return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
294}
295
296UBool
297PluralRules::operator==(const PluralRules& other) const  {
298    int32_t limit;
299    const UnicodeString *ptrKeyword;
300    UErrorCode status= U_ZERO_ERROR;
301
302    if ( this == &other ) {
303        return TRUE;
304    }
305    LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
306    LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
307    if (U_FAILURE(status)) {
308        return FALSE;
309    }
310
311    if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
312        return FALSE;
313    }
314    myKeywordList->reset(status);
315    while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
316        if (!other.isKeyword(*ptrKeyword)) {
317            return FALSE;
318        }
319    }
320    otherKeywordList->reset(status);
321    while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
322        if (!this->isKeyword(*ptrKeyword)) {
323            return FALSE;
324        }
325    }
326    if (U_FAILURE(status)) {
327        return FALSE;
328    }
329
330    if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
331        return FALSE;
332    }
333    UnicodeString myKeyword, otherKeyword;
334    for (int32_t i=0; i<limit; ++i) {
335        myKeyword = this->select(i);
336        otherKeyword = other.select(i);
337        if (myKeyword!=otherKeyword) {
338            return FALSE;
339        }
340    }
341    return TRUE;
342}
343
344void
345PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
346{
347    int32_t ruleIndex=0;
348    UnicodeString token;
349    tokenType type;
350    tokenType prevType=none;
351    RuleChain *ruleChain=NULL;
352    AndConstraint *curAndConstraint=NULL;
353    OrConstraint *orNode=NULL;
354    RuleChain *lastChain=NULL;
355
356    if (U_FAILURE(status)) {
357        return;
358    }
359    UnicodeString ruleData = data.toLower("");
360    while (ruleIndex< ruleData.length()) {
361        mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
362        if (U_FAILURE(status)) {
363            return;
364        }
365        mParser->checkSyntax(prevType, type, status);
366        if (U_FAILURE(status)) {
367            return;
368        }
369        switch (type) {
370        case tAnd:
371            U_ASSERT(curAndConstraint != NULL);
372            curAndConstraint = curAndConstraint->add();
373            break;
374        case tOr:
375            lastChain = &rules;
376            while (lastChain->next !=NULL) {
377                lastChain = lastChain->next;
378            }
379            orNode=lastChain->ruleHeader;
380            while (orNode->next != NULL) {
381                orNode = orNode->next;
382            }
383            orNode->next= new OrConstraint();
384            orNode=orNode->next;
385            orNode->next=NULL;
386            curAndConstraint = orNode->add();
387            break;
388        case tIs:
389            U_ASSERT(curAndConstraint != NULL);
390            curAndConstraint->rangeHigh=-1;
391            break;
392        case tNot:
393            U_ASSERT(curAndConstraint != NULL);
394            curAndConstraint->notIn=TRUE;
395            break;
396        case tIn:
397            U_ASSERT(curAndConstraint != NULL);
398            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
399            curAndConstraint->integerOnly = TRUE;
400            break;
401        case tWithin:
402            U_ASSERT(curAndConstraint != NULL);
403            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
404            break;
405        case tNumber:
406            U_ASSERT(curAndConstraint != NULL);
407            if ( (curAndConstraint->op==AndConstraint::MOD)&&
408                 (curAndConstraint->opNum == -1 ) ) {
409                curAndConstraint->opNum=getNumberValue(token);
410            }
411            else {
412                if (curAndConstraint->rangeLow == -1) {
413                    curAndConstraint->rangeLow=getNumberValue(token);
414                }
415                else {
416                    curAndConstraint->rangeHigh=getNumberValue(token);
417                }
418            }
419            break;
420        case tMod:
421            U_ASSERT(curAndConstraint != NULL);
422            curAndConstraint->op=AndConstraint::MOD;
423            break;
424        case tKeyword:
425            if (ruleChain==NULL) {
426                ruleChain = &rules;
427            }
428            else {
429                while (ruleChain->next!=NULL){
430                    ruleChain=ruleChain->next;
431                }
432                ruleChain=ruleChain->next=new RuleChain();
433            }
434            if (ruleChain->ruleHeader != NULL) {
435                delete ruleChain->ruleHeader;
436            }
437            orNode = ruleChain->ruleHeader = new OrConstraint();
438            curAndConstraint = orNode->add();
439            ruleChain->keyword = token;
440            break;
441        default:
442            break;
443        }
444        prevType=type;
445    }
446}
447
448int32_t
449PluralRules::getNumberValue(const UnicodeString& token) const {
450    int32_t i;
451    char digits[128];
452
453    i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
454    digits[i]='\0';
455
456    return((int32_t)atoi(digits));
457}
458
459
460void
461PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
462    int32_t i=*curIndex;
463
464    localeName.remove();
465    while (i< localeData.length()) {
466       if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
467           break;
468       }
469       i++;
470    }
471
472    while (i< localeData.length()) {
473       if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
474           break;
475       }
476       localeName+=localeData.charAt(i++);
477    }
478    *curIndex=i;
479}
480
481
482int32_t
483PluralRules::getRepeatLimit() const {
484    if (mRules!=NULL) {
485        return mRules->getRepeatLimit();
486    }
487    else {
488        return 0;
489    }
490}
491
492int32_t
493PluralRules::getKeywordIndex(const UnicodeString& keyword,
494                             UErrorCode& status) const {
495    if (U_SUCCESS(status)) {
496        int32_t n = 0;
497        RuleChain* rc = mRules;
498        while (rc != NULL) {
499            if (rc->ruleHeader != NULL) {
500                if (rc->keyword == keyword) {
501                    return n;
502                }
503                ++n;
504            }
505            rc = rc->next;
506        }
507        if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
508            return n;
509        }
510    }
511    return -1;
512}
513
// One sample collected by PluralRules::initSamples(): a value together with
// the index of the rule (keyword) it was attributed to.
struct SampleRecord {
    int32_t ruleIndex;
    double  value;
};
518
519void
520PluralRules::initSamples(UErrorCode& status) {
521    if (U_FAILURE(status)) {
522        return;
523    }
524    Mutex lock(&pluralMutex);
525
526    if (mSamples) {
527        return;
528    }
529
530    // Note, the original design let you have multiple rules with the same keyword.  But
531    // we don't use that in our data and existing functions in this implementation don't
532    // fully support it (for example, the returned keywords is a list and not a set).
533    //
534    // So I don't support this here either.  If you ask for samples, or for all values,
535    // you will get information about the first rule with that keyword, not all rules with
536    // that keyword.
537
538    int32_t maxIndex = 0;
539    int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
540    RuleChain* rc = mRules;
541    while (rc != NULL) {
542        if (rc->ruleHeader != NULL) {
543            if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
544                otherIndex = maxIndex;
545            }
546            ++maxIndex;
547        }
548        rc = rc->next;
549    }
550    if (otherIndex == -1) {
551        ++maxIndex;
552    }
553
554    LocalMemory<int32_t> newSampleInfo;
555    if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
556        status = U_MEMORY_ALLOCATION_ERROR;
557        return;
558    }
559
560    const int32_t LIMIT_MASK = 0x1 << 31;
561
562    rc = mRules;
563    int32_t n = 0;
564    while (rc != NULL) {
565        if (rc->ruleHeader != NULL) {
566            newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
567        }
568        rc = rc->next;
569    }
570    if (otherIndex == -1) {
571        newSampleInfo[maxIndex - 1] = 0; // unlimited
572    }
573
574    MaybeStackArray<SampleRecord, 10> newSamples;
575    int32_t sampleCount = 0;
576
577    int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
578    if (limit < 10) {
579        limit = 10;
580    }
581
582    for (int i = 0, keywordsRemaining = maxIndex;
583          keywordsRemaining > 0 && i < limit;
584          ++i) {
585        double val = i / 2.0;
586
587        n = 0;
588        rc = mRules;
589        int32_t found = -1;
590        while (rc != NULL) {
591            if (rc->ruleHeader != NULL) {
592                if (rc->ruleHeader->isFulfilled(val)) {
593                    found = n;
594                    break;
595                }
596                ++n;
597            }
598            rc = rc->next;
599        }
600        if (found == -1) {
601            // 'other'.  If there is an 'other' rule, the rule set is bad since nothing
602            // should leak through, but we don't bother to report that here.
603            found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
604        }
605        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
606            continue;
607        }
608        newSampleInfo[found] += 1; // won't impact limit flag
609
610        if (sampleCount == newSamples.getCapacity()) {
611            int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
612            if (NULL == newSamples.resize(newCapacity, sampleCount)) {
613                status = U_MEMORY_ALLOCATION_ERROR;
614                return;
615            }
616        }
617        newSamples[sampleCount].ruleIndex = found;
618        newSamples[sampleCount].value = val;
619        ++sampleCount;
620
621        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
622            --keywordsRemaining;
623        }
624    }
625
626    // sort the values by index, leaving order otherwise unchanged
627    // this is just a selection sort for simplicity
628    LocalMemory<double> values;
629    if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
630        status = U_MEMORY_ALLOCATION_ERROR;
631        return;
632    }
633    for (int i = 0, j = 0; i < maxIndex; ++i) {
634        for (int k = 0; k < sampleCount; ++k) {
635            if (newSamples[k].ruleIndex == i) {
636                values[j++] = newSamples[k].value;
637            }
638        }
639    }
640
641    // convert array of mask/lengths to array of mask/limits
642    limit = 0;
643    for (int i = 0; i < maxIndex; ++i) {
644        int32_t info = newSampleInfo[i];
645        int32_t len = info & ~LIMIT_MASK;
646        limit += len;
647        // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
648        // it's not really unlimited, so mark it as limited
649        int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
650        newSampleInfo[i] = limit | mask;
651    }
652
653    // ok, we've got good data
654    mSamples = values.orphan();
655    mSampleInfo = newSampleInfo.orphan();
656    mSampleInfoCount = maxIndex;
657}
658
659void
660PluralRules::addRules(RuleChain& rules) {
661    RuleChain *newRule = new RuleChain(rules);
662    this->mRules=newRule;
663    newRule->setRepeatLimit();
664}
665
666UnicodeString
667PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
668    UnicodeString emptyStr;
669
670    if (U_FAILURE(errCode)) {
671        return emptyStr;
672    }
673    LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode));
674    if(U_FAILURE(errCode)) {
675        return emptyStr;
676    }
677    const char *typeKey;
678    switch (type) {
679    case UPLURAL_TYPE_CARDINAL:
680        typeKey = "locales";
681        break;
682    case UPLURAL_TYPE_ORDINAL:
683        typeKey = "locales_ordinals";
684        break;
685    default:
686        // Must not occur: The caller should have checked for valid types.
687        errCode = U_ILLEGAL_ARGUMENT_ERROR;
688        return emptyStr;
689    }
690    LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode));
691    if(U_FAILURE(errCode)) {
692        return emptyStr;
693    }
694    int32_t resLen=0;
695    const char *curLocaleName=locale.getName();
696    const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
697
698    if (s == NULL) {
699        // Check parent locales.
700        UErrorCode status = U_ZERO_ERROR;
701        char parentLocaleName[ULOC_FULLNAME_CAPACITY];
702        const char *curLocaleName=locale.getName();
703        uprv_strcpy(parentLocaleName, curLocaleName);
704
705        while (uloc_getParent(parentLocaleName, parentLocaleName,
706                                       ULOC_FULLNAME_CAPACITY, &status) > 0) {
707            resLen=0;
708            s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
709            if (s != NULL) {
710                errCode = U_ZERO_ERROR;
711                break;
712            }
713            status = U_ZERO_ERROR;
714        }
715    }
716    if (s==NULL) {
717        return emptyStr;
718    }
719
720    char setKey[256];
721    UChar result[256];
722    u_UCharsToChars(s, setKey, resLen + 1);
723    // printf("\n PluralRule: %s\n", setKey);
724
725
726    LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
727    if(U_FAILURE(errCode)) {
728        return emptyStr;
729    }
730    resLen=0;
731    LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
732    if (U_FAILURE(errCode)) {
733        return emptyStr;
734    }
735
736    int32_t numberKeys = ures_getSize(setRes.getAlias());
737    char *key=NULL;
738    int32_t len=0;
739    for(int32_t i=0; i<numberKeys; ++i) {
740        int32_t keyLen;
741        resLen=0;
742        s=ures_getNextString(setRes.getAlias(), &resLen, (const char**)&key, &errCode);
743        keyLen = (int32_t)uprv_strlen(key);
744        u_charsToUChars(key, result+len, keyLen);
745        len += keyLen;
746        result[len++]=COLON;
747        uprv_memcpy(result+len, s, resLen*sizeof(UChar));
748        len += resLen;
749        result[len++]=SEMI_COLON;
750    }
751    result[len++]=0;
752    u_UCharsToChars(result, setKey, len);
753    // printf(" Rule: %s\n", setKey);
754
755    return UnicodeString(result);
756}
757
758AndConstraint::AndConstraint() {
759    op = AndConstraint::NONE;
760    opNum=-1;
761    rangeLow=-1;
762    rangeHigh=-1;
763    notIn=FALSE;
764    integerOnly=FALSE;
765    next=NULL;
766}
767
768
769AndConstraint::AndConstraint(const AndConstraint& other) {
770    this->op = other.op;
771    this->opNum=other.opNum;
772    this->rangeLow=other.rangeLow;
773    this->rangeHigh=other.rangeHigh;
774    this->integerOnly=other.integerOnly;
775    this->notIn=other.notIn;
776    if (other.next==NULL) {
777        this->next=NULL;
778    }
779    else {
780        this->next = new AndConstraint(*other.next);
781    }
782}
783
784AndConstraint::~AndConstraint() {
785    if (next!=NULL) {
786        delete next;
787    }
788}
789
790
791UBool
792AndConstraint::isFulfilled(double number) {
793    UBool result=TRUE;
794    double value=number;
795
796    // arrrrrrgh
797    if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
798      return notIn;
799    }
800
801    if ( op == MOD ) {
802        value = (int32_t)value % opNum;
803    }
804    if ( rangeHigh == -1 ) {
805        if ( rangeLow == -1 ) {
806            result = TRUE; // empty rule
807        }
808        else {
809            if ( value == rangeLow ) {
810                result = TRUE;
811            }
812            else {
813                result = FALSE;
814            }
815        }
816    }
817    else {
818        if ((rangeLow <= value) && (value <= rangeHigh)) {
819            if (integerOnly) {
820                if ( value != (int32_t)value) {
821                    result = FALSE;
822                }
823                else {
824                    result = TRUE;
825                }
826            }
827            else {
828                result = TRUE;
829            }
830        }
831        else {
832            result = FALSE;
833        }
834    }
835    if (notIn) {
836        return !result;
837    }
838    else {
839        return result;
840    }
841}
842
843UBool
844AndConstraint::isLimited() {
845    return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
846}
847
848int32_t
849AndConstraint::updateRepeatLimit(int32_t maxLimit) {
850
851    if ( op == MOD ) {
852        return uprv_max(opNum, maxLimit);
853    }
854    else {
855        if ( rangeHigh == -1 ) {
856            return uprv_max(rangeLow, maxLimit);
857        }
858        else{
859            return uprv_max(rangeHigh, maxLimit);
860        }
861    }
862}
863
864
865AndConstraint*
866AndConstraint::add()
867{
868    this->next = new AndConstraint();
869    return this->next;
870}
871
872OrConstraint::OrConstraint() {
873    childNode=NULL;
874    next=NULL;
875}
876
877OrConstraint::OrConstraint(const OrConstraint& other) {
878    if ( other.childNode == NULL ) {
879        this->childNode = NULL;
880    }
881    else {
882        this->childNode = new AndConstraint(*(other.childNode));
883    }
884    if (other.next == NULL ) {
885        this->next = NULL;
886    }
887    else {
888        this->next = new OrConstraint(*(other.next));
889    }
890}
891
892OrConstraint::~OrConstraint() {
893    if (childNode!=NULL) {
894        delete childNode;
895    }
896    if (next!=NULL) {
897        delete next;
898    }
899}
900
901AndConstraint*
902OrConstraint::add()
903{
904    OrConstraint *curOrConstraint=this;
905    {
906        while (curOrConstraint->next!=NULL) {
907            curOrConstraint = curOrConstraint->next;
908        }
909        curOrConstraint->next = NULL;
910        curOrConstraint->childNode = new AndConstraint();
911    }
912    return curOrConstraint->childNode;
913}
914
915UBool
916OrConstraint::isFulfilled(double number) {
917    OrConstraint* orRule=this;
918    UBool result=FALSE;
919
920    while (orRule!=NULL && !result) {
921        result=TRUE;
922        AndConstraint* andRule = orRule->childNode;
923        while (andRule!=NULL && result) {
924            result = andRule->isFulfilled(number);
925            andRule=andRule->next;
926        }
927        orRule = orRule->next;
928    }
929
930    return result;
931}
932
933UBool
934OrConstraint::isLimited() {
935    for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
936        UBool result = FALSE;
937        for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
938            if (andc->isLimited()) {
939                result = TRUE;
940                break;
941            }
942        }
943        if (result == FALSE) {
944            return FALSE;
945        }
946    }
947    return TRUE;
948}
949
950RuleChain::RuleChain() {
951    ruleHeader=NULL;
952    next = NULL;
953    repeatLimit=0;
954}
955
956RuleChain::RuleChain(const RuleChain& other) {
957    this->repeatLimit = other.repeatLimit;
958    this->keyword=other.keyword;
959    if (other.ruleHeader != NULL) {
960        this->ruleHeader = new OrConstraint(*(other.ruleHeader));
961    }
962    else {
963        this->ruleHeader = NULL;
964    }
965    if (other.next != NULL ) {
966        this->next = new RuleChain(*other.next);
967    }
968    else
969    {
970        this->next = NULL;
971    }
972}
973
974RuleChain::~RuleChain() {
975    if (next != NULL) {
976        delete next;
977    }
978    if ( ruleHeader != NULL ) {
979        delete ruleHeader;
980    }
981}
982
983UnicodeString
984RuleChain::select(double number) const {
985
986   if ( ruleHeader != NULL ) {
987       if (ruleHeader->isFulfilled(number)) {
988           return keyword;
989       }
990   }
991   if ( next != NULL ) {
992       return next->select(number);
993   }
994   else {
995       return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
996   }
997
998}
999
1000void
1001RuleChain::dumpRules(UnicodeString& result) {
1002    UChar digitString[16];
1003
1004    if ( ruleHeader != NULL ) {
1005        result +=  keyword;
1006        OrConstraint* orRule=ruleHeader;
1007        while ( orRule != NULL ) {
1008            AndConstraint* andRule=orRule->childNode;
1009            while ( andRule != NULL ) {
1010                if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
1011                    result += UNICODE_STRING_SIMPLE(" n is ");
1012                    if (andRule->notIn) {
1013                        result += UNICODE_STRING_SIMPLE("not ");
1014                    }
1015                    uprv_itou(digitString,16, andRule->rangeLow,10,0);
1016                    result += UnicodeString(digitString);
1017                }
1018                else {
1019                    if (andRule->op==AndConstraint::MOD) {
1020                        result += UNICODE_STRING_SIMPLE("  n mod ");
1021                        uprv_itou(digitString,16, andRule->opNum,10,0);
1022                        result += UnicodeString(digitString);
1023                    }
1024                    else {
1025                        result += UNICODE_STRING_SIMPLE("  n ");
1026                    }
1027                    if (andRule->rangeHigh==-1) {
1028                        if (andRule->notIn) {
1029                            result += UNICODE_STRING_SIMPLE(" is not ");
1030                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
1031                            result += UnicodeString(digitString);
1032                        }
1033                        else {
1034                            result += UNICODE_STRING_SIMPLE(" is ");
1035                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
1036                            result += UnicodeString(digitString);
1037                        }
1038                    }
1039                    else {
1040                        if (andRule->notIn) {
1041                            if ( andRule->integerOnly ) {
1042                                result += UNICODE_STRING_SIMPLE("  not in ");
1043                            }
1044                            else {
1045                                result += UNICODE_STRING_SIMPLE("  not within ");
1046                            }
1047                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
1048                            result += UnicodeString(digitString);
1049                            result += UNICODE_STRING_SIMPLE(" .. ");
1050                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1051                            result += UnicodeString(digitString);
1052                        }
1053                        else {
1054                            if ( andRule->integerOnly ) {
1055                                result += UNICODE_STRING_SIMPLE(" in ");
1056                            }
1057                            else {
1058                                result += UNICODE_STRING_SIMPLE(" within ");
1059                            }
1060                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
1061                            result += UnicodeString(digitString);
1062                            result += UNICODE_STRING_SIMPLE(" .. ");
1063                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1064                        }
1065                    }
1066                }
1067                if ( (andRule=andRule->next) != NULL) {
1068                    result.append(PK_AND, 3);
1069                }
1070            }
1071            if ( (orRule = orRule->next) != NULL ) {
1072                result.append(PK_OR, 2);
1073            }
1074        }
1075    }
1076    if ( next != NULL ) {
1077        next->dumpRules(result);
1078    }
1079}
1080
1081int32_t
1082RuleChain::getRepeatLimit () {
1083    return repeatLimit;
1084}
1085
1086void
1087RuleChain::setRepeatLimit () {
1088    int32_t limit=0;
1089
1090    if ( next != NULL ) {
1091        next->setRepeatLimit();
1092        limit = next->repeatLimit;
1093    }
1094
1095    if ( ruleHeader != NULL ) {
1096        OrConstraint* orRule=ruleHeader;
1097        while ( orRule != NULL ) {
1098            AndConstraint* andRule=orRule->childNode;
1099            while ( andRule != NULL ) {
1100                limit = andRule->updateRepeatLimit(limit);
1101                andRule = andRule->next;
1102            }
1103            orRule = orRule->next;
1104        }
1105    }
1106    repeatLimit = limit;
1107}
1108
1109UErrorCode
1110RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1111    if ( arraySize < capacityOfKeywords-1 ) {
1112        keywords[arraySize++]=keyword;
1113    }
1114    else {
1115        return U_BUFFER_OVERFLOW_ERROR;
1116    }
1117
1118    if ( next != NULL ) {
1119        return next->getKeywords(capacityOfKeywords, keywords, arraySize);
1120    }
1121    else {
1122        return U_ZERO_ERROR;
1123    }
1124}
1125
1126UBool
1127RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1128    if ( keyword == keywordParam ) {
1129        return TRUE;
1130    }
1131
1132    if ( next != NULL ) {
1133        return next->isKeyword(keywordParam);
1134    }
1135    else {
1136        return FALSE;
1137    }
1138}
1139
1140
1141RuleParser::RuleParser() {
1142}
1143
1144RuleParser::~RuleParser() {
1145}
1146
1147void
1148RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
1149{
1150    if (U_FAILURE(status)) {
1151        return;
1152    }
1153    switch(prevType) {
1154    case none:
1155    case tSemiColon:
1156        if (curType!=tKeyword) {
1157            status = U_UNEXPECTED_TOKEN;
1158        }
1159        break;
1160    case tVariableN :
1161        if (curType != tIs && curType != tMod && curType != tIn &&
1162            curType != tNot && curType != tWithin) {
1163            status = U_UNEXPECTED_TOKEN;
1164        }
1165        break;
1166    case tZero:
1167    case tOne:
1168    case tTwo:
1169    case tFew:
1170    case tMany:
1171    case tOther:
1172    case tKeyword:
1173        if (curType != tColon) {
1174            status = U_UNEXPECTED_TOKEN;
1175        }
1176        break;
1177    case tColon :
1178        if (curType != tVariableN) {
1179            status = U_UNEXPECTED_TOKEN;
1180        }
1181        break;
1182    case tIs:
1183        if ( curType != tNumber && curType != tNot) {
1184            status = U_UNEXPECTED_TOKEN;
1185        }
1186        break;
1187    case tNot:
1188        if (curType != tNumber && curType != tIn && curType != tWithin) {
1189            status = U_UNEXPECTED_TOKEN;
1190        }
1191        break;
1192    case tMod:
1193    case tDot:
1194    case tIn:
1195    case tWithin:
1196    case tAnd:
1197    case tOr:
1198        if (curType != tNumber && curType != tVariableN) {
1199            status = U_UNEXPECTED_TOKEN;
1200        }
1201        break;
1202    case tNumber:
1203        if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
1204            curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
1205        {
1206            status = U_UNEXPECTED_TOKEN;
1207        }
1208        break;
1209    default:
1210        status = U_UNEXPECTED_TOKEN;
1211        break;
1212    }
1213}
1214
1215void
1216RuleParser::getNextToken(const UnicodeString& ruleData,
1217                         int32_t *ruleIndex,
1218                         UnicodeString& token,
1219                         tokenType& type,
1220                         UErrorCode &status)
1221{
1222    int32_t curIndex= *ruleIndex;
1223    UChar ch;
1224    tokenType prevType=none;
1225
1226    if (U_FAILURE(status)) {
1227        return;
1228    }
1229    while (curIndex<ruleData.length()) {
1230        ch = ruleData.charAt(curIndex);
1231        if ( !inRange(ch, type) ) {
1232            status = U_ILLEGAL_CHARACTER;
1233            return;
1234        }
1235        switch (type) {
1236        case tSpace:
1237            if ( *ruleIndex != curIndex ) { // letter
1238                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1239                *ruleIndex=curIndex;
1240                type=prevType;
1241                getKeyType(token, type, status);
1242                return;
1243            }
1244            else {
1245                *ruleIndex=*ruleIndex+1;
1246            }
1247            break; // consective space
1248        case tColon:
1249        case tSemiColon:
1250            if ( *ruleIndex != curIndex ) {
1251                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1252                *ruleIndex=curIndex;
1253                type=prevType;
1254                getKeyType(token, type, status);
1255                return;
1256            }
1257            else {
1258                *ruleIndex=curIndex+1;
1259                return;
1260            }
1261        case tLetter:
1262             if ((type==prevType)||(prevType==none)) {
1263                prevType=type;
1264                break;
1265             }
1266             break;
1267        case tNumber:
1268             if ((type==prevType)||(prevType==none)) {
1269                prevType=type;
1270                break;
1271             }
1272             else {
1273                *ruleIndex=curIndex+1;
1274                return;
1275             }
1276         case tDot:
1277             if (prevType==none) {  // first dot
1278                prevType=type;
1279                continue;
1280             }
1281             else {
1282                 if ( *ruleIndex != curIndex ) {
1283                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1284                    *ruleIndex=curIndex;  // letter
1285                    type=prevType;
1286                    getKeyType(token, type, status);
1287                    return;
1288                 }
1289                 else {  // two consective dots
1290                    *ruleIndex=curIndex+2;
1291                    return;
1292                 }
1293             }
1294         default:
1295             status = U_UNEXPECTED_TOKEN;
1296             return;
1297        }
1298        curIndex++;
1299    }
1300    if ( curIndex>=ruleData.length() ) {
1301        if ( (type == tLetter)||(type == tNumber) ) {
1302            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1303            getKeyType(token, type, status);
1304            if (U_FAILURE(status)) {
1305                return;
1306            }
1307        }
1308        *ruleIndex = ruleData.length();
1309    }
1310}
1311
1312UBool
1313RuleParser::inRange(UChar ch, tokenType& type) {
1314    if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1315        // we assume all characters are in lower case already.
1316        return FALSE;
1317    }
1318    if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1319        type = tLetter;
1320        return TRUE;
1321    }
1322    if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1323        type = tNumber;
1324        return TRUE;
1325    }
1326    switch (ch) {
1327    case COLON:
1328        type = tColon;
1329        return TRUE;
1330    case SPACE:
1331        type = tSpace;
1332        return TRUE;
1333    case SEMI_COLON:
1334        type = tSemiColon;
1335        return TRUE;
1336    case DOT:
1337        type = tDot;
1338        return TRUE;
1339    default :
1340        type = none;
1341        return FALSE;
1342    }
1343}
1344
1345
1346void
1347RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1348{
1349    if (U_FAILURE(status)) {
1350        return;
1351    }
1352    if ( keyType==tNumber) {
1353    }
1354    else if (0 == token.compare(PK_VAR_N, 1)) {
1355        keyType = tVariableN;
1356    }
1357    else if (0 == token.compare(PK_IS, 2)) {
1358        keyType = tIs;
1359    }
1360    else if (0 == token.compare(PK_AND, 3)) {
1361        keyType = tAnd;
1362    }
1363    else if (0 == token.compare(PK_IN, 2)) {
1364        keyType = tIn;
1365    }
1366    else if (0 == token.compare(PK_WITHIN, 6)) {
1367        keyType = tWithin;
1368    }
1369    else if (0 == token.compare(PK_NOT, 3)) {
1370        keyType = tNot;
1371    }
1372    else if (0 == token.compare(PK_MOD, 3)) {
1373        keyType = tMod;
1374    }
1375    else if (0 == token.compare(PK_OR, 2)) {
1376        keyType = tOr;
1377    }
1378    else if ( isValidKeyword(token) ) {
1379        keyType = tKeyword;
1380    }
1381    else {
1382        status = U_UNEXPECTED_TOKEN;
1383    }
1384}
1385
1386UBool
1387RuleParser::isValidKeyword(const UnicodeString& token) {
1388    return PatternProps::isIdentifier(token.getBuffer(), token.length());
1389}
1390
1391PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1392        : pos(0), fKeywordNames(status) {
1393    if (U_FAILURE(status)) {
1394        return;
1395    }
1396    fKeywordNames.setDeleter(uprv_deleteUObject);
1397    UBool  addKeywordOther=TRUE;
1398    RuleChain *node=header;
1399    while(node!=NULL) {
1400        fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1401        if (U_FAILURE(status)) {
1402            return;
1403        }
1404        if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1405            addKeywordOther= FALSE;
1406        }
1407        node=node->next;
1408    }
1409
1410    if (addKeywordOther) {
1411        fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1412    }
1413}
1414
1415const UnicodeString*
1416PluralKeywordEnumeration::snext(UErrorCode& status) {
1417    if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1418        return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1419    }
1420    return NULL;
1421}
1422
1423void
1424PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1425    pos=0;
1426}
1427
1428int32_t
1429PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1430       return fKeywordNames.size();
1431}
1432
1433PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1434}
1435
1436U_NAMESPACE_END
1437
1438
1439#endif /* #if !UCONFIG_NO_FORMATTING */
1440
1441//eof
1442