1/* 2******************************************************************************* 3* Copyright (C) 1997-2014, International Business Machines Corporation 4* and others. All Rights Reserved. 5******************************************************************************* 6*/ 7 8#include "unicode/utypes.h" 9#include "utypeinfo.h" // for 'typeid' to work 10 11#include "unicode/rbnf.h" 12 13#if U_HAVE_RBNF 14 15#include "unicode/normlzr.h" 16#include "unicode/tblcoll.h" 17#include "unicode/uchar.h" 18#include "unicode/ucol.h" 19#include "unicode/uloc.h" 20#include "unicode/unum.h" 21#include "unicode/ures.h" 22#include "unicode/ustring.h" 23#include "unicode/utf16.h" 24#include "unicode/udata.h" 25#include "unicode/udisplaycontext.h" 26#include "unicode/brkiter.h" 27#include "nfrs.h" 28 29#include "cmemory.h" 30#include "cstring.h" 31#include "patternprops.h" 32#include "uresimp.h" 33 34// debugging 35// #define DEBUG 36 37#ifdef DEBUG 38#include "stdio.h" 39#endif 40 41#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 42 43static const UChar gPercentPercent[] = 44{ 45 0x25, 0x25, 0 46}; /* "%%" */ 47 48// All urbnf objects are created through openRules, so we init all of the 49// Unicode string constants required by rbnf, nfrs, or nfr here. 50static const UChar gLenientParse[] = 51{ 52 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 53}; /* "%%lenient-parse:" */ 54static const UChar gSemiColon = 0x003B; 55static const UChar gSemiPercent[] = 56{ 57 0x3B, 0x25, 0 58}; /* ";%" */ 59 60#define kSomeNumberOfBitsDiv2 22 61#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 62#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 63 64U_NAMESPACE_BEGIN 65 66UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 67 68/* 69This is a utility class. It does not use ICU's RTTI. 70If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 71Please make sure that intltest passes on Windows in Release mode, 72since the string pooling per compilation unit will mess up how RTTI works. 73The RTTI code was also removed due to lack of code coverage. 74*/ 75class LocalizationInfo : public UMemory { 76protected: 77 virtual ~LocalizationInfo(); 78 uint32_t refcount; 79 80public: 81 LocalizationInfo() : refcount(0) {} 82 83 LocalizationInfo* ref(void) { 84 ++refcount; 85 return this; 86 } 87 88 LocalizationInfo* unref(void) { 89 if (refcount && --refcount == 0) { 90 delete this; 91 } 92 return NULL; 93 } 94 95 virtual UBool operator==(const LocalizationInfo* rhs) const; 96 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 97 98 virtual int32_t getNumberOfRuleSets(void) const = 0; 99 virtual const UChar* getRuleSetName(int32_t index) const = 0; 100 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 101 virtual const UChar* getLocaleName(int32_t index) const = 0; 102 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 103 104 virtual int32_t indexForLocale(const UChar* locale) const; 105 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 106 107// virtual UClassID getDynamicClassID() const = 0; 108// static UClassID getStaticClassID(void); 109}; 110 111LocalizationInfo::~LocalizationInfo() {} 112 113//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 114 115// if both strings are NULL, this returns TRUE 116static UBool 117streq(const UChar* lhs, const UChar* rhs) { 118 if (rhs == lhs) { 119 return TRUE; 120 } 121 if (lhs && rhs) { 122 return u_strcmp(lhs, rhs) == 0; 123 } 124 return FALSE; 125} 126 127UBool 128LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 129 if (rhs) { 130 if (this == rhs) { 131 return TRUE; 132 } 133 134 int32_t rsc = getNumberOfRuleSets(); 135 if (rsc == rhs->getNumberOfRuleSets()) { 136 for (int i = 0; i < rsc; ++i) { 137 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 138 return FALSE; 139 } 140 } 141 int32_t dlc = getNumberOfDisplayLocales(); 142 if (dlc == rhs->getNumberOfDisplayLocales()) { 143 for (int i = 0; i < dlc; ++i) { 144 const UChar* locale = getLocaleName(i); 145 int32_t ix = rhs->indexForLocale(locale); 146 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 147 if (!streq(locale, rhs->getLocaleName(ix))) { 148 return FALSE; 149 } 150 for (int j = 0; j < rsc; ++j) { 151 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 152 return FALSE; 153 } 154 } 155 } 156 return TRUE; 157 } 158 } 159 } 160 return FALSE; 161} 162 163int32_t 164LocalizationInfo::indexForLocale(const UChar* locale) const { 165 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 166 if (streq(locale, getLocaleName(i))) { 167 return i; 168 } 169 } 170 return -1; 171} 172 173int32_t 174LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 175 if (ruleset) { 176 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 177 if (streq(ruleset, getRuleSetName(i))) { 178 return i; 179 } 180 } 181 } 182 return -1; 183} 184 185 186typedef void (*Fn_Deleter)(void*); 187 188class VArray { 189 void** buf; 190 int32_t cap; 191 int32_t size; 192 Fn_Deleter deleter; 193public: 194 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 195 196 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 197 198 ~VArray() { 199 if (deleter) { 200 for (int i = 0; i < size; ++i) { 201 (*deleter)(buf[i]); 202 } 203 } 204 uprv_free(buf); 205 } 206 207 int32_t length() { 208 return size; 209 } 210 211 void add(void* elem, UErrorCode& status) { 212 if (U_SUCCESS(status)) { 213 if (size == cap) { 214 if (cap == 0) { 215 cap = 1; 216 } else if (cap < 256) { 217 cap *= 2; 218 } else { 219 cap += 256; 220 } 221 if (buf == NULL) { 222 buf = (void**)uprv_malloc(cap * sizeof(void*)); 223 } else { 224 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 225 } 226 if (buf == NULL) { 227 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 228 status = U_MEMORY_ALLOCATION_ERROR; 229 return; 230 } 231 void* start = &buf[size]; 232 size_t count = (cap - size) * sizeof(void*); 233 uprv_memset(start, 0, count); // fill with nulls, just because 234 } 235 buf[size++] = elem; 236 } 237 } 238 239 void** release(void) { 240 void** result = buf; 241 buf = NULL; 242 cap = 0; 243 size = 0; 244 return result; 245 } 246}; 247 248class LocDataParser; 249 250class StringLocalizationInfo : public LocalizationInfo { 251 UChar* info; 252 UChar*** data; 253 int32_t numRuleSets; 254 int32_t numLocales; 255 256friend class LocDataParser; 257 258 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 259 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 260 { 261 } 262 263public: 264 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 265 266 virtual ~StringLocalizationInfo(); 267 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 268 virtual const UChar* getRuleSetName(int32_t index) const; 269 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 270 virtual const UChar* getLocaleName(int32_t index) const; 271 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 272 273// virtual UClassID getDynamicClassID() const; 274// static UClassID getStaticClassID(void); 275 276private: 277 void init(UErrorCode& status) const; 278}; 279 280 281enum { 282 OPEN_ANGLE = 0x003c, /* '<' */ 283 CLOSE_ANGLE = 0x003e, /* '>' */ 284 COMMA = 0x002c, 285 TICK = 0x0027, 286 QUOTE = 0x0022, 287 SPACE = 0x0020 288}; 289 290/** 291 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 292 */ 293class LocDataParser { 294 UChar* data; 295 const UChar* e; 296 UChar* p; 297 UChar ch; 298 UParseError& pe; 299 UErrorCode& ec; 300 301public: 302 LocDataParser(UParseError& parseError, UErrorCode& status) 303 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 304 ~LocDataParser() {} 305 306 /* 307 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 308 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 309 */ 310 StringLocalizationInfo* parse(UChar* data, int32_t len); 311 312private: 313 314 void inc(void) { ++p; ch = 0xffff; } 315 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 316 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 317 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} 318 UBool inList(UChar c, const UChar* list) const { 319 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; 320 while (*list && *list != c) ++list; return *list == c; 321 } 322 void parseError(const char* msg); 323 324 StringLocalizationInfo* doParse(void); 325 326 UChar** nextArray(int32_t& requiredLength); 327 UChar* nextString(void); 328}; 329 330#ifdef DEBUG 331#define ERROR(msg) parseError(msg); return NULL; 332#define EXPLANATION_ARG explanationArg 333#else 334#define ERROR(msg) parseError(NULL); return NULL; 335#define EXPLANATION_ARG 336#endif 337 338 339static const UChar DQUOTE_STOPLIST[] = { 340 QUOTE, 0 341}; 342 343static const UChar SQUOTE_STOPLIST[] = { 344 TICK, 0 345}; 346 347static const UChar NOQUOTE_STOPLIST[] = { 348 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 349}; 350 351static void 352DeleteFn(void* p) { 353 uprv_free(p); 354} 355 356StringLocalizationInfo* 357LocDataParser::parse(UChar* _data, int32_t len) { 358 if (U_FAILURE(ec)) { 359 if (_data) uprv_free(_data); 360 return NULL; 361 } 362 363 pe.line = 0; 364 pe.offset = -1; 365 pe.postContext[0] = 0; 366 pe.preContext[0] = 0; 367 368 if (_data == NULL) { 369 ec = U_ILLEGAL_ARGUMENT_ERROR; 370 return NULL; 371 } 372 373 if (len <= 0) { 374 ec = U_ILLEGAL_ARGUMENT_ERROR; 375 uprv_free(_data); 376 return NULL; 377 } 378 379 data = _data; 380 e = data + len; 381 p = _data; 382 ch = 0xffff; 383 384 return doParse(); 385} 386 387 388StringLocalizationInfo* 389LocDataParser::doParse(void) { 390 skipWhitespace(); 391 if (!checkInc(OPEN_ANGLE)) { 392 ERROR("Missing open angle"); 393 } else { 394 VArray array(DeleteFn); 395 UBool mightHaveNext = TRUE; 396 int32_t requiredLength = -1; 397 while (mightHaveNext) { 398 mightHaveNext = FALSE; 399 UChar** elem = nextArray(requiredLength); 400 skipWhitespace(); 401 UBool haveComma = check(COMMA); 402 if (elem) { 403 array.add(elem, ec); 404 if (haveComma) { 405 inc(); 406 mightHaveNext = TRUE; 407 } 408 } else if (haveComma) { 409 ERROR("Unexpected character"); 410 } 411 } 412 413 skipWhitespace(); 414 if (!checkInc(CLOSE_ANGLE)) { 415 if (check(OPEN_ANGLE)) { 416 ERROR("Missing comma in outer array"); 417 } else { 418 ERROR("Missing close angle bracket in outer array"); 419 } 420 } 421 422 skipWhitespace(); 423 if (p != e) { 424 ERROR("Extra text after close of localization data"); 425 } 426 427 array.add(NULL, ec); 428 if (U_SUCCESS(ec)) { 429 int32_t numLocs = array.length() - 2; // subtract first, NULL 430 UChar*** result = (UChar***)array.release(); 431 432 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 433 } 434 } 435 436 ERROR("Unknown error"); 437} 438 439UChar** 440LocDataParser::nextArray(int32_t& requiredLength) { 441 if (U_FAILURE(ec)) { 442 return NULL; 443 } 444 445 skipWhitespace(); 446 if (!checkInc(OPEN_ANGLE)) { 447 ERROR("Missing open angle"); 448 } 449 450 VArray array; 451 UBool mightHaveNext = TRUE; 452 while (mightHaveNext) { 453 mightHaveNext = FALSE; 454 UChar* elem = nextString(); 455 skipWhitespace(); 456 UBool haveComma = check(COMMA); 457 if (elem) { 458 array.add(elem, ec); 459 if (haveComma) { 460 inc(); 461 mightHaveNext = TRUE; 462 } 463 } else if (haveComma) { 464 ERROR("Unexpected comma"); 465 } 466 } 467 skipWhitespace(); 468 if (!checkInc(CLOSE_ANGLE)) { 469 if (check(OPEN_ANGLE)) { 470 ERROR("Missing close angle bracket in inner array"); 471 } else { 472 ERROR("Missing comma in inner array"); 473 } 474 } 475 476 array.add(NULL, ec); 477 if (U_SUCCESS(ec)) { 478 if (requiredLength == -1) { 479 requiredLength = array.length() + 1; 480 } else if (array.length() != requiredLength) { 481 ec = U_ILLEGAL_ARGUMENT_ERROR; 482 ERROR("Array not of required length"); 483 } 484 485 return (UChar**)array.release(); 486 } 487 ERROR("Unknown Error"); 488} 489 490UChar* 491LocDataParser::nextString() { 492 UChar* result = NULL; 493 494 skipWhitespace(); 495 if (p < e) { 496 const UChar* terminators; 497 UChar c = *p; 498 UBool haveQuote = c == QUOTE || c == TICK; 499 if (haveQuote) { 500 inc(); 501 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 502 } else { 503 terminators = NOQUOTE_STOPLIST; 504 } 505 UChar* start = p; 506 while (p < e && !inList(*p, terminators)) ++p; 507 if (p == e) { 508 ERROR("Unexpected end of data"); 509 } 510 511 UChar x = *p; 512 if (p > start) { 513 ch = x; 514 *p = 0x0; // terminate by writing to data 515 result = start; // just point into data 516 } 517 if (haveQuote) { 518 if (x != c) { 519 ERROR("Missing matching quote"); 520 } else if (p == start) { 521 ERROR("Empty string"); 522 } 523 inc(); 524 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 525 ERROR("Unexpected character in string"); 526 } 527 } 528 529 // ok for there to be no next string 530 return result; 531} 532 533void LocDataParser::parseError(const char* EXPLANATION_ARG) 534{ 535 if (!data) { 536 return; 537 } 538 539 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 540 if (start < data) { 541 start = data; 542 } 543 for (UChar* x = p; --x >= start;) { 544 if (!*x) { 545 start = x+1; 546 break; 547 } 548 } 549 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 550 if (limit > e) { 551 limit = e; 552 } 553 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 554 pe.preContext[p-start] = 0; 555 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 556 pe.postContext[limit-p] = 0; 557 pe.offset = (int32_t)(p - data); 558 559#ifdef DEBUG 560 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 561 562 UnicodeString msg; 563 msg.append(start, p - start); 564 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 565 msg.append(p, limit-p); 566 msg.append(UNICODE_STRING_SIMPLE("'")); 567 568 char buf[128]; 569 int32_t len = msg.extract(0, msg.length(), buf, 128); 570 if (len >= 128) { 571 buf[127] = 0; 572 } else { 573 buf[len] = 0; 574 } 575 fprintf(stderr, "%s\n", buf); 576 fflush(stderr); 577#endif 578 579 uprv_free(data); 580 data = NULL; 581 p = NULL; 582 e = NULL; 583 584 if (U_SUCCESS(ec)) { 585 ec = U_PARSE_ERROR; 586 } 587} 588 589//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 590 591StringLocalizationInfo* 592StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 593 if (U_FAILURE(status)) { 594 return NULL; 595 } 596 597 int32_t len = info.length(); 598 if (len == 0) { 599 return NULL; // no error; 600 } 601 602 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 603 if (!p) { 604 status = U_MEMORY_ALLOCATION_ERROR; 605 return NULL; 606 } 607 info.extract(p, len, status); 608 if (!U_FAILURE(status)) { 609 status = U_ZERO_ERROR; // clear warning about non-termination 610 } 611 612 LocDataParser parser(perror, status); 613 return parser.parse(p, len); 614} 615 616StringLocalizationInfo::~StringLocalizationInfo() { 617 for (UChar*** p = (UChar***)data; *p; ++p) { 618 // remaining data is simply pointer into our unicode string data. 619 if (*p) uprv_free(*p); 620 } 621 if (data) uprv_free(data); 622 if (info) uprv_free(info); 623} 624 625 626const UChar* 627StringLocalizationInfo::getRuleSetName(int32_t index) const { 628 if (index >= 0 && index < getNumberOfRuleSets()) { 629 return data[0][index]; 630 } 631 return NULL; 632} 633 634const UChar* 635StringLocalizationInfo::getLocaleName(int32_t index) const { 636 if (index >= 0 && index < getNumberOfDisplayLocales()) { 637 return data[index+1][0]; 638 } 639 return NULL; 640} 641 642const UChar* 643StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 644 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 645 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 646 return data[localeIndex+1][ruleIndex+1]; 647 } 648 return NULL; 649} 650 651// ---------- 652 653RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 654 const UnicodeString& locs, 655 const Locale& alocale, UParseError& perror, UErrorCode& status) 656 : ruleSets(NULL) 657 , ruleSetDescriptions(NULL) 658 , numRuleSets(0) 659 , defaultRuleSet(NULL) 660 , locale(alocale) 661 , collator(NULL) 662 , decimalFormatSymbols(NULL) 663 , lenient(FALSE) 664 , lenientParseRules(NULL) 665 , localizations(NULL) 666 , capitalizationInfoSet(FALSE) 667 , capitalizationForUIListMenu(FALSE) 668 , capitalizationForStandAlone(FALSE) 669 , capitalizationBrkIter(NULL) 670{ 671 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 672 init(description, locinfo, perror, status); 673} 674 675RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 676 const UnicodeString& locs, 677 UParseError& perror, UErrorCode& status) 678 : ruleSets(NULL) 679 , ruleSetDescriptions(NULL) 680 , numRuleSets(0) 681 , defaultRuleSet(NULL) 682 , locale(Locale::getDefault()) 683 , collator(NULL) 684 , decimalFormatSymbols(NULL) 685 , lenient(FALSE) 686 , lenientParseRules(NULL) 687 , localizations(NULL) 688 , capitalizationInfoSet(FALSE) 689 , capitalizationForUIListMenu(FALSE) 690 , capitalizationForStandAlone(FALSE) 691 , capitalizationBrkIter(NULL) 692{ 693 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 694 init(description, locinfo, perror, status); 695} 696 697RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 698 LocalizationInfo* info, 699 const Locale& alocale, UParseError& perror, UErrorCode& status) 700 : ruleSets(NULL) 701 , ruleSetDescriptions(NULL) 702 , numRuleSets(0) 703 , defaultRuleSet(NULL) 704 , locale(alocale) 705 , collator(NULL) 706 , decimalFormatSymbols(NULL) 707 , lenient(FALSE) 708 , lenientParseRules(NULL) 709 , localizations(NULL) 710 , capitalizationInfoSet(FALSE) 711 , capitalizationForUIListMenu(FALSE) 712 , capitalizationForStandAlone(FALSE) 713 , capitalizationBrkIter(NULL) 714{ 715 init(description, info, perror, status); 716} 717 718RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 719 UParseError& perror, 720 UErrorCode& status) 721 : ruleSets(NULL) 722 , ruleSetDescriptions(NULL) 723 , numRuleSets(0) 724 , defaultRuleSet(NULL) 725 , locale(Locale::getDefault()) 726 , collator(NULL) 727 , decimalFormatSymbols(NULL) 728 , lenient(FALSE) 729 , lenientParseRules(NULL) 730 , localizations(NULL) 731 , capitalizationInfoSet(FALSE) 732 , capitalizationForUIListMenu(FALSE) 733 , capitalizationForStandAlone(FALSE) 734 , capitalizationBrkIter(NULL) 735{ 736 init(description, NULL, perror, status); 737} 738 739RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 740 const Locale& aLocale, 741 UParseError& perror, 742 UErrorCode& status) 743 : ruleSets(NULL) 744 , ruleSetDescriptions(NULL) 745 , numRuleSets(0) 746 , defaultRuleSet(NULL) 747 , locale(aLocale) 748 , collator(NULL) 749 , decimalFormatSymbols(NULL) 750 , lenient(FALSE) 751 , lenientParseRules(NULL) 752 , localizations(NULL) 753 , capitalizationInfoSet(FALSE) 754 , capitalizationForUIListMenu(FALSE) 755 , capitalizationForStandAlone(FALSE) 756 , capitalizationBrkIter(NULL) 757{ 758 init(description, NULL, perror, status); 759} 760 761RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 762 : ruleSets(NULL) 763 , ruleSetDescriptions(NULL) 764 , numRuleSets(0) 765 , defaultRuleSet(NULL) 766 , locale(alocale) 767 , collator(NULL) 768 , decimalFormatSymbols(NULL) 769 , lenient(FALSE) 770 , lenientParseRules(NULL) 771 , localizations(NULL) 772 , capitalizationInfoSet(FALSE) 773 , capitalizationForUIListMenu(FALSE) 774 , capitalizationForStandAlone(FALSE) 775 , capitalizationBrkIter(NULL) 776{ 777 if (U_FAILURE(status)) { 778 return; 779 } 780 781 const char* rules_tag = "RBNFRules"; 782 const char* fmt_tag = ""; 783 switch (tag) { 784 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 785 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 786 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 787 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 788 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 789 } 790 791 // TODO: read localization info from resource 792 LocalizationInfo* locinfo = NULL; 793 794 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 795 if (U_SUCCESS(status)) { 796 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 797 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 798 799 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 800 if (U_FAILURE(status)) { 801 ures_close(nfrb); 802 } 803 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 804 if (U_FAILURE(status)) { 805 ures_close(rbnfRules); 806 ures_close(nfrb); 807 return; 808 } 809 810 UnicodeString desc; 811 while (ures_hasNext(ruleSets)) { 812 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 813 } 814 UParseError perror; 815 816 init (desc, locinfo, perror, status); 817 818 ures_close(ruleSets); 819 ures_close(rbnfRules); 820 } 821 ures_close(nfrb); 822} 823 824RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 825 : NumberFormat(rhs) 826 , ruleSets(NULL) 827 , ruleSetDescriptions(NULL) 828 , numRuleSets(0) 829 , defaultRuleSet(NULL) 830 , locale(rhs.locale) 831 , collator(NULL) 832 , decimalFormatSymbols(NULL) 833 , lenient(FALSE) 834 , lenientParseRules(NULL) 835 , localizations(NULL) 836 , capitalizationInfoSet(FALSE) 837 , capitalizationForUIListMenu(FALSE) 838 , capitalizationForStandAlone(FALSE) 839 , capitalizationBrkIter(NULL) 840{ 841 this->operator=(rhs); 842} 843 844// -------- 845 846RuleBasedNumberFormat& 847RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 848{ 849 if (this == &rhs) { 850 return *this; 851 } 852 NumberFormat::operator=(rhs); 853 UErrorCode status = U_ZERO_ERROR; 854 dispose(); 855 locale = rhs.locale; 856 lenient = rhs.lenient; 857 858 UParseError perror; 859 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 860 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 861 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 862 863 capitalizationInfoSet = rhs.capitalizationInfoSet; 864 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 865 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 866#if !UCONFIG_NO_BREAK_ITERATION 867 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL; 868#endif 869 870 return *this; 871} 872 873RuleBasedNumberFormat::~RuleBasedNumberFormat() 874{ 875 dispose(); 876} 877 878Format* 879RuleBasedNumberFormat::clone(void) const 880{ 881 return new RuleBasedNumberFormat(*this); 882} 883 884UBool 885RuleBasedNumberFormat::operator==(const Format& other) const 886{ 887 if (this == &other) { 888 return TRUE; 889 } 890 891 if (typeid(*this) == typeid(other)) { 892 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 893 // test for capitalization info equality is adequately handled 894 // by the NumberFormat test for fCapitalizationContext equality; 895 // the info here is just derived from that. 896 if (locale == rhs.locale && 897 lenient == rhs.lenient && 898 (localizations == NULL 899 ? rhs.localizations == NULL 900 : (rhs.localizations == NULL 901 ? FALSE 902 : *localizations == rhs.localizations))) { 903 904 NFRuleSet** p = ruleSets; 905 NFRuleSet** q = rhs.ruleSets; 906 if (p == NULL) { 907 return q == NULL; 908 } else if (q == NULL) { 909 return FALSE; 910 } 911 while (*p && *q && (**p == **q)) { 912 ++p; 913 ++q; 914 } 915 return *q == NULL && *p == NULL; 916 } 917 } 918 919 return FALSE; 920} 921 922UnicodeString 923RuleBasedNumberFormat::getRules() const 924{ 925 UnicodeString result; 926 if (ruleSets != NULL) { 927 for (NFRuleSet** p = ruleSets; *p; ++p) { 928 (*p)->appendRules(result); 929 } 930 } 931 return result; 932} 933 934UnicodeString 935RuleBasedNumberFormat::getRuleSetName(int32_t index) const 936{ 937 if (localizations) { 938 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 939 return string; 940 } else if (ruleSets) { 941 UnicodeString result; 942 for (NFRuleSet** p = ruleSets; *p; ++p) { 943 NFRuleSet* rs = *p; 944 if (rs->isPublic()) { 945 if (--index == -1) { 946 rs->getName(result); 947 return result; 948 } 949 } 950 } 951 } 952 UnicodeString empty; 953 return empty; 954} 955 956int32_t 957RuleBasedNumberFormat::getNumberOfRuleSetNames() const 958{ 959 int32_t result = 0; 960 if (localizations) { 961 result = localizations->getNumberOfRuleSets(); 962 } else if (ruleSets) { 963 for (NFRuleSet** p = ruleSets; *p; ++p) { 964 if ((**p).isPublic()) { 965 ++result; 966 } 967 } 968 } 969 return result; 970} 971 972int32_t 973RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 974 if (localizations) { 975 return localizations->getNumberOfDisplayLocales(); 976 } 977 return 0; 978} 979 980Locale 981RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 982 if (U_FAILURE(status)) { 983 return Locale(""); 984 } 985 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 986 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 987 char buffer[64]; 988 int32_t cap = name.length() + 1; 989 char* bp = buffer; 990 if (cap > 64) { 991 bp = (char *)uprv_malloc(cap); 992 if (bp == NULL) { 993 status = U_MEMORY_ALLOCATION_ERROR; 994 return Locale(""); 995 } 996 } 997 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 998 Locale retLocale(bp); 999 if (bp != buffer) { 1000 uprv_free(bp); 1001 } 1002 return retLocale; 1003 } 1004 status = U_ILLEGAL_ARGUMENT_ERROR; 1005 Locale retLocale; 1006 return retLocale; 1007} 1008 1009UnicodeString 1010RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1011 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1012 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1013 int32_t len = localeName.length(); 1014 UChar* localeStr = localeName.getBuffer(len + 1); 1015 while (len >= 0) { 1016 localeStr[len] = 0; 1017 int32_t ix = localizations->indexForLocale(localeStr); 1018 if (ix >= 0) { 1019 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1020 return name; 1021 } 1022 1023 // trim trailing portion, skipping over ommitted sections 1024 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1025 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1026 } 1027 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1028 return name; 1029 } 1030 UnicodeString bogus; 1031 bogus.setToBogus(); 1032 return bogus; 1033} 1034 1035UnicodeString 1036RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1037 if (localizations) { 1038 UnicodeString rsn(ruleSetName); 1039 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1040 return getRuleSetDisplayName(ix, localeParam); 1041 } 1042 UnicodeString bogus; 1043 bogus.setToBogus(); 1044 return bogus; 1045} 1046 1047NFRuleSet* 1048RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1049{ 1050 if (U_SUCCESS(status) && ruleSets) { 1051 for (NFRuleSet** p = ruleSets; *p; ++p) { 1052 NFRuleSet* rs = *p; 1053 if (rs->isNamed(name)) { 1054 return rs; 1055 } 1056 } 1057 status = U_ILLEGAL_ARGUMENT_ERROR; 1058 } 1059 return NULL; 1060} 1061 1062UnicodeString& 1063RuleBasedNumberFormat::format(int32_t number, 1064 UnicodeString& toAppendTo, 1065 FieldPosition& /* pos */) const 1066{ 1067 if (defaultRuleSet) { 1068 int32_t startPos = toAppendTo.length(); 1069 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); 1070 adjustForCapitalizationContext(startPos, toAppendTo); 1071 } 1072 return toAppendTo; 1073} 1074 1075 1076UnicodeString& 1077RuleBasedNumberFormat::format(int64_t number, 1078 UnicodeString& toAppendTo, 1079 FieldPosition& /* pos */) const 1080{ 1081 if (defaultRuleSet) { 1082 int32_t startPos = toAppendTo.length(); 1083 defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1084 adjustForCapitalizationContext(startPos, toAppendTo); 1085 } 1086 return toAppendTo; 1087} 1088 1089 1090UnicodeString& 1091RuleBasedNumberFormat::format(double number, 1092 UnicodeString& toAppendTo, 1093 FieldPosition& /* pos */) const 1094{ 1095 int32_t startPos = toAppendTo.length(); 1096 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. 1097 if (uprv_isNaN(number)) { 1098 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal 1099 if (decFmtSyms) { 1100 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); 1101 } 1102 } else if (defaultRuleSet) { 1103 defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1104 } 1105 return adjustForCapitalizationContext(startPos, toAppendTo); 1106} 1107 1108 1109UnicodeString& 1110RuleBasedNumberFormat::format(int32_t number, 1111 const UnicodeString& ruleSetName, 1112 UnicodeString& toAppendTo, 1113 FieldPosition& /* pos */, 1114 UErrorCode& status) const 1115{ 1116 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1117 if (U_SUCCESS(status)) { 1118 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1119 // throw new IllegalArgumentException("Can't use internal rule set"); 1120 status = U_ILLEGAL_ARGUMENT_ERROR; 1121 } else { 1122 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1123 if (rs) { 1124 int32_t startPos = toAppendTo.length(); 1125 rs->format((int64_t)number, toAppendTo, toAppendTo.length()); 1126 adjustForCapitalizationContext(startPos, toAppendTo); 1127 } 1128 } 1129 } 1130 return toAppendTo; 1131} 1132 1133 1134UnicodeString& 1135RuleBasedNumberFormat::format(int64_t number, 1136 const UnicodeString& ruleSetName, 1137 UnicodeString& toAppendTo, 1138 FieldPosition& /* pos */, 1139 UErrorCode& status) const 1140{ 1141 if (U_SUCCESS(status)) { 1142 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1143 // throw new IllegalArgumentException("Can't use internal rule set"); 1144 status = U_ILLEGAL_ARGUMENT_ERROR; 1145 } else { 1146 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1147 if (rs) { 1148 int32_t startPos = toAppendTo.length(); 1149 rs->format(number, toAppendTo, toAppendTo.length()); 1150 adjustForCapitalizationContext(startPos, toAppendTo); 1151 } 1152 } 1153 } 1154 return toAppendTo; 1155} 1156 1157 1158UnicodeString& 1159RuleBasedNumberFormat::format(double number, 1160 const UnicodeString& ruleSetName, 1161 UnicodeString& toAppendTo, 1162 FieldPosition& /* pos */, 1163 UErrorCode& status) const 1164{ 1165 if (U_SUCCESS(status)) { 1166 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1167 // throw new IllegalArgumentException("Can't use internal rule set"); 1168 status = U_ILLEGAL_ARGUMENT_ERROR; 1169 } else { 1170 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1171 if (rs) { 1172 int32_t startPos = toAppendTo.length(); 1173 rs->format(number, toAppendTo, toAppendTo.length()); 1174 adjustForCapitalizationContext(startPos, toAppendTo); 1175 } 1176 } 1177 } 1178 return toAppendTo; 1179} 1180 1181UnicodeString& 1182RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1183 UnicodeString& currentResult) const 1184{ 1185#if !UCONFIG_NO_BREAK_ITERATION 1186 if (startPos==0 && currentResult.length() > 0) { 1187 // capitalize currentResult according to context 1188 UChar32 ch = currentResult.char32At(0); 1189 UErrorCode status = U_ZERO_ERROR; 1190 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1191 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && 1192 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1193 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1194 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1195 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1196 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1197 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1198 } 1199 } 1200#endif 1201 return currentResult; 1202} 1203 1204 1205void 1206RuleBasedNumberFormat::parse(const UnicodeString& text, 1207 Formattable& result, 1208 ParsePosition& parsePosition) const 1209{ 1210 if (!ruleSets) { 1211 parsePosition.setErrorIndex(0); 1212 return; 1213 } 1214 1215 UnicodeString workingText(text, parsePosition.getIndex()); 1216 ParsePosition workingPos(0); 1217 1218 ParsePosition high_pp(0); 1219 Formattable high_result; 1220 1221 for (NFRuleSet** p = ruleSets; *p; ++p) { 1222 NFRuleSet *rp = *p; 1223 if (rp->isPublic() && rp->isParseable()) { 1224 ParsePosition working_pp(0); 1225 Formattable working_result; 1226 1227 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient); 1228 if (working_pp.getIndex() > high_pp.getIndex()) { 1229 high_pp = working_pp; 1230 high_result = working_result; 1231 1232 if (high_pp.getIndex() == workingText.length()) { 1233 break; 1234 } 1235 } 1236 } 1237 } 1238 1239 int32_t startIndex = parsePosition.getIndex(); 1240 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1241 if (high_pp.getIndex() > 0) { 1242 parsePosition.setErrorIndex(-1); 1243 } else { 1244 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1245 parsePosition.setErrorIndex(startIndex + errorIndex); 1246 } 1247 result = high_result; 1248 if (result.getType() == Formattable::kDouble) { 1249 int32_t r = (int32_t)result.getDouble(); 1250 if ((double)r == result.getDouble()) { 1251 result.setLong(r); 1252 } 1253 } 1254} 1255 1256#if !UCONFIG_NO_COLLATION 1257 1258void 1259RuleBasedNumberFormat::setLenient(UBool enabled) 1260{ 1261 lenient = enabled; 1262 if (!enabled && collator) { 1263 delete collator; 1264 collator = NULL; 1265 } 1266} 1267 1268#endif 1269 1270void 1271RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1272 if (U_SUCCESS(status)) { 1273 if (ruleSetName.isEmpty()) { 1274 if (localizations) { 1275 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1276 defaultRuleSet = findRuleSet(name, status); 1277 } else { 1278 initDefaultRuleSet(); 1279 } 1280 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1281 status = U_ILLEGAL_ARGUMENT_ERROR; 1282 } else { 1283 NFRuleSet* result = findRuleSet(ruleSetName, status); 1284 if (result != NULL) { 1285 defaultRuleSet = result; 1286 } 1287 } 1288 } 1289} 1290 1291UnicodeString 1292RuleBasedNumberFormat::getDefaultRuleSetName() const { 1293 UnicodeString result; 1294 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1295 defaultRuleSet->getName(result); 1296 } else { 1297 result.setToBogus(); 1298 } 1299 return result; 1300} 1301 1302void 1303RuleBasedNumberFormat::initDefaultRuleSet() 1304{ 1305 defaultRuleSet = NULL; 1306 if (!ruleSets) { 1307 return; 1308 } 1309 1310 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); 1311 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); 1312 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); 1313 1314 NFRuleSet**p = &ruleSets[0]; 1315 while (*p) { 1316 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1317 defaultRuleSet = *p; 1318 return; 1319 } else { 1320 ++p; 1321 } 1322 } 1323 1324 defaultRuleSet = *--p; 1325 if (!defaultRuleSet->isPublic()) { 1326 while (p != ruleSets) { 1327 if ((*--p)->isPublic()) { 1328 defaultRuleSet = *p; 1329 break; 1330 } 1331 } 1332 } 1333} 1334 1335 1336void 1337RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1338 UParseError& pErr, UErrorCode& status) 1339{ 1340 // TODO: implement UParseError 1341 uprv_memset(&pErr, 0, sizeof(UParseError)); 1342 // Note: this can leave ruleSets == NULL, so remaining code should check 1343 if (U_FAILURE(status)) { 1344 return; 1345 } 1346 1347 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1348 1349 UnicodeString description(rules); 1350 if (!description.length()) { 1351 status = U_MEMORY_ALLOCATION_ERROR; 1352 return; 1353 } 1354 1355 // start by stripping the trailing whitespace from all the rules 1356 // (this is all the whitespace follwing each semicolon in the 1357 // description). This allows us to look for rule-set boundaries 1358 // by searching for ";%" without having to worry about whitespace 1359 // between the ; and the % 1360 stripWhitespace(description); 1361 1362 // check to see if there's a set of lenient-parse rules. If there 1363 // is, pull them out into our temporary holding place for them, 1364 // and delete them from the description before the real desciption- 1365 // parsing code sees them 1366 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1367 if (lp != -1) { 1368 // we've got to make sure we're not in the middle of a rule 1369 // (where "%%lenient-parse" would actually get treated as 1370 // rule text) 1371 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1372 // locate the beginning and end of the actual collation 1373 // rules (there may be whitespace between the name and 1374 // the first token in the description) 1375 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1376 1377 if (lpEnd == -1) { 1378 lpEnd = description.length() - 1; 1379 } 1380 int lpStart = lp + u_strlen(gLenientParse); 1381 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1382 ++lpStart; 1383 } 1384 1385 // copy out the lenient-parse rules and delete them 1386 // from the description 1387 lenientParseRules = new UnicodeString(); 1388 /* test for NULL */ 1389 if (lenientParseRules == 0) { 1390 status = U_MEMORY_ALLOCATION_ERROR; 1391 return; 1392 } 1393 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1394 1395 description.remove(lp, lpEnd + 1 - lp); 1396 } 1397 } 1398 1399 // pre-flight parsing the description and count the number of 1400 // rule sets (";%" marks the end of one rule set and the beginning 1401 // of the next) 1402 numRuleSets = 0; 1403 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1404 ++numRuleSets; 1405 ++p; 1406 } 1407 ++numRuleSets; 1408 1409 // our rule list is an array of the appropriate size 1410 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1411 /* test for NULL */ 1412 if (ruleSets == 0) { 1413 status = U_MEMORY_ALLOCATION_ERROR; 1414 return; 1415 } 1416 1417 for (int i = 0; i <= numRuleSets; ++i) { 1418 ruleSets[i] = NULL; 1419 } 1420 1421 // divide up the descriptions into individual rule-set descriptions 1422 // and store them in a temporary array. At each step, we also 1423 // new up a rule set, but all this does is initialize its name 1424 // and remove it from its description. We can't actually parse 1425 // the rest of the descriptions and finish initializing everything 1426 // because we have to know the names and locations of all the rule 1427 // sets before we can actually set everything up 1428 if(!numRuleSets) { 1429 status = U_ILLEGAL_ARGUMENT_ERROR; 1430 return; 1431 } 1432 1433 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1434 if (ruleSetDescriptions == 0) { 1435 status = U_MEMORY_ALLOCATION_ERROR; 1436 return; 1437 } 1438 1439 { 1440 int curRuleSet = 0; 1441 int32_t start = 0; 1442 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1443 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1444 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1445 if (ruleSets[curRuleSet] == 0) { 1446 status = U_MEMORY_ALLOCATION_ERROR; 1447 return; 1448 } 1449 ++curRuleSet; 1450 start = p + 1; 1451 } 1452 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1453 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1454 if (ruleSets[curRuleSet] == 0) { 1455 status = U_MEMORY_ALLOCATION_ERROR; 1456 return; 1457 } 1458 } 1459 1460 // now we can take note of the formatter's default rule set, which 1461 // is the last public rule set in the description (it's the last 1462 // rather than the first so that a user can create a new formatter 1463 // from an existing formatter and change its default behavior just 1464 // by appending more rule sets to the end) 1465 1466 // {dlf} Initialization of a fraction rule set requires the default rule 1467 // set to be known. For purposes of initialization, this is always the 1468 // last public rule set, no matter what the localization data says. 1469 initDefaultRuleSet(); 1470 1471 // finally, we can go back through the temporary descriptions 1472 // list and finish seting up the substructure (and we throw 1473 // away the temporary descriptions as we go) 1474 { 1475 for (int i = 0; i < numRuleSets; i++) { 1476 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1477 } 1478 } 1479 1480 // Now that the rules are initialized, the 'real' default rule 1481 // set can be adjusted by the localization data. 1482 1483 // The C code keeps the localization array as is, rather than building 1484 // a separate array of the public rule set names, so we have less work 1485 // to do here-- but we still need to check the names. 1486 1487 if (localizationInfos) { 1488 // confirm the names, if any aren't in the rules, that's an error 1489 // it is ok if the rules contain public rule sets that are not in this list 1490 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1491 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1492 NFRuleSet* rs = findRuleSet(name, status); 1493 if (rs == NULL) { 1494 break; // error 1495 } 1496 if (i == 0) { 1497 defaultRuleSet = rs; 1498 } 1499 } 1500 } else { 1501 defaultRuleSet = getDefaultRuleSet(); 1502 } 1503 originalDescription = rules; 1504} 1505 1506// override the NumberFormat implementation in order to 1507// lazily initialize relevant items 1508void 1509RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1510{ 1511 NumberFormat::setContext(value, status); 1512 if (U_SUCCESS(status)) { 1513 if (!capitalizationInfoSet && 1514 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1515 initCapitalizationContextInfo(locale); 1516 capitalizationInfoSet = TRUE; 1517 } 1518#if !UCONFIG_NO_BREAK_ITERATION 1519 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1520 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1521 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1522 UErrorCode status = U_ZERO_ERROR; 1523 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1524 if (U_FAILURE(status)) { 1525 delete capitalizationBrkIter; 1526 capitalizationBrkIter = NULL; 1527 } 1528 } 1529#endif 1530 } 1531} 1532 1533void 1534RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1535{ 1536#if !UCONFIG_NO_BREAK_ITERATION 1537 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1538 UErrorCode status = U_ZERO_ERROR; 1539 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1540 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1541 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1542 if (U_SUCCESS(status) && rb != NULL) { 1543 int32_t len = 0; 1544 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1545 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1546 capitalizationForUIListMenu = intVector[0]; 1547 capitalizationForStandAlone = intVector[1]; 1548 } 1549 } 1550 ures_close(rb); 1551#endif 1552} 1553 1554void 1555RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1556{ 1557 // iterate through the characters... 1558 UnicodeString result; 1559 1560 int start = 0; 1561 while (start != -1 && start < description.length()) { 1562 // seek to the first non-whitespace character... 1563 while (start < description.length() 1564 && PatternProps::isWhiteSpace(description.charAt(start))) { 1565 ++start; 1566 } 1567 1568 // locate the next semicolon in the text and copy the text from 1569 // our current position up to that semicolon into the result 1570 int32_t p = description.indexOf(gSemiColon, start); 1571 if (p == -1) { 1572 // or if we don't find a semicolon, just copy the rest of 1573 // the string into the result 1574 result.append(description, start, description.length() - start); 1575 start = -1; 1576 } 1577 else if (p < description.length()) { 1578 result.append(description, start, p + 1 - start); 1579 start = p + 1; 1580 } 1581 1582 // when we get here, we've seeked off the end of the sring, and 1583 // we terminate the loop (we continue until *start* is -1 rather 1584 // than until *p* is -1, because otherwise we'd miss the last 1585 // rule in the description) 1586 else { 1587 start = -1; 1588 } 1589 } 1590 1591 description.setTo(result); 1592} 1593 1594 1595void 1596RuleBasedNumberFormat::dispose() 1597{ 1598 if (ruleSets) { 1599 for (NFRuleSet** p = ruleSets; *p; ++p) { 1600 delete *p; 1601 } 1602 uprv_free(ruleSets); 1603 ruleSets = NULL; 1604 } 1605 1606 if (ruleSetDescriptions) { 1607 delete [] ruleSetDescriptions; 1608 } 1609 1610#if !UCONFIG_NO_COLLATION 1611 delete collator; 1612#endif 1613 collator = NULL; 1614 1615 delete decimalFormatSymbols; 1616 decimalFormatSymbols = NULL; 1617 1618 delete lenientParseRules; 1619 lenientParseRules = NULL; 1620 1621#if !UCONFIG_NO_BREAK_ITERATION 1622 delete capitalizationBrkIter; 1623 capitalizationBrkIter = NULL; 1624#endif 1625 1626 if (localizations) localizations = localizations->unref(); 1627} 1628 1629 1630//----------------------------------------------------------------------- 1631// package-internal API 1632//----------------------------------------------------------------------- 1633 1634/** 1635 * Returns the collator to use for lenient parsing. The collator is lazily created: 1636 * this function creates it the first time it's called. 1637 * @return The collator to use for lenient parsing, or null if lenient parsing 1638 * is turned off. 1639*/ 1640const RuleBasedCollator* 1641RuleBasedNumberFormat::getCollator() const 1642{ 1643#if !UCONFIG_NO_COLLATION 1644 if (!ruleSets) { 1645 return NULL; 1646 } 1647 1648 // lazy-evaluate the collator 1649 if (collator == NULL && lenient) { 1650 // create a default collator based on the formatter's locale, 1651 // then pull out that collator's rules, append any additional 1652 // rules specified in the description, and create a _new_ 1653 // collator based on the combinaiton of those rules 1654 1655 UErrorCode status = U_ZERO_ERROR; 1656 1657 Collator* temp = Collator::createInstance(locale, status); 1658 RuleBasedCollator* newCollator; 1659 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1660 if (lenientParseRules) { 1661 UnicodeString rules(newCollator->getRules()); 1662 rules.append(*lenientParseRules); 1663 1664 newCollator = new RuleBasedCollator(rules, status); 1665 // Exit if newCollator could not be created. 1666 if (newCollator == NULL) { 1667 return NULL; 1668 } 1669 } else { 1670 temp = NULL; 1671 } 1672 if (U_SUCCESS(status)) { 1673 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1674 // cast away const 1675 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1676 } else { 1677 delete newCollator; 1678 } 1679 } 1680 delete temp; 1681 } 1682#endif 1683 1684 // if lenient-parse mode is off, this will be null 1685 // (see setLenientParseMode()) 1686 return collator; 1687} 1688 1689 1690/** 1691 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1692 * instances owned by this formatter. This object is lazily created: this function 1693 * creates it the first time it's called. 1694 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat 1695 * instances owned by this formatter. 1696*/ 1697DecimalFormatSymbols* 1698RuleBasedNumberFormat::getDecimalFormatSymbols() const 1699{ 1700 // lazy-evaluate the DecimalFormatSymbols object. This object 1701 // is shared by all DecimalFormat instances belonging to this 1702 // formatter 1703 if (decimalFormatSymbols == NULL) { 1704 UErrorCode status = U_ZERO_ERROR; 1705 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1706 if (U_SUCCESS(status)) { 1707 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; 1708 } else { 1709 delete temp; 1710 } 1711 } 1712 return decimalFormatSymbols; 1713} 1714 1715// De-owning the current localized symbols and adopt the new symbols. 1716void 1717RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1718{ 1719 if (symbolsToAdopt == NULL) { 1720 return; // do not allow caller to set decimalFormatSymbols to NULL 1721 } 1722 1723 if (decimalFormatSymbols != NULL) { 1724 delete decimalFormatSymbols; 1725 } 1726 1727 decimalFormatSymbols = symbolsToAdopt; 1728 1729 { 1730 // Apply the new decimalFormatSymbols by reparsing the rulesets 1731 UErrorCode status = U_ZERO_ERROR; 1732 1733 for (int32_t i = 0; i < numRuleSets; i++) { 1734 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1735 } 1736 } 1737} 1738 1739// Setting the symbols is equlivalent to adopting a newly created localized symbols. 1740void 1741RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1742{ 1743 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1744} 1745 1746U_NAMESPACE_END 1747 1748/* U_HAVE_RBNF */ 1749#endif 1750