1 2/******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 2001-2014, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ********************************************************************/ 7/******************************************************************************* 8* 9* File cmsccoll.C 10* 11*******************************************************************************/ 12/** 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where 14 * to fit. 15 */ 16 17#include <stdio.h> 18 19#include "unicode/utypes.h" 20 21#if !UCONFIG_NO_COLLATION 22 23#include "unicode/ucol.h" 24#include "unicode/ucoleitr.h" 25#include "unicode/uloc.h" 26#include "cintltst.h" 27#include "ccolltst.h" 28#include "callcoll.h" 29#include "unicode/ustring.h" 30#include "string.h" 31#include "ucol_imp.h" 32#include "cmemory.h" 33#include "cstring.h" 34#include "uassert.h" 35#include "unicode/parseerr.h" 36#include "unicode/ucnv.h" 37#include "unicode/ures.h" 38#include "unicode/uscript.h" 39#include "unicode/utf16.h" 40#include "uparse.h" 41#include "putilimp.h" 42 43 44#define LEN(a) (sizeof(a)/sizeof(a[0])) 45 46#define MAX_TOKEN_LEN 16 47 48typedef UCollationResult tst_strcoll(void *collator, const int object, 49 const UChar *source, const int sLen, 50 const UChar *target, const int tLen); 51 52 53 54const static char cnt1[][10] = { 55 56 "AA", 57 "AC", 58 "AZ", 59 "AQ", 60 "AB", 61 "ABZ", 62 "ABQ", 63 "Z", 64 "ABC", 65 "Q", 66 "B" 67}; 68 69const static char cnt2[][10] = { 70 "DA", 71 "DAD", 72 "DAZ", 73 "MAR", 74 "Z", 75 "DAVIS", 76 "MARK", 77 "DAV", 78 "DAVI" 79}; 80 81static void IncompleteCntTest(void) 82{ 83 UErrorCode status = U_ZERO_ERROR; 84 UChar temp[90]; 85 UChar t1[90]; 86 UChar t2[90]; 87 88 UCollator *coll = NULL; 89 uint32_t i = 0, j = 0; 90 uint32_t size = 0; 91 92 u_uastrcpy(temp, " & Z < ABC < Q < B"); 93 94 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 95 96 if(U_SUCCESS(status)) { 97 size = sizeof(cnt1)/sizeof(cnt1[0]); 98 for(i = 0; i < size-1; i++) { 99 for(j = i+1; j < size; j++) { 100 UCollationElements *iter; 101 u_uastrcpy(t1, cnt1[i]); 102 u_uastrcpy(t2, cnt1[j]); 103 doTest(coll, t1, t2, UCOL_LESS); 104 /* synwee : added collation element iterator test */ 105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 106 if (U_FAILURE(status)) { 107 log_err("Creation of iterator failed\n"); 108 break; 109 } 110 backAndForth(iter); 111 ucol_closeElements(iter); 112 } 113 } 114 } 115 116 ucol_close(coll); 117 118 119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); 120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 121 122 if(U_SUCCESS(status)) { 123 size = sizeof(cnt2)/sizeof(cnt2[0]); 124 for(i = 0; i < size-1; i++) { 125 for(j = i+1; j < size; j++) { 126 UCollationElements *iter; 127 u_uastrcpy(t1, cnt2[i]); 128 u_uastrcpy(t2, cnt2[j]); 129 doTest(coll, t1, t2, UCOL_LESS); 130 131 /* synwee : added collation element iterator test */ 132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 133 if (U_FAILURE(status)) { 134 log_err("Creation of iterator failed\n"); 135 break; 136 } 137 backAndForth(iter); 138 ucol_closeElements(iter); 139 } 140 } 141 } 142 143 ucol_close(coll); 144 145 146} 147 148const static char shifted[][20] = { 149 "black bird", 150 "black-bird", 151 "blackbird", 152 "black Bird", 153 "black-Bird", 154 "blackBird", 155 "black birds", 156 "black-birds", 157 "blackbirds" 158}; 159 160const static UCollationResult shiftedTert[] = { 161 UCOL_EQUAL, 162 UCOL_EQUAL, 163 UCOL_EQUAL, 164 UCOL_LESS, 165 UCOL_EQUAL, 166 UCOL_EQUAL, 167 UCOL_LESS, 168 UCOL_EQUAL, 169 UCOL_EQUAL 170}; 171 172const static char nonignorable[][20] = { 173 "black bird", 174 "black Bird", 175 "black birds", 176 "black-bird", 177 "black-Bird", 178 "black-birds", 179 "blackbird", 180 "blackBird", 181 "blackbirds" 182}; 183 184static void BlackBirdTest(void) { 185 UErrorCode status = U_ZERO_ERROR; 186 UChar t1[90]; 187 UChar t2[90]; 188 189 uint32_t i = 0, j = 0; 190 uint32_t size = 0; 191 UCollator *coll = ucol_open("en_US", &status); 192 193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); 195 196 if(U_SUCCESS(status)) { 197 size = sizeof(nonignorable)/sizeof(nonignorable[0]); 198 for(i = 0; i < size-1; i++) { 199 for(j = i+1; j < size; j++) { 200 u_uastrcpy(t1, nonignorable[i]); 201 u_uastrcpy(t2, nonignorable[j]); 202 doTest(coll, t1, t2, UCOL_LESS); 203 } 204 } 205 } 206 207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 209 210 if(U_SUCCESS(status)) { 211 size = sizeof(shifted)/sizeof(shifted[0]); 212 for(i = 0; i < size-1; i++) { 213 for(j = i+1; j < size; j++) { 214 u_uastrcpy(t1, shifted[i]); 215 u_uastrcpy(t2, shifted[j]); 216 doTest(coll, t1, t2, UCOL_LESS); 217 } 218 } 219 } 220 221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); 222 if(U_SUCCESS(status)) { 223 size = sizeof(shifted)/sizeof(shifted[0]); 224 for(i = 1; i < size; i++) { 225 u_uastrcpy(t1, shifted[i-1]); 226 u_uastrcpy(t2, shifted[i]); 227 doTest(coll, t1, t2, shiftedTert[i]); 228 } 229 } 230 231 ucol_close(coll); 232} 233 234const static UChar testSourceCases[][MAX_TOKEN_LEN] = { 235 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, 236 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, 237 {0x0041/*'A'*/, 0x0300, 0x0000}, 238 {0x00C0, 0x0301, 0x0000}, 239 /* this would work with forced normalization */ 240 {0x00C0, 0x0316, 0x0000} 241}; 242 243const static UChar testTargetCases[][MAX_TOKEN_LEN] = { 244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 245 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, 246 {0x00C0, 0}, 247 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 248 /* this would work with forced normalization */ 249 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} 250}; 251 252const static UCollationResult results[] = { 253 UCOL_GREATER, 254 UCOL_EQUAL, 255 UCOL_EQUAL, 256 UCOL_GREATER, 257 UCOL_EQUAL 258}; 259 260static void FunkyATest(void) 261{ 262 263 int32_t i; 264 UErrorCode status = U_ZERO_ERROR; 265 UCollator *myCollation; 266 myCollation = ucol_open("en_US", &status); 267 if(U_FAILURE(status)){ 268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 269 return; 270 } 271 log_verbose("Testing some A letters, for some reason\n"); 272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 273 ucol_setStrength(myCollation, UCOL_TERTIARY); 274 for (i = 0; i < 4 ; i++) 275 { 276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); 277 } 278 ucol_close(myCollation); 279} 280 281UColAttributeValue caseFirst[] = { 282 UCOL_OFF, 283 UCOL_LOWER_FIRST, 284 UCOL_UPPER_FIRST 285}; 286 287 288UColAttributeValue alternateHandling[] = { 289 UCOL_NON_IGNORABLE, 290 UCOL_SHIFTED 291}; 292 293UColAttributeValue caseLevel[] = { 294 UCOL_OFF, 295 UCOL_ON 296}; 297 298UColAttributeValue strengths[] = { 299 UCOL_PRIMARY, 300 UCOL_SECONDARY, 301 UCOL_TERTIARY, 302 UCOL_QUATERNARY, 303 UCOL_IDENTICAL 304}; 305 306#if 0 307static const char * strengthsC[] = { 308 "UCOL_PRIMARY", 309 "UCOL_SECONDARY", 310 "UCOL_TERTIARY", 311 "UCOL_QUATERNARY", 312 "UCOL_IDENTICAL" 313}; 314 315static const char * caseFirstC[] = { 316 "UCOL_OFF", 317 "UCOL_LOWER_FIRST", 318 "UCOL_UPPER_FIRST" 319}; 320 321 322static const char * alternateHandlingC[] = { 323 "UCOL_NON_IGNORABLE", 324 "UCOL_SHIFTED" 325}; 326 327static const char * caseLevelC[] = { 328 "UCOL_OFF", 329 "UCOL_ON" 330}; 331 332/* not used currently - does not test only prints */ 333static void PrintMarkDavis(void) 334{ 335 UErrorCode status = U_ZERO_ERROR; 336 UChar m[256]; 337 uint8_t sortkey[256]; 338 UCollator *coll = ucol_open("en_US", &status); 339 uint32_t h,i,j,k, sortkeysize; 340 uint32_t sizem = 0; 341 char buffer[512]; 342 uint32_t len = 512; 343 344 log_verbose("PrintMarkDavis"); 345 346 u_uastrcpy(m, "Mark Davis"); 347 sizem = u_strlen(m); 348 349 350 m[1] = 0xe4; 351 352 for(i = 0; i<sizem; i++) { 353 fprintf(stderr, "\\u%04X ", m[i]); 354 } 355 fprintf(stderr, "\n"); 356 357 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) { 358 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status); 359 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]); 360 361 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) { 362 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status); 363 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]); 364 365 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) { 366 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status); 367 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]); 368 369 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) { 370 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status); 371 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256); 372 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]); 373 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len)); 374 } 375 376 } 377 378 } 379 380 } 381} 382#endif 383 384static void BillFairmanTest(void) { 385/* 386** check for actual locale via ICU resource bundles 387** 388** lp points to the original locale ("fr_FR_....") 389*/ 390 391 UResourceBundle *lr,*cr; 392 UErrorCode lec = U_ZERO_ERROR; 393 const char *lp = "fr_FR_you_ll_never_find_this_locale"; 394 395 log_verbose("BillFairmanTest\n"); 396 397 lr = ures_open(NULL,lp,&lec); 398 if (lr) { 399 cr = ures_getByKey(lr,"collations",0,&lec); 400 if (cr) { 401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); 402 if (lp) { 403 if (U_SUCCESS(lec)) { 404 if(strcmp(lp, "fr") != 0) { 405 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp); 406 } 407 } 408 } 409 ures_close(cr); 410 } 411 ures_close(lr); 412 } 413} 414 415const static char chTest[][20] = { 416 "c", 417 "C", 418 "ca", "cb", "cx", "cy", "CZ", 419 "c\\u030C", "C\\u030C", 420 "h", 421 "H", 422 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 423 "ch", "cH", "Ch", "CH", 424 "cha", "charly", "che", "chh", "chch", "chr", 425 "i", "I", "iarly", 426 "r", "R", 427 "r\\u030C", "R\\u030C", 428 "s", 429 "S", 430 "s\\u030C", "S\\u030C", 431 "z", "Z", 432 "z\\u030C", "Z\\u030C" 433}; 434 435static void TestChMove(void) { 436 UChar t1[256] = {0}; 437 UChar t2[256] = {0}; 438 439 uint32_t i = 0, j = 0; 440 uint32_t size = 0; 441 UErrorCode status = U_ZERO_ERROR; 442 443 UCollator *coll = ucol_open("cs", &status); 444 445 if(U_SUCCESS(status)) { 446 size = sizeof(chTest)/sizeof(chTest[0]); 447 for(i = 0; i < size-1; i++) { 448 for(j = i+1; j < size; j++) { 449 u_unescape(chTest[i], t1, 256); 450 u_unescape(chTest[j], t2, 256); 451 doTest(coll, t1, t2, UCOL_LESS); 452 } 453 } 454 } 455 else { 456 log_data_err("Can't open collator"); 457 } 458 ucol_close(coll); 459} 460 461 462 463 464/* 465const static char impTest[][20] = { 466 "\\u4e00", 467 "a", 468 "A", 469 "b", 470 "B", 471 "\\u4e01" 472}; 473*/ 474 475 476static void TestImplicitTailoring(void) { 477 static const struct { 478 const char *rules; 479 const char *data[10]; 480 const uint32_t len; 481 } tests[] = { 482 { 483 /* Tailor b and c before U+4E00. */ 484 "&[before 1]\\u4e00 < b < c " 485 /* Now, before U+4E00 is c; put d and e after that. */ 486 "&[before 1]\\u4e00 < d < e", 487 { "b", "c", "d", "e", "\\u4e00"}, 5 }, 488 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, 489 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, 490 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} 491 }; 492 493 int32_t i = 0; 494 495 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 496 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 497 } 498 499/* 500 UChar t1[256] = {0}; 501 UChar t2[256] = {0}; 502 503 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; 504 505 uint32_t i = 0, j = 0; 506 uint32_t size = 0; 507 uint32_t ruleLen = 0; 508 UErrorCode status = U_ZERO_ERROR; 509 UCollator *coll = NULL; 510 ruleLen = u_unescape(rule, t1, 256); 511 512 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 513 514 if(U_SUCCESS(status)) { 515 size = sizeof(impTest)/sizeof(impTest[0]); 516 for(i = 0; i < size-1; i++) { 517 for(j = i+1; j < size; j++) { 518 u_unescape(impTest[i], t1, 256); 519 u_unescape(impTest[j], t2, 256); 520 doTest(coll, t1, t2, UCOL_LESS); 521 } 522 } 523 } 524 else { 525 log_err("Can't open collator"); 526 } 527 ucol_close(coll); 528 */ 529} 530 531static void TestFCDProblem(void) { 532 UChar t1[256] = {0}; 533 UChar t2[256] = {0}; 534 535 const char *s1 = "\\u0430\\u0306\\u0325"; 536 const char *s2 = "\\u04D1\\u0325"; 537 538 UErrorCode status = U_ZERO_ERROR; 539 UCollator *coll = ucol_open("", &status); 540 u_unescape(s1, t1, 256); 541 u_unescape(s2, t2, 256); 542 543 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 544 doTest(coll, t1, t2, UCOL_EQUAL); 545 546 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 547 doTest(coll, t1, t2, UCOL_EQUAL); 548 549 ucol_close(coll); 550} 551 552/* 553The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC 554We're only using NFC/NFD in this test. 555*/ 556#define NORM_BUFFER_TEST_LEN 18 557typedef struct { 558 UChar32 u; 559 UChar NFC[NORM_BUFFER_TEST_LEN]; 560 UChar NFD[NORM_BUFFER_TEST_LEN]; 561} tester; 562 563static void TestComposeDecompose(void) { 564 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ 565 static const UChar UNICODESET_STR[] = { 566 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61, 567 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72, 568 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 569 }; 570 int32_t noOfLoc; 571 int32_t i = 0, j = 0; 572 573 UErrorCode status = U_ZERO_ERROR; 574 const char *locName = NULL; 575 uint32_t nfcSize; 576 uint32_t nfdSize; 577 tester **t; 578 uint32_t noCases = 0; 579 UCollator *coll = NULL; 580 UChar32 u = 0; 581 UChar comp[NORM_BUFFER_TEST_LEN]; 582 uint32_t len = 0; 583 UCollationElements *iter; 584 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); 585 int32_t charsToTestSize; 586 587 noOfLoc = uloc_countAvailable(); 588 589 coll = ucol_open("", &status); 590 if (U_FAILURE(status)) { 591 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status)); 592 return; 593 } 594 charsToTestSize = uset_size(charsToTest); 595 if (charsToTestSize <= 0) { 596 log_err("Set was zero. Missing data?\n"); 597 return; 598 } 599 t = (tester **)malloc(charsToTestSize * sizeof(tester *)); 600 t[0] = (tester *)malloc(sizeof(tester)); 601 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); 602 603 for(u = 0; u < charsToTestSize; u++) { 604 UChar32 ch = uset_charAt(charsToTest, u); 605 len = 0; 606 U16_APPEND_UNSAFE(comp, len, ch); 607 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 608 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 609 610 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0) 611 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) { 612 t[noCases]->u = ch; 613 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) { 614 u_strncpy(t[noCases]->NFC, comp, len); 615 t[noCases]->NFC[len] = 0; 616 } 617 noCases++; 618 t[noCases] = (tester *)malloc(sizeof(tester)); 619 uprv_memset(t[noCases], 0, sizeof(tester)); 620 } 621 } 622 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize); 623 uset_close(charsToTest); 624 charsToTest = NULL; 625 626 for(u=0; u<(UChar32)noCases; u++) { 627 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 628 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u); 629 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 630 } 631 } 632 /* 633 for(u = 0; u < charsToTestSize; u++) { 634 if(!(u&0xFFFF)) { 635 log_verbose("%08X ", u); 636 } 637 uprv_memset(t[noCases], 0, sizeof(tester)); 638 t[noCases]->u = u; 639 len = 0; 640 U16_APPEND_UNSAFE(comp, len, u); 641 comp[len] = 0; 642 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 643 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 644 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); 645 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); 646 } 647 */ 648 649 ucol_close(coll); 650 651 log_verbose("Testing locales, number of cases = %i\n", noCases); 652 for(i = 0; i<noOfLoc; i++) { 653 status = U_ZERO_ERROR; 654 locName = uloc_getAvailable(i); 655 if(hasCollationElements(locName)) { 656 char cName[256]; 657 UChar name[256]; 658 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status); 659 660 for(j = 0; j<nameSize; j++) { 661 cName[j] = (char)name[j]; 662 } 663 cName[nameSize] = 0; 664 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 665 666 coll = ucol_open(locName, &status); 667 ucol_setStrength(coll, UCOL_IDENTICAL); 668 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status); 669 670 for(u=0; u<(UChar32)noCases; u++) { 671 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 672 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName); 673 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 674 log_verbose("Testing NFC\n"); 675 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); 676 backAndForth(iter); 677 log_verbose("Testing NFD\n"); 678 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); 679 backAndForth(iter); 680 } 681 } 682 ucol_closeElements(iter); 683 ucol_close(coll); 684 } 685 } 686 for(u = 0; u <= (UChar32)noCases; u++) { 687 free(t[u]); 688 } 689 free(t); 690} 691 692static void TestEmptyRule(void) { 693 UErrorCode status = U_ZERO_ERROR; 694 UChar rulez[] = { 0 }; 695 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 696 697 ucol_close(coll); 698} 699 700static void TestUCARules(void) { 701 UErrorCode status = U_ZERO_ERROR; 702 UChar b[256]; 703 UChar *rules = b; 704 uint32_t ruleLen = 0; 705 UCollator *UCAfromRules = NULL; 706 UCollator *coll = ucol_open("", &status); 707 if(status == U_FILE_ACCESS_ERROR) { 708 log_data_err("Is your data around?\n"); 709 return; 710 } else if(U_FAILURE(status)) { 711 log_err("Error opening collator\n"); 712 return; 713 } 714 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); 715 716 log_verbose("TestUCARules\n"); 717 if(ruleLen > 256) { 718 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); 719 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); 720 } 721 log_verbose("Rules length is %d\n", ruleLen); 722 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 723 if(U_SUCCESS(status)) { 724 ucol_close(UCAfromRules); 725 } else { 726 log_verbose("Unable to create a collator from UCARules!\n"); 727 } 728/* 729 u_unescape(blah, b, 256); 730 ucol_getSortKey(coll, b, 1, res, 256); 731*/ 732 ucol_close(coll); 733 if(rules != b) { 734 free(rules); 735 } 736} 737 738 739/* Pinyin tonal order */ 740/* 741 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) 742 (w/macron)< (w/acute)< (w/caron)< (w/grave) 743 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) 744 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) 745 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) 746 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) 747 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < 748.. (\u00fc) 749 750However, in testing we got the following order: 751 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) 752 (w/acute)< (w/grave)< (w/caron)< (w/macron) 753 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < 754.. (\u0113) 755 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) 756 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) 757 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < 758.. (\u01d8) 759 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b) 760*/ 761 762static void TestBefore(void) { 763 const static char *data[] = { 764 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A", 765 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E", 766 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I", 767 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O", 768 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U", 769 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc" 770 }; 771 genericRulesStarter( 772 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0" 773 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8" 774 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec" 775 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2" 776 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9" 777 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc", 778 data, sizeof(data)/sizeof(data[0])); 779} 780 781#if 0 782/* superceded by TestBeforePinyin */ 783static void TestJ784(void) { 784 const static char *data[] = { 785 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", 786 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", 787 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", 788 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", 789 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", 790 "\\u00fc", 791 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc" 792 }; 793 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0])); 794} 795#endif 796 797#if 0 798/* superceded by the changes to the lv locale */ 799static void TestJ831(void) { 800 const static char *data[] = { 801 "I", 802 "i", 803 "Y", 804 "y" 805 }; 806 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0])); 807} 808#endif 809 810static void TestJ815(void) { 811 const static char *data[] = { 812 "aa", 813 "Aa", 814 "ab", 815 "Ab", 816 "ad", 817 "Ad", 818 "ae", 819 "Ae", 820 "\\u00e6", 821 "\\u00c6", 822 "af", 823 "Af", 824 "b", 825 "B" 826 }; 827 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 828 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0])); 829} 830 831 832static void TestCase(void) 833{ 834 const static UChar gRules[MAX_TOKEN_LEN] = 835 /*" & 0 < 1,\u2461<a,A"*/ 836 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 }; 837 838 const static UChar testCase[][MAX_TOKEN_LEN] = 839 { 840 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 841 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 842 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, 843 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} 844 }; 845 846 const static UCollationResult caseTestResults[][9] = 847 { 848 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 849 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, 850 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 851 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } 852 }; 853 854 const static UColAttributeValue caseTestAttributes[][2] = 855 { 856 { UCOL_LOWER_FIRST, UCOL_OFF}, 857 { UCOL_UPPER_FIRST, UCOL_OFF}, 858 { UCOL_LOWER_FIRST, UCOL_ON}, 859 { UCOL_UPPER_FIRST, UCOL_ON} 860 }; 861 int32_t i,j,k; 862 UErrorCode status = U_ZERO_ERROR; 863 UCollationElements *iter; 864 UCollator *myCollation; 865 myCollation = ucol_open("en_US", &status); 866 867 if(U_FAILURE(status)){ 868 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 869 return; 870 } 871 log_verbose("Testing different case settings\n"); 872 ucol_setStrength(myCollation, UCOL_TERTIARY); 873 874 for(k = 0; k<4; k++) { 875 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 876 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 877 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]); 878 for (i = 0; i < 3 ; i++) { 879 for(j = i+1; j<4; j++) { 880 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 881 } 882 } 883 } 884 ucol_close(myCollation); 885 886 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status); 887 if(U_FAILURE(status)){ 888 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 889 return; 890 } 891 log_verbose("Testing different case settings with custom rules\n"); 892 ucol_setStrength(myCollation, UCOL_TERTIARY); 893 894 for(k = 0; k<4; k++) { 895 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 896 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 897 for (i = 0; i < 3 ; i++) { 898 for(j = i+1; j<4; j++) { 899 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); 900 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 901 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status); 902 backAndForth(iter); 903 ucol_closeElements(iter); 904 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status); 905 backAndForth(iter); 906 ucol_closeElements(iter); 907 } 908 } 909 } 910 ucol_close(myCollation); 911 { 912 const static char *lowerFirst[] = { 913 "h", 914 "H", 915 "ch", 916 "Ch", 917 "CH", 918 "cha", 919 "chA", 920 "Cha", 921 "ChA", 922 "CHa", 923 "CHA", 924 "i", 925 "I" 926 }; 927 928 const static char *upperFirst[] = { 929 "H", 930 "h", 931 "CH", 932 "Ch", 933 "ch", 934 "CHA", 935 "CHa", 936 "ChA", 937 "Cha", 938 "chA", 939 "cha", 940 "I", 941 "i" 942 }; 943 log_verbose("mixed case test\n"); 944 log_verbose("lower first, case level off\n"); 945 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 946 log_verbose("upper first, case level off\n"); 947 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 948 log_verbose("lower first, case level on\n"); 949 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 950 log_verbose("upper first, case level on\n"); 951 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 952 } 953 954} 955 956static void TestIncrementalNormalize(void) { 957 958 /*UChar baseA =0x61;*/ 959 UChar baseA =0x41; 960/* UChar baseB = 0x42;*/ 961 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 962 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ 963 /* 964 0x316 is combining grave accent below, cc=220 965 0x321 is combining palatalized hook below, cc=202 966 0x300 is combining grave accent, cc=230 967 */ 968 969#define MAXSLEN 2000 970 /*int maxSLen = 64000;*/ 971 int sLen; 972 int i; 973 974 UCollator *coll; 975 UErrorCode status = U_ZERO_ERROR; 976 UCollationResult result; 977 978 int32_t myQ = getTestOption(QUICK_OPTION); 979 980 if(getTestOption(QUICK_OPTION) < 0) { 981 setTestOption(QUICK_OPTION, 1); 982 } 983 984 { 985 /* Test 1. Run very long unnormalized strings, to force overflow of*/ 986 /* most buffers along the way.*/ 987 UChar strA[MAXSLEN+1]; 988 UChar strB[MAXSLEN+1]; 989 990 coll = ucol_open("en_US", &status); 991 if(status == U_FILE_ACCESS_ERROR) { 992 log_data_err("Is your data around?\n"); 993 return; 994 } else if(U_FAILURE(status)) { 995 log_err("Error opening collator\n"); 996 return; 997 } 998 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 999 1000 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ 1001 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ 1002 /*for (sLen = 1000; sLen<1001; sLen++) {*/ 1003 for (sLen = 500; sLen<501; sLen++) { 1004 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ 1005 strA[0] = baseA; 1006 strB[0] = baseA; 1007 for (i=1; i<=sLen-1; i++) { 1008 strA[i] = ccMix[i % 3]; 1009 strB[sLen-i] = ccMix[i % 3]; 1010 } 1011 strA[sLen] = 0; 1012 strB[sLen] = 0; 1013 1014 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/ 1015 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/ 1016 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/ 1017 doTest(coll, strA, strB, UCOL_EQUAL); 1018 } 1019 } 1020 1021 setTestOption(QUICK_OPTION, myQ); 1022 1023 1024 /* Test 2: Non-normal sequence in a string that extends to the last character*/ 1025 /* of the string. Checks a couple of edge cases.*/ 1026 1027 { 1028 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; 1029 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; 1030 ucol_setStrength(coll, UCOL_TERTIARY); 1031 doTest(coll, strA, strB, UCOL_EQUAL); 1032 } 1033 1034 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ 1035 1036 { 1037 /* New UCA 3.1.1. 1038 * test below used a code point from Desseret, which sorts differently 1039 * than d800 dc00 1040 */ 1041 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ 1042 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0}; 1043 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; 1044 ucol_setStrength(coll, UCOL_TERTIARY); 1045 doTest(coll, strA, strB, UCOL_GREATER); 1046 } 1047 1048 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/ 1049 1050 { 1051 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; 1052 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; 1053 char sortKeyA[50]; 1054 char sortKeyAz[50]; 1055 char sortKeyB[50]; 1056 char sortKeyBz[50]; 1057 int r; 1058 1059 /* there used to be -3 here. Hmmmm.... */ 1060 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ 1061 result = ucol_strcoll(coll, strA, 3, strB, 3); 1062 if (result != UCOL_GREATER) { 1063 log_err("ERROR 1 in test 4\n"); 1064 } 1065 result = ucol_strcoll(coll, strA, -1, strB, -1); 1066 if (result != UCOL_EQUAL) { 1067 log_err("ERROR 2 in test 4\n"); 1068 } 1069 1070 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 1071 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 1072 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 1073 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 1074 1075 r = strcmp(sortKeyA, sortKeyAz); 1076 if (r <= 0) { 1077 log_err("Error 3 in test 4\n"); 1078 } 1079 r = strcmp(sortKeyA, sortKeyB); 1080 if (r <= 0) { 1081 log_err("Error 4 in test 4\n"); 1082 } 1083 r = strcmp(sortKeyAz, sortKeyBz); 1084 if (r != 0) { 1085 log_err("Error 5 in test 4\n"); 1086 } 1087 1088 ucol_setStrength(coll, UCOL_IDENTICAL); 1089 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 1090 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 1091 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 1092 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 1093 1094 r = strcmp(sortKeyA, sortKeyAz); 1095 if (r <= 0) { 1096 log_err("Error 6 in test 4\n"); 1097 } 1098 r = strcmp(sortKeyA, sortKeyB); 1099 if (r <= 0) { 1100 log_err("Error 7 in test 4\n"); 1101 } 1102 r = strcmp(sortKeyAz, sortKeyBz); 1103 if (r != 0) { 1104 log_err("Error 8 in test 4\n"); 1105 } 1106 ucol_setStrength(coll, UCOL_TERTIARY); 1107 } 1108 1109 1110 /* Test 5: Null characters in non-normal source strings.*/ 1111 1112 { 1113 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}; 1114 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}; 1115 char sortKeyA[50]; 1116 char sortKeyAz[50]; 1117 char sortKeyB[50]; 1118 char sortKeyBz[50]; 1119 int r; 1120 1121 result = ucol_strcoll(coll, strA, 6, strB, 6); 1122 if (result != UCOL_GREATER) { 1123 log_err("ERROR 1 in test 5\n"); 1124 } 1125 result = ucol_strcoll(coll, strA, -1, strB, -1); 1126 if (result != UCOL_EQUAL) { 1127 log_err("ERROR 2 in test 5\n"); 1128 } 1129 1130 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 1131 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 1132 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 1133 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 1134 1135 r = strcmp(sortKeyA, sortKeyAz); 1136 if (r <= 0) { 1137 log_err("Error 3 in test 5\n"); 1138 } 1139 r = strcmp(sortKeyA, sortKeyB); 1140 if (r <= 0) { 1141 log_err("Error 4 in test 5\n"); 1142 } 1143 r = strcmp(sortKeyAz, sortKeyBz); 1144 if (r != 0) { 1145 log_err("Error 5 in test 5\n"); 1146 } 1147 1148 ucol_setStrength(coll, UCOL_IDENTICAL); 1149 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 1150 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 1151 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 1152 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 1153 1154 r = strcmp(sortKeyA, sortKeyAz); 1155 if (r <= 0) { 1156 log_err("Error 6 in test 5\n"); 1157 } 1158 r = strcmp(sortKeyA, sortKeyB); 1159 if (r <= 0) { 1160 log_err("Error 7 in test 5\n"); 1161 } 1162 r = strcmp(sortKeyAz, sortKeyBz); 1163 if (r != 0) { 1164 log_err("Error 8 in test 5\n"); 1165 } 1166 ucol_setStrength(coll, UCOL_TERTIARY); 1167 } 1168 1169 1170 /* Test 6: Null character as base of a non-normal combining sequence.*/ 1171 1172 { 1173 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}; 1174 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}; 1175 1176 result = ucol_strcoll(coll, strA, 5, strB, 5); 1177 if (result != UCOL_LESS) { 1178 log_err("Error 1 in test 6\n"); 1179 } 1180 result = ucol_strcoll(coll, strA, -1, strB, -1); 1181 if (result != UCOL_EQUAL) { 1182 log_err("Error 2 in test 6\n"); 1183 } 1184 } 1185 1186 ucol_close(coll); 1187} 1188 1189 1190 1191#if 0 1192static void TestGetCaseBit(void) { 1193 static const char *caseBitData[] = { 1194 "a", "A", "ch", "Ch", "CH", 1195 "\\uFF9E", "\\u0009" 1196 }; 1197 1198 static const uint8_t results[] = { 1199 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE, 1200 UCOL_UPPER_CASE, UCOL_LOWER_CASE 1201 }; 1202 1203 uint32_t i, blen = 0; 1204 UChar b[256] = {0}; 1205 UErrorCode status = U_ZERO_ERROR; 1206 UCollator *UCA = ucol_open("", &status); 1207 uint8_t res = 0; 1208 1209 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) { 1210 blen = u_unescape(caseBitData[i], b, 256); 1211 res = ucol_uprv_getCaseBits(UCA, b, blen, &status); 1212 if(results[i] != res) { 1213 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]); 1214 } 1215 } 1216} 1217#endif 1218 1219static void TestHangulTailoring(void) { 1220 static const char *koreanData[] = { 1221 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475", 1222 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef", 1223 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888", 1224 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", 1225 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E", 1226 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C" 1227 }; 1228 1229 const char *rules = 1230 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 " 1231 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef " 1232 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 " 1233 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 " 1234 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E " 1235 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C"; 1236 1237 1238 UErrorCode status = U_ZERO_ERROR; 1239 UChar rlz[2048] = { 0 }; 1240 uint32_t rlen = u_unescape(rules, rlz, 2048); 1241 1242 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 1243 if(status == U_FILE_ACCESS_ERROR) { 1244 log_data_err("Is your data around?\n"); 1245 return; 1246 } else if(U_FAILURE(status)) { 1247 log_err("Error opening collator\n"); 1248 return; 1249 } 1250 1251 log_verbose("Using start of korean rules\n"); 1252 1253 if(U_SUCCESS(status)) { 1254 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 1255 } else { 1256 log_err("Unable to open collator with rules %s\n", rules); 1257 } 1258 1259 ucol_close(coll); 1260 1261 log_verbose("Using ko__LOTUS locale\n"); 1262 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 1263} 1264 1265/* 1266 * The secondary/tertiary compression middle byte 1267 * as used by the current implementation. 1268 * Subject to change as the sort key compression changes. 1269 * See class CollationKeys. 1270 */ 1271enum { 1272 SEC_COMMON_MIDDLE = 0x25, /* range 05..45 */ 1273 TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */ 1274}; 1275 1276static void TestCompressOverlap(void) { 1277 UChar secstr[150]; 1278 UChar tertstr[150]; 1279 UErrorCode status = U_ZERO_ERROR; 1280 UCollator *coll; 1281 uint8_t result[500]; 1282 uint32_t resultlen; 1283 int count = 0; 1284 uint8_t *tempptr; 1285 1286 coll = ucol_open("", &status); 1287 1288 if (U_FAILURE(status)) { 1289 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status)); 1290 return; 1291 } 1292 while (count < 149) { 1293 secstr[count] = 0x0020; /* [06, 05, 05] */ 1294 tertstr[count] = 0x0020; 1295 count ++; 1296 } 1297 1298 /* top down compression ----------------------------------- */ 1299 secstr[count] = 0x0332; /* [, 87, 05] */ 1300 tertstr[count] = 0x3000; /* [06, 05, 07] */ 1301 1302 /* no compression secstr should have 150 secondary bytes, tertstr should 1303 have 150 tertiary bytes. 1304 with correct compression, secstr should have 6 secondary 1305 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */ 1306 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); 1307 (void)resultlen; /* Suppress set but not used warning. */ 1308 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; 1309 while (*(tempptr + 1) != 1) { 1310 /* the last secondary collation element is not checked since it is not 1311 part of the compression */ 1312 if (*tempptr < SEC_COMMON_MIDDLE) { 1313 log_err("Secondary top down compression overlapped\n"); 1314 } 1315 tempptr ++; 1316 } 1317 1318 /* tertiary top/bottom/common for en_US is similar to the secondary 1319 top/bottom/common */ 1320 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); 1321 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; 1322 while (*(tempptr + 1) != 0) { 1323 /* the last secondary collation element is not checked since it is not 1324 part of the compression */ 1325 if (*tempptr < TER_ONLY_COMMON_MIDDLE) { 1326 log_err("Tertiary top down compression overlapped\n"); 1327 } 1328 tempptr ++; 1329 } 1330 1331 /* bottom up compression ------------------------------------- */ 1332 secstr[count] = 0; 1333 tertstr[count] = 0; 1334 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); 1335 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; 1336 while (*(tempptr + 1) != 1) { 1337 /* the last secondary collation element is not checked since it is not 1338 part of the compression */ 1339 if (*tempptr > SEC_COMMON_MIDDLE) { 1340 log_err("Secondary bottom up compression overlapped\n"); 1341 } 1342 tempptr ++; 1343 } 1344 1345 /* tertiary top/bottom/common for en_US is similar to the secondary 1346 top/bottom/common */ 1347 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); 1348 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; 1349 while (*(tempptr + 1) != 0) { 1350 /* the last secondary collation element is not checked since it is not 1351 part of the compression */ 1352 if (*tempptr > TER_ONLY_COMMON_MIDDLE) { 1353 log_err("Tertiary bottom up compression overlapped\n"); 1354 } 1355 tempptr ++; 1356 } 1357 1358 ucol_close(coll); 1359} 1360 1361static void TestCyrillicTailoring(void) { 1362 static const char *test[] = { 1363 "\\u0410b", 1364 "\\u0410\\u0306a", 1365 "\\u04d0A" 1366 }; 1367 1368 /* Russian overrides contractions, so this test is not valid anymore */ 1369 /*genericLocaleStarter("ru", test, 3);*/ 1370 1371 genericLocaleStarter("root", test, 3); 1372 genericRulesStarter("&\\u0410 = \\u0410", test, 3); 1373 genericRulesStarter("&Z < \\u0410", test, 3); 1374 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); 1375 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); 1376 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); 1377 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); 1378} 1379 1380static void TestSuppressContractions(void) { 1381 1382 static const char *testNoCont2[] = { 1383 "\\u0410\\u0302a", 1384 "\\u0410\\u0306b", 1385 "\\u0410c" 1386 }; 1387 static const char *testNoCont[] = { 1388 "a\\u0410", 1389 "A\\u0410\\u0306", 1390 "\\uFF21\\u0410\\u0302" 1391 }; 1392 1393 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3); 1394 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3); 1395} 1396 1397static void TestContraction(void) { 1398 const static char *testrules[] = { 1399 "&A = AB / B", 1400 "&A = A\\u0306/\\u0306", 1401 "&c = ch / h" 1402 }; 1403 const static UChar testdata[][2] = { 1404 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 1405 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 1406 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 1407 }; 1408 const static UChar testdata2[][2] = { 1409 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 1411 {0x0063 /* 'c' */, 0x006C /* 'l' */} 1412 }; 1413#if 0 1414 /* 1415 * These pairs of rule strings are not guaranteed to yield the very same mappings. 1416 * In fact, LDML 24 recommends an improved way of creating mappings 1417 * which always yields different mappings for such pairs. See 1418 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings 1419 */ 1420 const static char *testrules3[] = { 1421 "&z < xyz &xyzw << B", 1422 "&z < xyz &xyz << B / w", 1423 "&z < ch &achm << B", 1424 "&z < ch &a << B / chm", 1425 "&\\ud800\\udc00w << B", 1426 "&\\ud800\\udc00 << B / w", 1427 "&a\\ud800\\udc00m << B", 1428 "&a << B / \\ud800\\udc00m", 1429 }; 1430#endif 1431 1432 UErrorCode status = U_ZERO_ERROR; 1433 UCollator *coll; 1434 UChar rule[256] = {0}; 1435 uint32_t rlen = 0; 1436 int i; 1437 1438 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 1439 UCollationElements *iter1; 1440 int j = 0; 1441 log_verbose("Rule %s for testing\n", testrules[i]); 1442 rlen = u_unescape(testrules[i], rule, 32); 1443 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 1444 if (U_FAILURE(status)) { 1445 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 1446 return; 1447 } 1448 iter1 = ucol_openElements(coll, testdata[i], 2, &status); 1449 if (U_FAILURE(status)) { 1450 log_err("Collation iterator creation failed\n"); 1451 return; 1452 } 1453 while (j < 2) { 1454 UCollationElements *iter2 = ucol_openElements(coll, 1455 &(testdata[i][j]), 1456 1, &status); 1457 uint32_t ce; 1458 if (U_FAILURE(status)) { 1459 log_err("Collation iterator creation failed\n"); 1460 return; 1461 } 1462 ce = ucol_next(iter2, &status); 1463 while (ce != UCOL_NULLORDER) { 1464 if ((uint32_t)ucol_next(iter1, &status) != ce) { 1465 log_err("Collation elements in contraction split does not match\n"); 1466 return; 1467 } 1468 ce = ucol_next(iter2, &status); 1469 } 1470 j ++; 1471 ucol_closeElements(iter2); 1472 } 1473 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { 1474 log_err("Collation elements not exhausted\n"); 1475 return; 1476 } 1477 ucol_closeElements(iter1); 1478 ucol_close(coll); 1479 } 1480 1481 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); 1482 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 1483 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { 1484 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 1485 testdata2[0][0], testdata2[0][1], testdata2[1][0], 1486 testdata2[1][1]); 1487 return; 1488 } 1489 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 1490 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 1491 testdata2[1][0], testdata2[1][1], testdata2[2][0], 1492 testdata2[2][1]); 1493 return; 1494 } 1495 ucol_close(coll); 1496#if 0 /* see above */ 1497 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 1498 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]); 1499 UCollator *coll1, 1500 *coll2; 1501 UCollationElements *iter1, 1502 *iter2; 1503 UChar ch = 0x0042 /* 'B' */; 1504 uint32_t ce; 1505 rlen = u_unescape(testrules3[i], rule, 32); 1506 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 1507 rlen = u_unescape(testrules3[i + 1], rule, 32); 1508 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 1509 if (U_FAILURE(status)) { 1510 log_err("Collator creation failed %s\n", testrules[i]); 1511 return; 1512 } 1513 iter1 = ucol_openElements(coll1, &ch, 1, &status); 1514 iter2 = ucol_openElements(coll2, &ch, 1, &status); 1515 if (U_FAILURE(status)) { 1516 log_err("Collation iterator creation failed\n"); 1517 return; 1518 } 1519 ce = ucol_next(iter1, &status); 1520 if (U_FAILURE(status)) { 1521 log_err("Retrieving ces failed\n"); 1522 return; 1523 } 1524 while (ce != UCOL_NULLORDER) { 1525 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status); 1526 if (ce == ce2) { 1527 log_verbose("CEs match: %08x\n", ce); 1528 } else { 1529 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2); 1530 return; 1531 } 1532 ce = ucol_next(iter1, &status); 1533 if (U_FAILURE(status)) { 1534 log_err("Retrieving ces failed\n"); 1535 return; 1536 } 1537 } 1538 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 1539 log_err("CEs not exhausted\n"); 1540 return; 1541 } 1542 ucol_closeElements(iter1); 1543 ucol_closeElements(iter2); 1544 ucol_close(coll1); 1545 ucol_close(coll2); 1546 } 1547#endif 1548} 1549 1550static void TestExpansion(void) { 1551 const static char *testrules[] = { 1552#if 0 1553 /* 1554 * This seems to have tested that M was not mapped to an expansion. 1555 * I believe the old builder just did that because it computed the extension CEs 1556 * at the very end, which was a bug. 1557 * Among other problems, it violated the core tailoring principle 1558 * by making an earlier rule depend on a later one. 1559 * And, of course, if M did not get an expansion, then it was primary different from K, 1560 * unlike what the rule &K<<M says. 1561 */ 1562 "&J << K / B & K << M", 1563#endif 1564 "&J << K / B << M" 1565 }; 1566 const static UChar testdata[][3] = { 1567 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 1568 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, 1569 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, 1570 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, 1571 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, 1572 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} 1573 }; 1574 1575 UErrorCode status = U_ZERO_ERROR; 1576 UCollator *coll; 1577 UChar rule[256] = {0}; 1578 uint32_t rlen = 0; 1579 int i; 1580 1581 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 1582 int j = 0; 1583 log_verbose("Rule %s for testing\n", testrules[i]); 1584 rlen = u_unescape(testrules[i], rule, 32); 1585 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 1586 if (U_FAILURE(status)) { 1587 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 1588 return; 1589 } 1590 1591 for (j = 0; j < 5; j ++) { 1592 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS); 1593 } 1594 ucol_close(coll); 1595 } 1596} 1597 1598#if 0 1599/* this test tests the current limitations of the engine */ 1600/* it always fail, so it is disabled by default */ 1601static void TestLimitations(void) { 1602 /* recursive expansions */ 1603 { 1604 static const char *rule = "&a=b/c&d=c/e"; 1605 static const char *tlimit01[] = {"add","b","adf"}; 1606 static const char *tlimit02[] = {"aa","b","af"}; 1607 log_verbose("recursive expansions\n"); 1608 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 1609 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 1610 } 1611 /* contractions spanning expansions */ 1612 { 1613 static const char *rule = "&a<<<c/e&g<<<eh"; 1614 static const char *tlimit01[] = {"ad","c","af","f","ch","h"}; 1615 static const char *tlimit02[] = {"ad","c","ch","af","f","h"}; 1616 log_verbose("contractions spanning expansions\n"); 1617 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 1618 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 1619 } 1620 /* normalization: nulls in contractions */ 1621 { 1622 static const char *rule = "&a<<<\\u0000\\u0302"; 1623 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 1624 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 1625 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 1626 static const UColAttributeValue valOn[] = { UCOL_ON }; 1627 static const UColAttributeValue valOff[] = { UCOL_OFF }; 1628 1629 log_verbose("NULL in contractions\n"); 1630 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 1631 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 1632 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 1633 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 1634 1635 } 1636 /* normalization: contractions spanning normalization */ 1637 { 1638 static const char *rule = "&a<<<\\u0000\\u0302"; 1639 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 1640 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 1641 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 1642 static const UColAttributeValue valOn[] = { UCOL_ON }; 1643 static const UColAttributeValue valOff[] = { UCOL_OFF }; 1644 1645 log_verbose("contractions spanning normalization\n"); 1646 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 1647 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 1648 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 1649 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 1650 1651 } 1652 /* variable top: */ 1653 { 1654 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/ 1655 static const char *rule = "&\\u2010<x<[variable top]=z"; 1656 /*static const char *rule3 = "&' '<x<[variable top]=z";*/ 1657 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" }; 1658 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"}; 1659 static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" }; 1660 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }; 1661 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY }; 1662 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY }; 1663 1664 log_verbose("variable top\n"); 1665 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0])); 1666 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 1667 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 1668 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0])); 1669 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0])); 1670 1671 } 1672 /* case level */ 1673 { 1674 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH"; 1675 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"}; 1676 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"}; 1677 static const UColAttribute att[] = { UCOL_CASE_FIRST}; 1678 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST}; 1679 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/ 1680 log_verbose("case level\n"); 1681 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 1682 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 1683 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 1684 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 1685 } 1686 1687} 1688#endif 1689 1690static void TestBocsuCoverage(void) { 1691 UErrorCode status = U_ZERO_ERROR; 1692 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041"; 1693 UChar test[256] = {0}; 1694 uint32_t tlen = u_unescape(testString, test, 32); 1695 uint8_t key[256] = {0}; 1696 uint32_t klen = 0; 1697 1698 UCollator *coll = ucol_open("", &status); 1699 if(U_SUCCESS(status)) { 1700 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 1701 1702 klen = ucol_getSortKey(coll, test, tlen, key, 256); 1703 (void)klen; /* Suppress set but not used warning. */ 1704 1705 ucol_close(coll); 1706 } else { 1707 log_data_err("Couldn't open UCA\n"); 1708 } 1709} 1710 1711static void TestVariableTopSetting(void) { 1712 UErrorCode status = U_ZERO_ERROR; 1713 uint32_t varTopOriginal = 0, varTop1, varTop2; 1714 UCollator *coll = ucol_open("", &status); 1715 if(U_SUCCESS(status)) { 1716 1717 static const UChar nul = 0; 1718 static const UChar space = 0x20; 1719 static const UChar dot = 0x2e; /* punctuation */ 1720 static const UChar degree = 0xb0; /* symbol */ 1721 static const UChar dollar = 0x24; /* currency symbol */ 1722 static const UChar zero = 0x30; /* digit */ 1723 1724 varTopOriginal = ucol_getVariableTop(coll, &status); 1725 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal); 1726 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 1727 1728 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); 1729 varTop2 = ucol_getVariableTop(coll, &status); 1730 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1); 1731 if(U_FAILURE(status) || varTop1 != varTop2 || 1732 !ucol_equal(coll, &nul, 0, &space, 1) || 1733 ucol_equal(coll, &nul, 0, &dot, 1) || 1734 ucol_equal(coll, &nul, 0, °ree, 1) || 1735 ucol_equal(coll, &nul, 0, &dollar, 1) || 1736 ucol_equal(coll, &nul, 0, &zero, 1) || 1737 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { 1738 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status)); 1739 } 1740 1741 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status); 1742 varTop2 = ucol_getVariableTop(coll, &status); 1743 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1); 1744 if(U_FAILURE(status) || varTop1 != varTop2 || 1745 !ucol_equal(coll, &nul, 0, &space, 1) || 1746 !ucol_equal(coll, &nul, 0, &dot, 1) || 1747 ucol_equal(coll, &nul, 0, °ree, 1) || 1748 ucol_equal(coll, &nul, 0, &dollar, 1) || 1749 ucol_equal(coll, &nul, 0, &zero, 1) || 1750 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) { 1751 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status)); 1752 } 1753 1754 varTop1 = ucol_setVariableTop(coll, °ree, 1, &status); 1755 varTop2 = ucol_getVariableTop(coll, &status); 1756 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1); 1757 if(U_FAILURE(status) || varTop1 != varTop2 || 1758 !ucol_equal(coll, &nul, 0, &space, 1) || 1759 !ucol_equal(coll, &nul, 0, &dot, 1) || 1760 !ucol_equal(coll, &nul, 0, °ree, 1) || 1761 ucol_equal(coll, &nul, 0, &dollar, 1) || 1762 ucol_equal(coll, &nul, 0, &zero, 1) || 1763 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) { 1764 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status)); 1765 } 1766 1767 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status); 1768 varTop2 = ucol_getVariableTop(coll, &status); 1769 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1); 1770 if(U_FAILURE(status) || varTop1 != varTop2 || 1771 !ucol_equal(coll, &nul, 0, &space, 1) || 1772 !ucol_equal(coll, &nul, 0, &dot, 1) || 1773 !ucol_equal(coll, &nul, 0, °ree, 1) || 1774 !ucol_equal(coll, &nul, 0, &dollar, 1) || 1775 ucol_equal(coll, &nul, 0, &zero, 1) || 1776 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { 1777 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status)); 1778 } 1779 1780 log_verbose("Testing setting variable top to contractions\n"); 1781 { 1782 UChar first[4] = { 0 }; 1783 first[0] = 0x0040; 1784 first[1] = 0x0050; 1785 first[2] = 0x0000; 1786 1787 status = U_ZERO_ERROR; 1788 ucol_setVariableTop(coll, first, -1, &status); 1789 1790 if(U_SUCCESS(status)) { 1791 log_err("Invalid contraction succeded in setting variable top!\n"); 1792 } 1793 1794 } 1795 1796 log_verbose("Test restoring variable top\n"); 1797 1798 status = U_ZERO_ERROR; 1799 ucol_restoreVariableTop(coll, varTopOriginal, &status); 1800 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 1801 log_err("Couldn't restore old variable top\n"); 1802 } 1803 1804 log_verbose("Testing calling with error set\n"); 1805 1806 status = U_INTERNAL_PROGRAM_ERROR; 1807 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); 1808 varTop2 = ucol_getVariableTop(coll, &status); 1809 ucol_restoreVariableTop(coll, varTop2, &status); 1810 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status); 1811 varTop2 = ucol_getVariableTop(NULL, &status); 1812 ucol_restoreVariableTop(NULL, varTop2, &status); 1813 if(status != U_INTERNAL_PROGRAM_ERROR) { 1814 log_err("Bad reaction to passed error!\n"); 1815 } 1816 ucol_close(coll); 1817 } else { 1818 log_data_err("Couldn't open UCA collator\n"); 1819 } 1820} 1821 1822static void TestMaxVariable() { 1823 UErrorCode status = U_ZERO_ERROR; 1824 UColReorderCode oldMax, max; 1825 UCollator *coll; 1826 1827 static const UChar nul = 0; 1828 static const UChar space = 0x20; 1829 static const UChar dot = 0x2e; /* punctuation */ 1830 static const UChar degree = 0xb0; /* symbol */ 1831 static const UChar dollar = 0x24; /* currency symbol */ 1832 static const UChar zero = 0x30; /* digit */ 1833 1834 coll = ucol_open("", &status); 1835 if(U_FAILURE(status)) { 1836 log_data_err("Couldn't open root collator\n"); 1837 return; 1838 } 1839 1840 oldMax = ucol_getMaxVariable(coll); 1841 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax); 1842 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 1843 1844 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); 1845 max = ucol_getMaxVariable(coll); 1846 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max); 1847 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE || 1848 !ucol_equal(coll, &nul, 0, &space, 1) || 1849 ucol_equal(coll, &nul, 0, &dot, 1) || 1850 ucol_equal(coll, &nul, 0, °ree, 1) || 1851 ucol_equal(coll, &nul, 0, &dollar, 1) || 1852 ucol_equal(coll, &nul, 0, &zero, 1) || 1853 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { 1854 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status)); 1855 } 1856 1857 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status); 1858 max = ucol_getMaxVariable(coll); 1859 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max); 1860 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION || 1861 !ucol_equal(coll, &nul, 0, &space, 1) || 1862 !ucol_equal(coll, &nul, 0, &dot, 1) || 1863 ucol_equal(coll, &nul, 0, °ree, 1) || 1864 ucol_equal(coll, &nul, 0, &dollar, 1) || 1865 ucol_equal(coll, &nul, 0, &zero, 1) || 1866 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) { 1867 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status)); 1868 } 1869 1870 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status); 1871 max = ucol_getMaxVariable(coll); 1872 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max); 1873 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL || 1874 !ucol_equal(coll, &nul, 0, &space, 1) || 1875 !ucol_equal(coll, &nul, 0, &dot, 1) || 1876 !ucol_equal(coll, &nul, 0, °ree, 1) || 1877 ucol_equal(coll, &nul, 0, &dollar, 1) || 1878 ucol_equal(coll, &nul, 0, &zero, 1) || 1879 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) { 1880 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status)); 1881 } 1882 1883 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status); 1884 max = ucol_getMaxVariable(coll); 1885 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max); 1886 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY || 1887 !ucol_equal(coll, &nul, 0, &space, 1) || 1888 !ucol_equal(coll, &nul, 0, &dot, 1) || 1889 !ucol_equal(coll, &nul, 0, °ree, 1) || 1890 !ucol_equal(coll, &nul, 0, &dollar, 1) || 1891 ucol_equal(coll, &nul, 0, &zero, 1) || 1892 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { 1893 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status)); 1894 } 1895 1896 log_verbose("Test restoring maxVariable\n"); 1897 status = U_ZERO_ERROR; 1898 ucol_setMaxVariable(coll, oldMax, &status); 1899 if(oldMax != ucol_getMaxVariable(coll)) { 1900 log_err("Couldn't restore old maxVariable\n"); 1901 } 1902 1903 log_verbose("Testing calling with error set\n"); 1904 status = U_INTERNAL_PROGRAM_ERROR; 1905 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); 1906 max = ucol_getMaxVariable(coll); 1907 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) { 1908 log_err("Bad reaction to passed error!\n"); 1909 } 1910 ucol_close(coll); 1911} 1912 1913static void TestNonChars(void) { 1914 static const char *test[] = { 1915 "\\u0000", /* ignorable */ 1916 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */ 1917 "\\uFDD0", "\\uFDEF", 1918 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */ 1919 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */ 1920 "\\U0003FFFE", "\\U0003FFFF", 1921 "\\U0004FFFE", "\\U0004FFFF", 1922 "\\U0005FFFE", "\\U0005FFFF", 1923 "\\U0006FFFE", "\\U0006FFFF", 1924 "\\U0007FFFE", "\\U0007FFFF", 1925 "\\U0008FFFE", "\\U0008FFFF", 1926 "\\U0009FFFE", "\\U0009FFFF", 1927 "\\U000AFFFE", "\\U000AFFFF", 1928 "\\U000BFFFE", "\\U000BFFFF", 1929 "\\U000CFFFE", "\\U000CFFFF", 1930 "\\U000DFFFE", "\\U000DFFFF", 1931 "\\U000EFFFE", "\\U000EFFFF", 1932 "\\U000FFFFE", "\\U000FFFFF", 1933 "\\U0010FFFE", "\\U0010FFFF", 1934 "\\uFFFF" /* special character with maximum primary weight */ 1935 }; 1936 UErrorCode status = U_ZERO_ERROR; 1937 UCollator *coll = ucol_open("en_US", &status); 1938 1939 log_verbose("Test non characters\n"); 1940 1941 if(U_SUCCESS(status)) { 1942 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS); 1943 } else { 1944 log_err_status(status, "Unable to open collator\n"); 1945 } 1946 1947 ucol_close(coll); 1948} 1949 1950static void TestExtremeCompression(void) { 1951 static char *test[4]; 1952 int32_t j = 0, i = 0; 1953 1954 for(i = 0; i<4; i++) { 1955 test[i] = (char *)malloc(2048*sizeof(char)); 1956 } 1957 1958 for(j = 20; j < 500; j++) { 1959 for(i = 0; i<4; i++) { 1960 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 1961 test[i][j-1] = (char)('a'+i); 1962 test[i][j] = 0; 1963 } 1964 genericLocaleStarter("en_US", (const char **)test, 4); 1965 } 1966 1967 1968 for(i = 0; i<4; i++) { 1969 free(test[i]); 1970 } 1971} 1972 1973#if 0 1974static void TestExtremeCompression(void) { 1975 static char *test[4]; 1976 int32_t j = 0, i = 0; 1977 UErrorCode status = U_ZERO_ERROR; 1978 UCollator *coll = ucol_open("en_US", status); 1979 for(i = 0; i<4; i++) { 1980 test[i] = (char *)malloc(2048*sizeof(char)); 1981 } 1982 for(j = 10; j < 2048; j++) { 1983 for(i = 0; i<4; i++) { 1984 uprv_memset(test[i], 'a', (j-2)*sizeof(char)); 1985 test[i][j-1] = (char)('a'+i); 1986 test[i][j] = 0; 1987 } 1988 } 1989 genericLocaleStarter("en_US", (const char **)test, 4); 1990 1991 for(j = 10; j < 2048; j++) { 1992 for(i = 0; i<1; i++) { 1993 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 1994 test[i][j] = 0; 1995 } 1996 } 1997 for(i = 0; i<4; i++) { 1998 free(test[i]); 1999 } 2000} 2001#endif 2002 2003static void TestSurrogates(void) { 2004 static const char *test[] = { 2005 "z","\\ud900\\udc25", "\\ud805\\udc50", 2006 "\\ud800\\udc00y", "\\ud800\\udc00r", 2007 "\\ud800\\udc00f", "\\ud800\\udc00", 2008 "\\ud800\\udc00c", "\\ud800\\udc00b", 2009 "\\ud800\\udc00fa", "\\ud800\\udc00fb", 2010 "\\ud800\\udc00a", 2011 "c", "b" 2012 }; 2013 2014 static const char *rule = 2015 "&z < \\ud900\\udc25 < \\ud805\\udc50" 2016 "< \\ud800\\udc00y < \\ud800\\udc00r" 2017 "< \\ud800\\udc00f << \\ud800\\udc00" 2018 "< \\ud800\\udc00fa << \\ud800\\udc00fb" 2019 "< \\ud800\\udc00a < c < b" ; 2020 2021 genericRulesStarter(rule, test, 14); 2022} 2023 2024/* This is a test for prefix implementation, used by JIS X 4061 collation rules */ 2025static void TestPrefix(void) { 2026 uint32_t i; 2027 2028 static const struct { 2029 const char *rules; 2030 const char *data[50]; 2031 const uint32_t len; 2032 } tests[] = { 2033 { "&z <<< z|a", 2034 {"zz", "za"}, 2 }, 2035 2036 { "&z <<< z| a", 2037 {"zz", "za"}, 2 }, 2038 { "[strength I]" 2039 "&a=\\ud900\\udc25" 2040 "&z<<<\\ud900\\udc25|a", 2041 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 }, 2042 }; 2043 2044 2045 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2046 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 2047 } 2048} 2049 2050/* This test uses data suplied by Masashiko Maedera to test the implementation */ 2051/* JIS X 4061 collation order implementation */ 2052static void TestNewJapanese(void) { 2053 2054 static const char * const test1[] = { 2055 "\\u30b7\\u30e3\\u30fc\\u30ec", 2056 "\\u30b7\\u30e3\\u30a4", 2057 "\\u30b7\\u30e4\\u30a3", 2058 "\\u30b7\\u30e3\\u30ec", 2059 "\\u3061\\u3087\\u3053", 2060 "\\u3061\\u3088\\u3053", 2061 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8", 2062 "\\u3066\\u30fc\\u305f", 2063 "\\u30c6\\u30fc\\u30bf", 2064 "\\u30c6\\u30a7\\u30bf", 2065 "\\u3066\\u3048\\u305f", 2066 "\\u3067\\u30fc\\u305f", 2067 "\\u30c7\\u30fc\\u30bf", 2068 "\\u30c7\\u30a7\\u30bf", 2069 "\\u3067\\u3048\\u305f", 2070 "\\u3066\\u30fc\\u305f\\u30fc", 2071 "\\u30c6\\u30fc\\u30bf\\u30a1", 2072 "\\u30c6\\u30a7\\u30bf\\u30fc", 2073 "\\u3066\\u3047\\u305f\\u3041", 2074 "\\u3066\\u3048\\u305f\\u30fc", 2075 "\\u3067\\u30fc\\u305f\\u30fc", 2076 "\\u30c7\\u30fc\\u30bf\\u30a1", 2077 "\\u3067\\u30a7\\u305f\\u30a1", 2078 "\\u30c7\\u3047\\u30bf\\u3041", 2079 "\\u30c7\\u30a8\\u30bf\\u30a2", 2080 "\\u3072\\u3086", 2081 "\\u3073\\u3085\\u3042", 2082 "\\u3074\\u3085\\u3042", 2083 "\\u3073\\u3085\\u3042\\u30fc", 2084 "\\u30d3\\u30e5\\u30a2\\u30fc", 2085 "\\u3074\\u3085\\u3042\\u30fc", 2086 "\\u30d4\\u30e5\\u30a2\\u30fc", 2087 "\\u30d2\\u30e5\\u30a6", 2088 "\\u30d2\\u30e6\\u30a6", 2089 "\\u30d4\\u30e5\\u30a6\\u30a2", 2090 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", 2091 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", 2092 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", 2093 "\\u3072\\u3085\\u3093", 2094 "\\u3074\\u3085\\u3093", 2095 "\\u3075\\u30fc\\u308a", 2096 "\\u30d5\\u30fc\\u30ea", 2097 "\\u3075\\u3045\\u308a", 2098 "\\u3075\\u30a5\\u308a", 2099 "\\u3075\\u30a5\\u30ea", 2100 "\\u30d5\\u30a6\\u30ea", 2101 "\\u3076\\u30fc\\u308a", 2102 "\\u30d6\\u30fc\\u30ea", 2103 "\\u3076\\u3045\\u308a", 2104 "\\u30d6\\u30a5\\u308a", 2105 "\\u3077\\u3046\\u308a", 2106 "\\u30d7\\u30a6\\u30ea", 2107 "\\u3075\\u30fc\\u308a\\u30fc", 2108 "\\u30d5\\u30a5\\u30ea\\u30fc", 2109 "\\u3075\\u30a5\\u308a\\u30a3", 2110 "\\u30d5\\u3045\\u308a\\u3043", 2111 "\\u30d5\\u30a6\\u30ea\\u30fc", 2112 "\\u3075\\u3046\\u308a\\u3043", 2113 "\\u30d6\\u30a6\\u30ea\\u30a4", 2114 "\\u3077\\u30fc\\u308a\\u30fc", 2115 "\\u3077\\u30a5\\u308a\\u30a4", 2116 "\\u3077\\u3046\\u308a\\u30fc", 2117 "\\u30d7\\u30a6\\u30ea\\u30a4", 2118 "\\u30d5\\u30fd", 2119 "\\u3075\\u309e", 2120 "\\u3076\\u309d", 2121 "\\u3076\\u3075", 2122 "\\u3076\\u30d5", 2123 "\\u30d6\\u3075", 2124 "\\u30d6\\u30d5", 2125 "\\u3076\\u309e", 2126 "\\u3076\\u3077", 2127 "\\u30d6\\u3077", 2128 "\\u3077\\u309d", 2129 "\\u30d7\\u30fd", 2130 "\\u3077\\u3075", 2131}; 2132 2133 static const char *test2[] = { 2134 "\\u306f\\u309d", /* H\\u309d */ 2135 "\\u30cf\\u30fd", /* K\\u30fd */ 2136 "\\u306f\\u306f", /* HH */ 2137 "\\u306f\\u30cf", /* HK */ 2138 "\\u30cf\\u30cf", /* KK */ 2139 "\\u306f\\u309e", /* H\\u309e */ 2140 "\\u30cf\\u30fe", /* K\\u30fe */ 2141 "\\u306f\\u3070", /* HH\\u309b */ 2142 "\\u30cf\\u30d0", /* KK\\u309b */ 2143 "\\u306f\\u3071", /* HH\\u309c */ 2144 "\\u30cf\\u3071", /* KH\\u309c */ 2145 "\\u30cf\\u30d1", /* KK\\u309c */ 2146 "\\u3070\\u309d", /* H\\u309b\\u309d */ 2147 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ 2148 "\\u3070\\u306f", /* H\\u309bH */ 2149 "\\u30d0\\u30cf", /* K\\u309bK */ 2150 "\\u3070\\u309e", /* H\\u309b\\u309e */ 2151 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ 2152 "\\u3070\\u3070", /* H\\u309bH\\u309b */ 2153 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ 2154 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ 2155 "\\u3070\\u3071", /* H\\u309bH\\u309c */ 2156 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ 2157 "\\u3071\\u309d", /* H\\u309c\\u309d */ 2158 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ 2159 "\\u3071\\u306f", /* H\\u309cH */ 2160 "\\u30d1\\u30cf", /* K\\u309cK */ 2161 "\\u3071\\u3070", /* H\\u309cH\\u309b */ 2162 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ 2163 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ 2164 "\\u3071\\u3071", /* H\\u309cH\\u309c */ 2165 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ 2166 }; 2167 /* 2168 static const char *test3[] = { 2169 "\\u221er\\u221e", 2170 "\\u221eR#", 2171 "\\u221et\\u221e", 2172 "#r\\u221e", 2173 "#R#", 2174 "#t%", 2175 "#T%", 2176 "8t\\u221e", 2177 "8T\\u221e", 2178 "8t#", 2179 "8T#", 2180 "8t%", 2181 "8T%", 2182 "8t8", 2183 "8T8", 2184 "\\u03c9r\\u221e", 2185 "\\u03a9R%", 2186 "rr\\u221e", 2187 "rR\\u221e", 2188 "Rr\\u221e", 2189 "RR\\u221e", 2190 "RT%", 2191 "rt8", 2192 "tr\\u221e", 2193 "tr8", 2194 "TR8", 2195 "tt8", 2196 "\\u30b7\\u30e3\\u30fc\\u30ec", 2197 }; 2198 */ 2199 static const UColAttribute att[] = { UCOL_STRENGTH }; 2200 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; 2201 2202 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING}; 2203 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED }; 2204 2205 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1); 2206 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1); 2207 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ 2208 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2); 2209 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2); 2210} 2211 2212static void TestStrCollIdenticalPrefix(void) { 2213 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; 2214 const char* test[] = { 2215 "ab\\ud9b0\\udc70", 2216 "ab\\ud9b0\\udc71" 2217 }; 2218 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL); 2219} 2220/* Contractions should have all their canonically equivalent */ 2221/* strings included */ 2222static void TestContractionClosure(void) { 2223 static const struct { 2224 const char *rules; 2225 const char *data[10]; 2226 const uint32_t len; 2227 } tests[] = { 2228 { "&b=\\u00e4\\u00e4", 2229 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5}, 2230 { "&b=\\u00C5", 2231 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, 2232 }; 2233 uint32_t i; 2234 2235 2236 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2237 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL); 2238 } 2239} 2240 2241/* This tests also fails*/ 2242static void TestBeforePrefixFailure(void) { 2243 static const struct { 2244 const char *rules; 2245 const char *data[10]; 2246 const uint32_t len; 2247 } tests[] = { 2248 { "&g <<< a" 2249 "&[before 3]\\uff41 <<< x", 2250 {"x", "\\uff41"}, 2 }, 2251 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 2252 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 2253 "&[before 3]\\u30a7<<<\\u30a9", 2254 {"\\u30a9", "\\u30a7"}, 2 }, 2255 { "&[before 3]\\u30a7<<<\\u30a9" 2256 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 2257 "&\\u30A8=\\u30A8=\\u3048=\\uff74", 2258 {"\\u30a9", "\\u30a7"}, 2 }, 2259 }; 2260 uint32_t i; 2261 2262 2263 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2264 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 2265 } 2266 2267#if 0 2268 const char* rule1 = 2269 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 2270 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 2271 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"; 2272 const char* rule2 = 2273 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc" 2274 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 2275 "&\\u30A8=\\u30A8=\\u3048=\\uff74"; 2276 const char* test[] = { 2277 "\\u30c6\\u30fc\\u30bf", 2278 "\\u30c6\\u30a7\\u30bf", 2279 }; 2280 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0])); 2281 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0])); 2282/* this piece of code should be in some sort of verbose mode */ 2283/* it gets the collation elements for elements and prints them */ 2284/* This is useful when trying to see whether the problem is */ 2285 { 2286 UErrorCode status = U_ZERO_ERROR; 2287 uint32_t i = 0; 2288 UCollationElements *it = NULL; 2289 uint32_t CE; 2290 UChar string[256]; 2291 uint32_t uStringLen; 2292 UCollator *coll = NULL; 2293 2294 uStringLen = u_unescape(rule1, string, 256); 2295 2296 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2297 2298 /*coll = ucol_open("ja_JP_JIS", &status);*/ 2299 it = ucol_openElements(coll, string, 0, &status); 2300 2301 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) { 2302 log_verbose("%s\n", test[i]); 2303 uStringLen = u_unescape(test[i], string, 256); 2304 ucol_setText(it, string, uStringLen, &status); 2305 2306 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) { 2307 log_verbose("%08X\n", CE); 2308 } 2309 log_verbose("\n"); 2310 2311 } 2312 2313 ucol_closeElements(it); 2314 ucol_close(coll); 2315 } 2316#endif 2317} 2318 2319static void TestPrefixCompose(void) { 2320 const char* rule1 = 2321 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; 2322 /* 2323 const char* test[] = { 2324 "\\u30c6\\u30fc\\u30bf", 2325 "\\u30c6\\u30a7\\u30bf", 2326 }; 2327 */ 2328 { 2329 UErrorCode status = U_ZERO_ERROR; 2330 /*uint32_t i = 0;*/ 2331 /*UCollationElements *it = NULL;*/ 2332/* uint32_t CE;*/ 2333 UChar string[256]; 2334 uint32_t uStringLen; 2335 UCollator *coll = NULL; 2336 2337 uStringLen = u_unescape(rule1, string, 256); 2338 2339 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2340 ucol_close(coll); 2341 } 2342 2343 2344} 2345 2346/* 2347[last variable] last variable value 2348[last primary ignorable] largest CE for primary ignorable 2349[last secondary ignorable] largest CE for secondary ignorable 2350[last tertiary ignorable] largest CE for tertiary ignorable 2351[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) 2352*/ 2353 2354static void TestRuleOptions(void) { 2355 /* values here are hardcoded and are correct for the current UCA 2356 * when the UCA changes, one might be forced to change these 2357 * values. 2358 */ 2359 2360 /* 2361 * These strings contain the last character before [variable top] 2362 * and the first and second characters (by primary weights) after it. 2363 * See FractionalUCA.txt. For example: 2364 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR 2365 [variable top = 0C FE] 2366 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT 2367 and 2368 00B4; [0D 0C, 05, 05] 2369 * 2370 * Note: Starting with UCA 6.0, the [variable top] collation element 2371 * is not the weight of any character or string, 2372 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable]. 2373 */ 2374#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F" 2375#define FIRST_REGULAR_CHAR_STRING "\\u0060" 2376#define SECOND_REGULAR_CHAR_STRING "\\u00B4" 2377 2378 /* 2379 * This string has to match the character that has the [last regular] weight 2380 * which changes with each UCA version. 2381 * See the bottom of FractionalUCA.txt which says something like 2382 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032 2383 * 2384 * Note: Starting with UCA 6.0, the [last regular] collation element 2385 * is not the weight of any character or string, 2386 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. 2387 */ 2388#define LAST_REGULAR_CHAR_STRING "\\U0001342E" 2389 2390 static const struct { 2391 const char *rules; 2392 const char *data[10]; 2393 const uint32_t len; 2394 } tests[] = { 2395#if 0 2396 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */ 2397 /* - all befores here amount to zero */ 2398 { "&[before 3][first tertiary ignorable]<<<a", 2399 { "\\u0000", "a"}, 2 2400 }, /* you cannot go before first tertiary ignorable */ 2401 2402 { "&[before 3][last tertiary ignorable]<<<a", 2403 { "\\u0000", "a"}, 2 2404 }, /* you cannot go before last tertiary ignorable */ 2405#endif 2406 /* 2407 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt), 2408 * and it *is* possible to "go before" that. 2409 */ 2410 { "&[before 3][first secondary ignorable]<<<a", 2411 { "\\u0000", "a"}, 2 2412 }, 2413 2414 { "&[before 3][last secondary ignorable]<<<a", 2415 { "\\u0000", "a"}, 2 2416 }, 2417 2418 /* 'normal' befores */ 2419 2420 /* 2421 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt, 2422 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a 2423 * because there is no tailoring space before that boundary. 2424 * Made the tests work by tailoring to a space instead. 2425 */ 2426 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */ 2427 { "c", "b", "\\u0332", "a" }, 4 2428 }, 2429 2430 /* we don't have a code point that corresponds to 2431 * the last primary ignorable 2432 */ 2433 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */ 2434 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 2435 }, 2436 2437 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 2438 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 2439 }, 2440 2441 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 2442 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5 2443 }, 2444 2445 { "&[first regular]<a" 2446 "&[before 1][first regular]<b", 2447 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 2448 }, 2449 2450 { "&[before 1][last regular]<b" 2451 "&[last regular]<a", 2452 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4 2453 }, 2454 2455 { "&[before 1][first implicit]<b" 2456 "&[first implicit]<a", 2457 { "b", "\\u4e00", "a", "\\u4e01"}, 4 2458 }, 2459#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */ 2460 { "&[before 1][last implicit]<b" 2461 "&[last implicit]<a", 2462 { "b", "\\U0010FFFD", "a" }, 3 2463 }, 2464#endif 2465 { "&[last variable]<z" 2466 "&' '<x" /* was &[last primary ignorable]<x, see above */ 2467 "&[last secondary ignorable]<<y" 2468 "&[last tertiary ignorable]<<<w" 2469 "&[top]<u", 2470 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7 2471 } 2472 2473 }; 2474 uint32_t i; 2475 2476 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2477 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 2478 } 2479} 2480 2481 2482static void TestOptimize(void) { 2483 /* this is not really a test - just trying out 2484 * whether copying of UCA contents will fail 2485 * Cannot really test, since the functionality 2486 * remains the same. 2487 */ 2488 static const struct { 2489 const char *rules; 2490 const char *data[10]; 2491 const uint32_t len; 2492 } tests[] = { 2493 /* - all befores here amount to zero */ 2494 { "[optimize [\\uAC00-\\uD7FF]]", 2495 { "a", "b"}, 2} 2496 }; 2497 uint32_t i; 2498 2499 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2500 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 2501 } 2502} 2503 2504/* 2505cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator. 2506weiv ucol_strcollIter? 2507cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021 2508weiv these are the input strings? 2509cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2 2510weiv will check - could be a problem with utf-8 iterator 2511cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2 2512weiv hmmm 2513cycheng@ca.ibm.c... note that we have a standalone high surrogate 2514weiv that doesn't sound right 2515cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000 2516weiv so you have two strings, you convert them to utf-8 and to utf-16BE 2517cycheng@ca.ibm.c... yes 2518weiv and then do the comparison 2519cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be 2520weiv utf-16 strings look like a little endian ones in the example you sent me 2521weiv It could be a bug - let me try to test it out 2522cycheng@ca.ibm.c... ok 2523cycheng@ca.ibm.c... we can wait till the conf. call 2524cycheng@ca.ibm.c... next weke 2525weiv that would be great 2526weiv hmmm 2527weiv I might be wrong 2528weiv let me play with it some more 2529cycheng@ca.ibm.c... ok 2530cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be 2531cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2 2532cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be 2533weiv ok 2534cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data 2535weiv thanks 2536cycheng@ca.ibm.c... the 4 strings we sent are just samples 2537*/ 2538#if 0 2539static void Alexis(void) { 2540 UErrorCode status = U_ZERO_ERROR; 2541 UCollator *coll = ucol_open("", &status); 2542 2543 2544 const char utf16be[2][4] = { 2545 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 }, 2546 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 } 2547 }; 2548 2549 const char utf8[2][4] = { 2550 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 }, 2551 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 }, 2552 }; 2553 2554 UCharIterator iterU161, iterU162; 2555 UCharIterator iterU81, iterU82; 2556 2557 UCollationResult resU16, resU8; 2558 2559 uiter_setUTF16BE(&iterU161, utf16be[0], 4); 2560 uiter_setUTF16BE(&iterU162, utf16be[1], 4); 2561 2562 uiter_setUTF8(&iterU81, utf8[0], 4); 2563 uiter_setUTF8(&iterU82, utf8[1], 4); 2564 2565 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2566 2567 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status); 2568 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status); 2569 2570 2571 if(resU16 != resU8) { 2572 log_err("different results\n"); 2573 } 2574 2575 ucol_close(coll); 2576} 2577#endif 2578 2579#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 2580static void Alexis2(void) { 2581 UErrorCode status = U_ZERO_ERROR; 2582 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 2583 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 2584 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 2585 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0; 2586 2587 UConverter *conv = NULL; 2588 2589 UCharIterator U16BEItS, U16BEItT; 2590 UCharIterator U8ItS, U8ItT; 2591 2592 UCollationResult resU16, resU16BE, resU8; 2593 2594 static const char* const pairs[][2] = { 2595 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, 2596 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, 2597 { "\\u0E40\\u0021", "\\u00A1\\u0021"}, 2598 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, 2599 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, 2600 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, 2601 { "\\u0020", "\\u0020\\u0000"} 2602/* 26035F20 (my result here) 26045F204E008E3F 26055F20 (your result here) 2606*/ 2607 }; 2608 2609 int32_t i = 0; 2610 2611 UCollator *coll = ucol_open("", &status); 2612 if(status == U_FILE_ACCESS_ERROR) { 2613 log_data_err("Is your data around?\n"); 2614 return; 2615 } else if(U_FAILURE(status)) { 2616 log_err("Error opening collator\n"); 2617 return; 2618 } 2619 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2620 conv = ucnv_open("UTF16BE", &status); 2621 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { 2622 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); 2623 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); 2624 2625 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); 2626 2627 log_verbose("Result of strcoll is %i\n", resU16); 2628 2629 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status); 2630 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status); 2631 (void)U16BELenS; /* Suppress set but not used warnings. */ 2632 (void)U16BELenT; 2633 2634 /* use the original sizes, as the result from converter is in bytes */ 2635 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); 2636 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); 2637 2638 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); 2639 2640 log_verbose("Result of U16BE is %i\n", resU16BE); 2641 2642 if(resU16 != resU16BE) { 2643 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]); 2644 } 2645 2646 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status); 2647 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status); 2648 2649 uiter_setUTF8(&U8ItS, U8Source, U8LenS); 2650 uiter_setUTF8(&U8ItT, U8Target, U8LenT); 2651 2652 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); 2653 2654 if(resU16 != resU8) { 2655 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]); 2656 } 2657 2658 } 2659 2660 ucol_close(coll); 2661 ucnv_close(conv); 2662} 2663 2664static void TestHebrewUCA(void) { 2665 UErrorCode status = U_ZERO_ERROR; 2666 static const char *first[] = { 2667 "d790d6b8d79cd795d6bcd7a9", 2668 "d790d79cd79ed7a7d799d799d7a1", 2669 "d790d6b4d79ed795d6bcd7a9", 2670 }; 2671 2672 char utf8String[3][256]; 2673 UChar utf16String[3][256]; 2674 2675 int32_t i = 0, j = 0; 2676 int32_t sizeUTF8[3]; 2677 int32_t sizeUTF16[3]; 2678 2679 UCollator *coll = ucol_open("", &status); 2680 if (U_FAILURE(status)) { 2681 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status)); 2682 return; 2683 } 2684 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ 2685 2686 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { 2687 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); 2688 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status); 2689 log_verbose("%i: "); 2690 for(j = 0; j < sizeUTF16[i]; j++) { 2691 /*log_verbose("\\u%04X", utf16String[i][j]);*/ 2692 log_verbose("%04X", utf16String[i][j]); 2693 } 2694 log_verbose("\n"); 2695 } 2696 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { 2697 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { 2698 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); 2699 } 2700 } 2701 2702 ucol_close(coll); 2703 2704} 2705 2706static void TestPartialSortKeyTermination(void) { 2707 static const char* cases[] = { 2708 "\\u1234\\u1234\\udc00", 2709 "\\udc00\\ud800\\ud800" 2710 }; 2711 2712 int32_t i; 2713 2714 UErrorCode status = U_ZERO_ERROR; 2715 2716 UCollator *coll = ucol_open("", &status); 2717 2718 UCharIterator iter; 2719 2720 UChar currCase[256]; 2721 int32_t length = 0; 2722 int32_t pKeyLen = 0; 2723 2724 uint8_t key[256]; 2725 2726 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 2727 uint32_t state[2] = {0, 0}; 2728 length = u_unescape(cases[i], currCase, 256); 2729 uiter_setString(&iter, currCase, length); 2730 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); 2731 (void)pKeyLen; /* Suppress set but not used warning. */ 2732 2733 log_verbose("Done\n"); 2734 2735 } 2736 ucol_close(coll); 2737} 2738 2739static void TestSettings(void) { 2740 static const char* cases[] = { 2741 "apple", 2742 "Apple" 2743 }; 2744 2745 static const char* locales[] = { 2746 "", 2747 "en" 2748 }; 2749 2750 UErrorCode status = U_ZERO_ERROR; 2751 2752 int32_t i = 0, j = 0; 2753 2754 UChar source[256], target[256]; 2755 int32_t sLen = 0, tLen = 0; 2756 2757 UCollator *collateObject = NULL; 2758 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { 2759 collateObject = ucol_open(locales[i], &status); 2760 ucol_setStrength(collateObject, UCOL_PRIMARY); 2761 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); 2762 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { 2763 sLen = u_unescape(cases[j-1], source, 256); 2764 source[sLen] = 0; 2765 tLen = u_unescape(cases[j], target, 256); 2766 source[tLen] = 0; 2767 doTest(collateObject, source, target, UCOL_EQUAL); 2768 } 2769 ucol_close(collateObject); 2770 } 2771} 2772 2773static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) { 2774 UErrorCode status = U_ZERO_ERROR; 2775 int32_t errorNo = 0; 2776 const UChar *sourceRules = NULL; 2777 int32_t sourceRulesLen = 0; 2778 UParseError parseError; 2779 UColAttributeValue french = UCOL_OFF; 2780 2781 if(!ucol_equals(source, target)) { 2782 log_err("Same collators, different address not equal\n"); 2783 errorNo++; 2784 } 2785 ucol_close(target); 2786 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 2787 target = ucol_safeClone(source, NULL, NULL, &status); 2788 if(U_FAILURE(status)) { 2789 log_err("Error creating clone\n"); 2790 errorNo++; 2791 return errorNo; 2792 } 2793 if(!ucol_equals(source, target)) { 2794 log_err("Collator different from it's clone\n"); 2795 errorNo++; 2796 } 2797 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); 2798 if(french == UCOL_ON) { 2799 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 2800 } else { 2801 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 2802 } 2803 if(U_FAILURE(status)) { 2804 log_err("Error setting attributes\n"); 2805 errorNo++; 2806 return errorNo; 2807 } 2808 if(ucol_equals(source, target)) { 2809 log_err("Collators same even when options changed\n"); 2810 errorNo++; 2811 } 2812 ucol_close(target); 2813 2814 sourceRules = ucol_getRules(source, &sourceRulesLen); 2815 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 2816 if(U_FAILURE(status)) { 2817 log_err("Error instantiating target from rules - %s\n", u_errorName(status)); 2818 errorNo++; 2819 return errorNo; 2820 } 2821 if(!ucol_equals(source, target)) { 2822 log_err("Collator different from collator that was created from the same rules\n"); 2823 errorNo++; 2824 } 2825 ucol_close(target); 2826 } 2827 return errorNo; 2828} 2829 2830 2831static void TestEquals(void) { 2832 /* ucol_equals is not currently a public API. There is a chance that it will become 2833 * something like this, but currently it is only used by RuleBasedCollator::operator== 2834 */ 2835 /* test whether the two collators instantiated from the same locale are equal */ 2836 UErrorCode status = U_ZERO_ERROR; 2837 UParseError parseError; 2838 int32_t noOfLoc = uloc_countAvailable(); 2839 const char *locName = NULL; 2840 UCollator *source = NULL, *target = NULL; 2841 int32_t i = 0; 2842 2843 const char* rules[] = { 2844 "&l < lj <<< Lj <<< LJ", 2845 "&n < nj <<< Nj <<< NJ", 2846 "&ae <<< \\u00e4", 2847 "&AE <<< \\u00c4" 2848 }; 2849 /* 2850 const char* badRules[] = { 2851 "&l <<< Lj", 2852 "&n < nj <<< nJ <<< NJ", 2853 "&a <<< \\u00e4", 2854 "&AE <<< \\u00c4 <<< x" 2855 }; 2856 */ 2857 2858 UChar sourceRules[1024], targetRules[1024]; 2859 int32_t sourceRulesSize = 0, targetRulesSize = 0; 2860 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); 2861 2862 for(i = 0; i < rulesSize; i++) { 2863 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize); 2864 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize); 2865 } 2866 2867 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 2868 if(status == U_FILE_ACCESS_ERROR) { 2869 log_data_err("Is your data around?\n"); 2870 return; 2871 } else if(U_FAILURE(status)) { 2872 log_err("Error opening collator\n"); 2873 return; 2874 } 2875 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 2876 if(!ucol_equals(source, target)) { 2877 log_err("Equivalent collators not equal!\n"); 2878 } 2879 ucol_close(source); 2880 ucol_close(target); 2881 2882 source = ucol_open("root", &status); 2883 target = ucol_open("root", &status); 2884 log_verbose("Testing root\n"); 2885 if(!ucol_equals(source, source)) { 2886 log_err("Same collator not equal\n"); 2887 } 2888 if(TestEqualsForCollator(locName, source, target)) { 2889 log_err("Errors for root\n", locName); 2890 } 2891 ucol_close(source); 2892 2893 for(i = 0; i<noOfLoc; i++) { 2894 status = U_ZERO_ERROR; 2895 locName = uloc_getAvailable(i); 2896 /*if(hasCollationElements(locName)) {*/ 2897 log_verbose("Testing equality for locale %s\n", locName); 2898 source = ucol_open(locName, &status); 2899 target = ucol_open(locName, &status); 2900 if (U_FAILURE(status)) { 2901 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status)); 2902 continue; 2903 } 2904 if(TestEqualsForCollator(locName, source, target)) { 2905 log_err("Errors for locale %s\n", locName); 2906 } 2907 ucol_close(source); 2908 /*}*/ 2909 } 2910} 2911 2912static void TestJ2726(void) { 2913 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ 2914 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ 2915 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ 2916 UErrorCode status = U_ZERO_ERROR; 2917 UCollator *coll = ucol_open("en", &status); 2918 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 2919 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 2920 doTest(coll, a, aSpace, UCOL_EQUAL); 2921 doTest(coll, aSpace, a, UCOL_EQUAL); 2922 doTest(coll, a, spaceA, UCOL_EQUAL); 2923 doTest(coll, spaceA, a, UCOL_EQUAL); 2924 doTest(coll, spaceA, aSpace, UCOL_EQUAL); 2925 doTest(coll, aSpace, spaceA, UCOL_EQUAL); 2926 ucol_close(coll); 2927} 2928 2929static void NullRule(void) { 2930 UChar r[3] = {0}; 2931 UErrorCode status = U_ZERO_ERROR; 2932 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2933 if(U_SUCCESS(status)) { 2934 log_err("This should have been an error!\n"); 2935 ucol_close(coll); 2936 } else { 2937 status = U_ZERO_ERROR; 2938 } 2939 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2940 if(U_FAILURE(status)) { 2941 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status)); 2942 } else { 2943 ucol_close(coll); 2944 } 2945} 2946 2947/** 2948 * Test for CollationElementIterator previous and next for the whole set of 2949 * unicode characters with normalization on. 2950 */ 2951static void TestNumericCollation(void) 2952{ 2953 UErrorCode status = U_ZERO_ERROR; 2954 2955 const static char *basicTestStrings[]={ 2956 "hello1", 2957 "hello2", 2958 "hello2002", 2959 "hello2003", 2960 "hello123456", 2961 "hello1234567", 2962 "hello10000000", 2963 "hello100000000", 2964 "hello1000000000", 2965 "hello10000000000", 2966 }; 2967 2968 const static char *preZeroTestStrings[]={ 2969 "avery10000", 2970 "avery010000", 2971 "avery0010000", 2972 "avery00010000", 2973 "avery000010000", 2974 "avery0000010000", 2975 "avery00000010000", 2976 "avery000000010000", 2977 }; 2978 2979 const static char *thirtyTwoBitNumericStrings[]={ 2980 "avery42949672960", 2981 "avery42949672961", 2982 "avery42949672962", 2983 "avery429496729610" 2984 }; 2985 2986 const static char *longNumericStrings[]={ 2987 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings. 2988 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that 2989 are treated as multiple collation elements. */ 2990 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */ 2991 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */ 2992 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */ 2993 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */ 2994 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */ 2995 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ 2996 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ 2997 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */ 2998 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */ 2999 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */ 3000 }; 3001 3002 const static char *supplementaryDigits[] = { 3003 "\\uD835\\uDFCE", /* 0 */ 3004 "\\uD835\\uDFCF", /* 1 */ 3005 "\\uD835\\uDFD0", /* 2 */ 3006 "\\uD835\\uDFD1", /* 3 */ 3007 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ 3008 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ 3009 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ 3010 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ 3011 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ 3012 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ 3013 }; 3014 3015 const static char *foreignDigits[] = { 3016 "\\u0661", 3017 "\\u0662", 3018 "\\u0663", 3019 "\\u0661\\u0660", 3020 "\\u0661\\u0662", 3021 "\\u0661\\u0663", 3022 "\\u0662\\u0660", 3023 "\\u0662\\u0662", 3024 "\\u0662\\u0663", 3025 "\\u0663\\u0660", 3026 "\\u0663\\u0662", 3027 "\\u0663\\u0663" 3028 }; 3029 3030 const static char *evenZeroes[] = { 3031 "2000", 3032 "2001", 3033 "2002", 3034 "2003" 3035 }; 3036 3037 UColAttribute att = UCOL_NUMERIC_COLLATION; 3038 UColAttributeValue val = UCOL_ON; 3039 3040 /* Open our collator. */ 3041 UCollator* coll = ucol_open("root", &status); 3042 if (U_FAILURE(status)){ 3043 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", 3044 myErrorName(status)); 3045 return; 3046 } 3047 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1); 3048 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1); 3049 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); 3050 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1); 3051 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); 3052 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1); 3053 3054 /* Setting up our collator to do digits. */ 3055 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 3056 if (U_FAILURE(status)){ 3057 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n", 3058 myErrorName(status)); 3059 return; 3060 } 3061 3062 /* 3063 Testing that prepended zeroes still yield the correct collation behavior. 3064 We expect that every element in our strings array will be equal. 3065 */ 3066 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); 3067 3068 ucol_close(coll); 3069} 3070 3071static void TestTibetanConformance(void) 3072{ 3073 const char* test[] = { 3074 "\\u0FB2\\u0591\\u0F71\\u0061", 3075 "\\u0FB2\\u0F71\\u0061" 3076 }; 3077 3078 UErrorCode status = U_ZERO_ERROR; 3079 UCollator *coll = ucol_open("", &status); 3080 UChar source[100]; 3081 UChar target[100]; 3082 int result; 3083 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3084 if (U_SUCCESS(status)) { 3085 u_unescape(test[0], source, 100); 3086 u_unescape(test[1], target, 100); 3087 doTest(coll, source, target, UCOL_EQUAL); 3088 result = ucol_strcoll(coll, source, -1, target, -1); 3089 log_verbose("result %d\n", result); 3090 if (UCOL_EQUAL != result) { 3091 log_err("Tibetan comparison error\n"); 3092 } 3093 } 3094 ucol_close(coll); 3095 3096 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 3097} 3098 3099static void TestPinyinProblem(void) { 3100 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" }; 3101 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 3102} 3103 3104/** 3105 * Iterate through the given iterator, checking to see that all the strings 3106 * in the expected array are present. 3107 * @param expected array of strings we expect to see, or NULL 3108 * @param expectedCount number of elements of expected, or 0 3109 */ 3110static int32_t checkUEnumeration(const char* msg, 3111 UEnumeration* iter, 3112 const char** expected, 3113 int32_t expectedCount) { 3114 UErrorCode ec = U_ZERO_ERROR; 3115 int32_t i = 0, n, j, bit; 3116 int32_t seenMask = 0; 3117 3118 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ 3119 n = uenum_count(iter, &ec); 3120 if (!assertSuccess("count", &ec)) return -1; 3121 log_verbose("%s = [", msg); 3122 for (;; ++i) { 3123 const char* s = uenum_next(iter, NULL, &ec); 3124 if (!assertSuccess("snext", &ec) || s == NULL) break; 3125 if (i != 0) log_verbose(","); 3126 log_verbose("%s", s); 3127 /* check expected list */ 3128 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 3129 if ((seenMask&bit) == 0 && 3130 uprv_strcmp(s, expected[j]) == 0) { 3131 seenMask |= bit; 3132 break; 3133 } 3134 } 3135 } 3136 log_verbose("] (%d)\n", i); 3137 assertTrue("count verified", i==n); 3138 /* did we see all expected strings? */ 3139 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 3140 if ((seenMask&bit)!=0) { 3141 log_verbose("Ok: \"%s\" seen\n", expected[j]); 3142 } else { 3143 log_err("FAIL: \"%s\" not seen\n", expected[j]); 3144 } 3145 } 3146 return n; 3147} 3148 3149/** 3150 * Test new API added for separate collation tree. 3151 */ 3152static void TestSeparateTrees(void) { 3153 UErrorCode ec = U_ZERO_ERROR; 3154 UEnumeration *e = NULL; 3155 int32_t n = -1; 3156 UBool isAvailable; 3157 char loc[256]; 3158 3159 static const char* AVAIL[] = { "en", "de" }; 3160 3161 static const char* KW[] = { "collation" }; 3162 3163 static const char* KWVAL[] = { "phonebook", "stroke" }; 3164 3165#if !UCONFIG_NO_SERVICE 3166 e = ucol_openAvailableLocales(&ec); 3167 if (e != NULL) { 3168 assertSuccess("ucol_openAvailableLocales", &ec); 3169 assertTrue("ucol_openAvailableLocales!=0", e!=0); 3170 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)); 3171 (void)n; /* Suppress set but not used warnings. */ 3172 /* Don't need to check n because we check list */ 3173 uenum_close(e); 3174 } else { 3175 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec)); 3176 } 3177#endif 3178 3179 e = ucol_getKeywords(&ec); 3180 if (e != NULL) { 3181 assertSuccess("ucol_getKeywords", &ec); 3182 assertTrue("ucol_getKeywords!=0", e!=0); 3183 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); 3184 /* Don't need to check n because we check list */ 3185 uenum_close(e); 3186 } else { 3187 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec)); 3188 } 3189 3190 e = ucol_getKeywordValues(KW[0], &ec); 3191 if (e != NULL) { 3192 assertSuccess("ucol_getKeywordValues", &ec); 3193 assertTrue("ucol_getKeywordValues!=0", e!=0); 3194 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); 3195 /* Don't need to check n because we check list */ 3196 uenum_close(e); 3197 } else { 3198 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec)); 3199 } 3200 3201 /* Try setting a warning before calling ucol_getKeywordValues */ 3202 ec = U_USING_FALLBACK_WARNING; 3203 e = ucol_getKeywordValues(KW[0], &ec); 3204 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { 3205 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); 3206 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL)); 3207 /* Don't need to check n because we check list */ 3208 uenum_close(e); 3209 } 3210 3211 /* 3212U_DRAFT int32_t U_EXPORT2 3213ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 3214 const char* locale, UBool* isAvailable, 3215 UErrorCode* status); 3216} 3217*/ 3218 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", 3219 &isAvailable, &ec); 3220 if (assertSuccess("getFunctionalEquivalent", &ec)) { 3221 assertEquals("getFunctionalEquivalent(de)", "root", loc); 3222 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", 3223 isAvailable == TRUE); 3224 } 3225 3226 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", 3227 &isAvailable, &ec); 3228 if (assertSuccess("getFunctionalEquivalent", &ec)) { 3229 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); 3230 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE", 3231 isAvailable == FALSE); 3232 } 3233} 3234 3235/* supercedes TestJ784 */ 3236static void TestBeforePinyin(void) { 3237 const static char rules[] = { 3238 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0" 3239 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8" 3240 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC" 3241 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2" 3242 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9" 3243 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC" 3244 }; 3245 3246 const static char *test[] = { 3247 "l\\u0101", 3248 "la", 3249 "l\\u0101n", 3250 "lan ", 3251 "l\\u0113", 3252 "le", 3253 "l\\u0113n", 3254 "len" 3255 }; 3256 3257 const static char *test2[] = { 3258 "x\\u0101", 3259 "x\\u0100", 3260 "X\\u0101", 3261 "X\\u0100", 3262 "x\\u00E1", 3263 "x\\u00C1", 3264 "X\\u00E1", 3265 "X\\u00C1", 3266 "x\\u01CE", 3267 "x\\u01CD", 3268 "X\\u01CE", 3269 "X\\u01CD", 3270 "x\\u00E0", 3271 "x\\u00C0", 3272 "X\\u00E0", 3273 "X\\u00C0", 3274 "xa", 3275 "xA", 3276 "Xa", 3277 "XA", 3278 "x\\u0101x", 3279 "x\\u0100x", 3280 "x\\u00E1x", 3281 "x\\u00C1x", 3282 "x\\u01CEx", 3283 "x\\u01CDx", 3284 "x\\u00E0x", 3285 "x\\u00C0x", 3286 "xax", 3287 "xAx" 3288 }; 3289 3290 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 3291 genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0])); 3292 genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0])); 3293 genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0])); 3294} 3295 3296static void TestBeforeTightening(void) { 3297 static const struct { 3298 const char *rules; 3299 UErrorCode expectedStatus; 3300 } tests[] = { 3301 { "&[before 1]a<x", U_ZERO_ERROR }, 3302 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, 3303 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, 3304 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, 3305 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, 3306 { "&[before 2]a<<x",U_ZERO_ERROR }, 3307 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, 3308 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, 3309 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, 3310 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, 3311 { "&[before 3]a<<<x",U_ZERO_ERROR }, 3312 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, 3313 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } 3314 }; 3315 3316 int32_t i = 0; 3317 3318 UErrorCode status = U_ZERO_ERROR; 3319 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 3320 uint32_t rlen = 0; 3321 3322 UCollator *coll = NULL; 3323 3324 3325 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 3326 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); 3327 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 3328 if(status != tests[i].expectedStatus) { 3329 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n", 3330 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus)); 3331 } 3332 ucol_close(coll); 3333 status = U_ZERO_ERROR; 3334 } 3335 3336} 3337 3338/* 3339&m < a 3340&[before 1] a < x <<< X << q <<< Q < z 3341assert: m <<< M < x <<< X << q <<< Q < z < a < n 3342 3343&m < a 3344&[before 2] a << x <<< X << q <<< Q < z 3345assert: m <<< M < x <<< X << q <<< Q << a < z < n 3346 3347&m < a 3348&[before 3] a <<< x <<< X << q <<< Q < z 3349assert: m <<< M < x <<< X <<< a << q <<< Q < z < n 3350 3351 3352&m << a 3353&[before 1] a < x <<< X << q <<< Q < z 3354assert: x <<< X << q <<< Q < z < m <<< M << a < n 3355 3356&m << a 3357&[before 2] a << x <<< X << q <<< Q < z 3358assert: m <<< M << x <<< X << q <<< Q << a < z < n 3359 3360&m << a 3361&[before 3] a <<< x <<< X << q <<< Q < z 3362assert: m <<< M << x <<< X <<< a << q <<< Q < z < n 3363 3364 3365&m <<< a 3366&[before 1] a < x <<< X << q <<< Q < z 3367assert: x <<< X << q <<< Q < z < n < m <<< a <<< M 3368 3369&m <<< a 3370&[before 2] a << x <<< X << q <<< Q < z 3371assert: x <<< X << q <<< Q << m <<< a <<< M < z < n 3372 3373&m <<< a 3374&[before 3] a <<< x <<< X << q <<< Q < z 3375assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n 3376 3377 3378&[before 1] s < x <<< X << q <<< Q < z 3379assert: r <<< R < x <<< X << q <<< Q < z < s < n 3380 3381&[before 2] s << x <<< X << q <<< Q < z 3382assert: r <<< R < x <<< X << q <<< Q << s < z < n 3383 3384&[before 3] s <<< x <<< X << q <<< Q < z 3385assert: r <<< R < x <<< X <<< s << q <<< Q < z < n 3386 3387 3388&[before 1] \u24DC < x <<< X << q <<< Q < z 3389assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M 3390 3391&[before 2] \u24DC << x <<< X << q <<< Q < z 3392assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n 3393 3394&[before 3] \u24DC <<< x <<< X << q <<< Q < z 3395assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n 3396*/ 3397 3398 3399#if 0 3400/* requires features not yet supported */ 3401static void TestMoreBefore(void) { 3402 static const struct { 3403 const char* rules; 3404 const char* order[16]; 3405 int32_t size; 3406 } tests[] = { 3407 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", 3408 { "m","M","x","X","q","Q","z","a","n" }, 9}, 3409 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", 3410 { "m","M","x","X","q","Q","a","z","n" }, 9}, 3411 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", 3412 { "m","M","x","X","a","q","Q","z","n" }, 9}, 3413 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", 3414 { "x","X","q","Q","z","m","M","a","n" }, 9}, 3415 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", 3416 { "m","M","x","X","q","Q","a","z","n" }, 9}, 3417 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", 3418 { "m","M","x","X","a","q","Q","z","n" }, 9}, 3419 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", 3420 { "x","X","q","Q","z","n","m","a","M" }, 9}, 3421 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", 3422 { "x","X","q","Q","m","a","M","z","n" }, 9}, 3423 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", 3424 { "m","x","X","a","M","q","Q","z","n" }, 9}, 3425 { "&[before 1] s < x <<< X << q <<< Q < z", 3426 { "r","R","x","X","q","Q","z","s","n" }, 9}, 3427 { "&[before 2] s << x <<< X << q <<< Q < z", 3428 { "r","R","x","X","q","Q","s","z","n" }, 9}, 3429 { "&[before 3] s <<< x <<< X << q <<< Q < z", 3430 { "r","R","x","X","s","q","Q","z","n" }, 9}, 3431 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", 3432 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, 3433 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", 3434 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, 3435 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", 3436 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} 3437 }; 3438 3439 int32_t i = 0; 3440 3441 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 3442 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); 3443 } 3444} 3445#endif 3446 3447static void TestTailorNULL( void ) { 3448 const static char* rule = "&a <<< '\\u0000'"; 3449 UErrorCode status = U_ZERO_ERROR; 3450 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 3451 uint32_t rlen = 0; 3452 UChar a = 1, null = 0; 3453 UCollationResult res = UCOL_EQUAL; 3454 3455 UCollator *coll = NULL; 3456 3457 3458 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); 3459 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 3460 3461 if(U_FAILURE(status)) { 3462 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status)); 3463 } else { 3464 res = ucol_strcoll(coll, &a, 1, &null, 1); 3465 3466 if(res != UCOL_LESS) { 3467 log_err("NULL was not tailored properly!\n"); 3468 } 3469 } 3470 3471 ucol_close(coll); 3472} 3473 3474static void 3475TestUpperFirstQuaternary(void) 3476{ 3477 const char* tests[] = { "B", "b", "Bb", "bB" }; 3478 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; 3479 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; 3480 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0])); 3481} 3482 3483static void 3484TestJ4960(void) 3485{ 3486 const char* tests[] = { "\\u00e2T", "aT" }; 3487 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; 3488 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; 3489 const char* tests2[] = { "a", "A" }; 3490 const char* rule = "&[first tertiary ignorable]=A=a"; 3491 UColAttribute att2[] = { UCOL_CASE_LEVEL }; 3492 UColAttributeValue attVals2[] = { UCOL_ON }; 3493 /* Test whether we correctly ignore primary ignorables on case level when */ 3494 /* we have only primary & case level */ 3495 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); 3496 /* Test whether ICU4J will make case level for sortkeys that have primary strength */ 3497 /* and case level */ 3498 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0])); 3499 /* Test whether completely ignorable letters have case level info (they shouldn't) */ 3500 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); 3501} 3502 3503static void 3504TestJ5223(void) 3505{ 3506 static const char *test = "this is a test string"; 3507 UChar ustr[256]; 3508 int32_t ustr_length = u_unescape(test, ustr, 256); 3509 unsigned char sortkey[256]; 3510 int32_t sortkey_length; 3511 UErrorCode status = U_ZERO_ERROR; 3512 static UCollator *coll = NULL; 3513 coll = ucol_open("root", &status); 3514 if(U_FAILURE(status)) { 3515 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 3516 return; 3517 } 3518 ucol_setStrength(coll, UCOL_PRIMARY); 3519 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 3520 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3521 if (U_FAILURE(status)) { 3522 log_err("Failed setting atributes\n"); 3523 return; 3524 } 3525 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); 3526 if (sortkey_length > 256) return; 3527 3528 /* we mark the position where the null byte should be written in advance */ 3529 sortkey[sortkey_length-1] = 0xAA; 3530 3531 /* we set the buffer size one byte higher than needed */ 3532 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 3533 sortkey_length+1); 3534 3535 /* no error occurs (for me) */ 3536 if (sortkey[sortkey_length-1] == 0xAA) { 3537 log_err("Hit bug at first try\n"); 3538 } 3539 3540 /* we mark the position where the null byte should be written again */ 3541 sortkey[sortkey_length-1] = 0xAA; 3542 3543 /* this time we set the buffer size to the exact amount needed */ 3544 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 3545 sortkey_length); 3546 3547 /* now the trailing null byte is not written */ 3548 if (sortkey[sortkey_length-1] == 0xAA) { 3549 log_err("Hit bug at second try\n"); 3550 } 3551 3552 ucol_close(coll); 3553} 3554 3555/* Regression test for Thai partial sort key problem */ 3556static void 3557TestJ5232(void) 3558{ 3559 const static char *test[] = { 3560 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21", 3561 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21" 3562 }; 3563 3564 genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0])); 3565} 3566 3567static void 3568TestJ5367(void) 3569{ 3570 const static char *test[] = { "a", "y" }; 3571 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a"; 3572 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 3573} 3574 3575static void 3576TestVI5913(void) 3577{ 3578 UErrorCode status = U_ZERO_ERROR; 3579 int32_t i, j; 3580 UCollator *coll =NULL; 3581 uint8_t resColl[100], expColl[100]; 3582 int32_t rLen, tLen, ruleLen, sLen, kLen; 3583 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/ 3584 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 3585 /* 3586 * Note: Just tailoring &z<ae^ does not work as expected: 3587 * The UCA spec requires for discontiguous contractions that they 3588 * extend an *existing match* by one combining mark at a time. 3589 * Therefore, ae must be a contraction so that the builder finds 3590 * discontiguous contractions for ae^, for example with an intervening underdot. 3591 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc. 3592 */ 3593 UChar rule3[256]={ 3594 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */ 3595 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/ 3596 0}; 3597 static const UChar tData[][20]={ 3598 {0x1EAC, 0}, 3599 {0x0041, 0x0323, 0x0302, 0}, 3600 {0x1EA0, 0x0302, 0}, 3601 {0x00C2, 0x0323, 0}, 3602 {0x1ED8, 0}, /* O with dot and circumflex */ 3603 {0x1ECC, 0x0302, 0}, 3604 {0x1EB7, 0}, 3605 {0x1EA1, 0x0306, 0}, 3606 }; 3607 static const UChar tailorData[][20]={ 3608 {0x1FA2, 0}, /* Omega with 3 combining marks */ 3609 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, 3610 {0x1FF3, 0x0313, 0x0300, 0}, 3611 {0x1F60, 0x0300, 0x0345, 0}, 3612 {0x1F62, 0x0345, 0}, 3613 {0x1FA0, 0x0300, 0}, 3614 }; 3615 static const UChar tailorData2[][20]={ 3616 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ 3617 {0x0073, 0x0323, 0x030C, 0}, 3618 {0x0073, 0x030C, 0x0323, 0}, 3619 }; 3620 static const UChar tailorData3[][20]={ 3621 {0x007a, 0}, /* z */ 3622 {0x0061, 0x0065, 0}, /* a + e */ 3623 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ 3624 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ 3625 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */ 3626 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */ 3627 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ 3628 {0x00EA, 0}, /* e with circumflex */ 3629 }; 3630 3631 /* Test Vietnamese sort. */ 3632 coll = ucol_open("vi", &status); 3633 if(U_FAILURE(status)) { 3634 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 3635 return; 3636 } 3637 log_verbose("\n\nVI collation:"); 3638 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) { 3639 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 3640 } 3641 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) { 3642 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 3643 } 3644 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) { 3645 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); 3646 } 3647 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) { 3648 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 3649 } 3650 3651 for (j=0; j<8; j++) { 3652 tLen = u_strlen(tData[j]); 3653 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 3654 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 3655 for(i = 0; i<rLen; i++) { 3656 log_verbose(" %02X", resColl[i]); 3657 } 3658 } 3659 3660 ucol_close(coll); 3661 3662 /* Test Romanian sort. */ 3663 coll = ucol_open("ro", &status); 3664 log_verbose("\n\nRO collation:"); 3665 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) { 3666 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 3667 } 3668 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) { 3669 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 3670 } 3671 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) { 3672 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 3673 } 3674 3675 for (j=4; j<8; j++) { 3676 tLen = u_strlen(tData[j]); 3677 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 3678 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 3679 for(i = 0; i<rLen; i++) { 3680 log_verbose(" %02X", resColl[i]); 3681 } 3682 } 3683 ucol_close(coll); 3684 3685 /* Test the precomposed Greek character with 3 combining marks. */ 3686 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); 3687 ruleLen = u_strlen(rule); 3688 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3689 if (U_FAILURE(status)) { 3690 log_err("ucol_openRules failed with %s\n", u_errorName(status)); 3691 return; 3692 } 3693 sLen = u_strlen(tailorData[0]); 3694 for (j=1; j<6; j++) { 3695 tLen = u_strlen(tailorData[j]); 3696 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { 3697 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); 3698 } 3699 } 3700 /* Test getSortKey. */ 3701 tLen = u_strlen(tailorData[0]); 3702 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); 3703 for (j=0; j<6; j++) { 3704 tLen = u_strlen(tailorData[j]); 3705 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); 3706 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 3707 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 3708 for(i = 0; i<rLen; i++) { 3709 log_err(" %02X", resColl[i]); 3710 } 3711 } 3712 } 3713 ucol_close(coll); 3714 3715 log_verbose("\n\nTailoring test for s with caron:"); 3716 ruleLen = u_strlen(rule2); 3717 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3718 tLen = u_strlen(tailorData2[0]); 3719 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); 3720 for (j=1; j<3; j++) { 3721 tLen = u_strlen(tailorData2[j]); 3722 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); 3723 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 3724 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 3725 for(i = 0; i<rLen; i++) { 3726 log_err(" %02X", resColl[i]); 3727 } 3728 } 3729 } 3730 ucol_close(coll); 3731 3732 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 3733 ruleLen = u_strlen(rule3); 3734 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3735 tLen = u_strlen(tailorData3[3]); 3736 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 3737 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen); 3738 for(i = 0; i<kLen; i++) { 3739 log_verbose(" %02X", expColl[i]); 3740 } 3741 for (j=4; j<6; j++) { 3742 tLen = u_strlen(tailorData3[j]); 3743 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 3744 3745 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 3746 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen); 3747 for(i = 0; i<rLen; i++) { 3748 log_err(" %02X", resColl[i]); 3749 } 3750 } 3751 3752 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen); 3753 for(i = 0; i<rLen; i++) { 3754 log_verbose(" %02X", resColl[i]); 3755 } 3756 } 3757 ucol_close(coll); 3758} 3759 3760static void 3761TestTailor6179(void) 3762{ 3763 UErrorCode status = U_ZERO_ERROR; 3764 int32_t i; 3765 UCollator *coll =NULL; 3766 uint8_t resColl[100]; 3767 int32_t rLen, tLen, ruleLen; 3768 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ 3769 static const UChar rule1[]={ 3770 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79, 3771 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20, 3772 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20, 3773 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0}; 3774 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ 3775 static const UChar rule2[]={ 3776 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61, 3777 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C, 3778 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E, 3779 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C, 3780 0x3C,0x3C,0x20,0x62,0}; 3781 3782 static const UChar tData1[][4]={ 3783 {0x61, 0}, 3784 {0x62, 0}, 3785 { 0xFDD0,0x009E, 0} 3786 }; 3787 static const UChar tData2[][4]={ 3788 {0x61, 0}, 3789 {0x62, 0}, 3790 { 0xFDD0,0x009E, 0} 3791 }; 3792 3793 /* 3794 * These values from FractionalUCA.txt will change, 3795 * and need to be updated here. 3796 * TODO: Make this not check for particular sort keys. 3797 * Instead, test that we get CEs before & after other ignorables; see ticket #6179. 3798 */ 3799 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0}; 3800 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0}; 3801 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0}; 3802 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0}; 3803 3804 UParseError parseError; 3805 3806 /* Test [Last Primary ignorable] */ 3807 3808 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n"); 3809 ruleLen = u_strlen(rule1); 3810 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3811 if (U_FAILURE(status)) { 3812 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status)); 3813 return; 3814 } 3815 tLen = u_strlen(tData1[0]); 3816 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); 3817 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) { 3818 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen); 3819 for(i = 0; i<rLen; i++) { 3820 log_err(" %02X", resColl[i]); 3821 } 3822 log_err("\n"); 3823 } 3824 tLen = u_strlen(tData1[1]); 3825 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); 3826 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) { 3827 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen); 3828 for(i = 0; i<rLen; i++) { 3829 log_err(" %02X", resColl[i]); 3830 } 3831 log_err("\n"); 3832 } 3833 ucol_close(coll); 3834 3835 3836 /* Test [Last Secondary ignorable] */ 3837 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n"); 3838 ruleLen = u_strlen(rule2); 3839 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status); 3840 if (U_FAILURE(status)) { 3841 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status)); 3842 log_info(" offset=%d \"%s\" | \"%s\"\n", 3843 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1)); 3844 return; 3845 } 3846 tLen = u_strlen(tData2[0]); 3847 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 3848 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) { 3849 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 3850 for(i = 0; i<rLen; i++) { 3851 log_err(" %02X", resColl[i]); 3852 } 3853 log_err("\n"); 3854 } 3855 tLen = u_strlen(tData2[1]); 3856 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 3857 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) { 3858 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 3859 for(i = 0; i<rLen; i++) { 3860 log_err(" %02X", resColl[i]); 3861 } 3862 log_err("\n"); 3863 } 3864 ucol_close(coll); 3865} 3866 3867static void 3868TestUCAPrecontext(void) 3869{ 3870 UErrorCode status = U_ZERO_ERROR; 3871 int32_t i, j; 3872 UCollator *coll =NULL; 3873 uint8_t resColl[100], prevColl[100]; 3874 int32_t rLen, tLen, ruleLen; 3875 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ 3876 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; 3877 /* & l middle-dot << a a is an expansion. */ 3878 3879 UChar tData1[][20]={ 3880 { 0xb7, 0}, /* standalone middle dot(0xb7) */ 3881 { 0x387, 0}, /* standalone middle dot(0x387) */ 3882 { 0x61, 0}, /* a */ 3883 { 0x6C, 0}, /* l */ 3884 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ 3885 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ 3886 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ 3887 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ 3888 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ 3889 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ 3890 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ 3891 }; 3892 3893 log_verbose("\n\nEN collation:"); 3894 coll = ucol_open("en", &status); 3895 if (U_FAILURE(status)) { 3896 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status)); 3897 return; 3898 } 3899 for (j=0; j<11; j++) { 3900 tLen = u_strlen(tData1[j]); 3901 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 3902 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 3903 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 3904 j, tData1[j]); 3905 } 3906 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 3907 for(i = 0; i<rLen; i++) { 3908 log_verbose(" %02X", resColl[i]); 3909 } 3910 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 3911 } 3912 ucol_close(coll); 3913 3914 3915 log_verbose("\n\nJA collation:"); 3916 coll = ucol_open("ja", &status); 3917 if (U_FAILURE(status)) { 3918 log_err("Tailoring test: &z <<a|- failed!"); 3919 return; 3920 } 3921 for (j=0; j<11; j++) { 3922 tLen = u_strlen(tData1[j]); 3923 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 3924 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 3925 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 3926 j, tData1[j]); 3927 } 3928 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 3929 for(i = 0; i<rLen; i++) { 3930 log_verbose(" %02X", resColl[i]); 3931 } 3932 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 3933 } 3934 ucol_close(coll); 3935 3936 3937 log_verbose("\n\nTailoring test: & middle dot < a "); 3938 ruleLen = u_strlen(rule1); 3939 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3940 if (U_FAILURE(status)) { 3941 log_err("Tailoring test: & middle dot < a failed!"); 3942 return; 3943 } 3944 for (j=0; j<11; j++) { 3945 tLen = u_strlen(tData1[j]); 3946 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 3947 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 3948 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 3949 j, tData1[j]); 3950 } 3951 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 3952 for(i = 0; i<rLen; i++) { 3953 log_verbose(" %02X", resColl[i]); 3954 } 3955 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 3956 } 3957 ucol_close(coll); 3958 3959 3960 log_verbose("\n\nTailoring test: & l middle-dot << a "); 3961 ruleLen = u_strlen(rule2); 3962 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 3963 if (U_FAILURE(status)) { 3964 log_err("Tailoring test: & l middle-dot << a failed!"); 3965 return; 3966 } 3967 for (j=0; j<11; j++) { 3968 tLen = u_strlen(tData1[j]); 3969 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 3970 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 3971 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 3972 j, tData1[j]); 3973 } 3974 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { 3975 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.", 3976 j, tData1[j]); 3977 } 3978 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 3979 for(i = 0; i<rLen; i++) { 3980 log_verbose(" %02X", resColl[i]); 3981 } 3982 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 3983 } 3984 ucol_close(coll); 3985} 3986 3987static void 3988TestOutOfBuffer5468(void) 3989{ 3990 static const char *test = "\\u4e00"; 3991 UChar ustr[256]; 3992 int32_t ustr_length = u_unescape(test, ustr, 256); 3993 unsigned char shortKeyBuf[1]; 3994 int32_t sortkey_length; 3995 UErrorCode status = U_ZERO_ERROR; 3996 static UCollator *coll = NULL; 3997 3998 coll = ucol_open("root", &status); 3999 if(U_FAILURE(status)) { 4000 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4001 return; 4002 } 4003 ucol_setStrength(coll, UCOL_PRIMARY); 4004 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4005 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4006 if (U_FAILURE(status)) { 4007 log_err("Failed setting atributes\n"); 4008 return; 4009 } 4010 4011 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf)); 4012 if (sortkey_length != 4) { 4013 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); 4014 } 4015 log_verbose("length of sortKey is %d", sortkey_length); 4016 ucol_close(coll); 4017} 4018 4019#define TSKC_DATA_SIZE 5 4020#define TSKC_BUF_SIZE 50 4021static void 4022TestSortKeyConsistency(void) 4023{ 4024 UErrorCode icuRC = U_ZERO_ERROR; 4025 UCollator* ucol; 4026 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; 4027 4028 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 4029 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 4030 int32_t i, j, i2; 4031 4032 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); 4033 if (U_FAILURE(icuRC)) 4034 { 4035 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC)); 4036 return; 4037 } 4038 4039 for (i = 0; i < TSKC_DATA_SIZE; i++) 4040 { 4041 UCharIterator uiter; 4042 uint32_t state[2] = { 0, 0 }; 4043 int32_t dataLen = i+1; 4044 for (j=0; j<TSKC_BUF_SIZE; j++) 4045 bufFull[i][j] = bufPart[i][j] = 0; 4046 4047 /* Full sort key */ 4048 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); 4049 4050 /* Partial sort key */ 4051 uiter_setString(&uiter, data, dataLen); 4052 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC); 4053 if (U_FAILURE(icuRC)) 4054 { 4055 log_err("ucol_nextSortKeyPart failed\n"); 4056 ucol_close(ucol); 4057 return; 4058 } 4059 4060 for (i2=0; i2<i; i2++) 4061 { 4062 UBool fullMatch = TRUE; 4063 UBool partMatch = TRUE; 4064 for (j=0; j<TSKC_BUF_SIZE; j++) 4065 { 4066 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); 4067 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); 4068 } 4069 if (fullMatch != partMatch) { 4070 log_err(fullMatch ? "full key was consistent, but partial key changed\n" 4071 : "partial key was consistent, but full key changed\n"); 4072 ucol_close(ucol); 4073 return; 4074 } 4075 } 4076 } 4077 4078 /*=============================================*/ 4079 ucol_close(ucol); 4080} 4081 4082/* ticket: 6101 */ 4083static void TestCroatianSortKey(void) { 4084 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; 4085 UErrorCode status = U_ZERO_ERROR; 4086 UCollator *ucol; 4087 UCharIterator iter; 4088 4089 static const UChar text[] = { 0x0044, 0xD81A }; 4090 4091 size_t length = sizeof(text)/sizeof(*text); 4092 4093 uint8_t textSortKey[32]; 4094 size_t lenSortKey = 32; 4095 size_t actualSortKeyLen; 4096 uint32_t uStateInfo[2] = { 0, 0 }; 4097 4098 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); 4099 if (U_FAILURE(status)) { 4100 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status)); 4101 return; 4102 } 4103 4104 uiter_setString(&iter, text, length); 4105 4106 actualSortKeyLen = ucol_nextSortKeyPart( 4107 ucol, &iter, (uint32_t*)uStateInfo, 4108 textSortKey, lenSortKey, &status 4109 ); 4110 4111 if (actualSortKeyLen == lenSortKey) { 4112 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n"); 4113 } 4114 4115 ucol_close(ucol); 4116} 4117 4118/* ticket: 6140 */ 4119/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since 4120 * they are both Hiragana and Katakana 4121 */ 4122#define SORTKEYLEN 50 4123static void TestHiragana(void) { 4124 UErrorCode status = U_ZERO_ERROR; 4125 UCollator* ucol; 4126 UCollationResult strcollresult; 4127 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ 4128 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; 4129 int32_t data1Len = sizeof(data1)/sizeof(*data1); 4130 int32_t data2Len = sizeof(data2)/sizeof(*data2); 4131 int32_t i, j; 4132 uint8_t sortKey1[SORTKEYLEN]; 4133 uint8_t sortKey2[SORTKEYLEN]; 4134 4135 UCharIterator uiter1; 4136 UCharIterator uiter2; 4137 uint32_t state1[2] = { 0, 0 }; 4138 uint32_t state2[2] = { 0, 0 }; 4139 int32_t keySize1; 4140 int32_t keySize2; 4141 4142 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, 4143 &status); 4144 if (U_FAILURE(status)) { 4145 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status)); 4146 return; 4147 } 4148 4149 /* Start of full sort keys */ 4150 /* Full sort key1 */ 4151 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); 4152 /* Full sort key2 */ 4153 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); 4154 if (keySize1 == keySize2) { 4155 for (i = 0; i < keySize1; i++) { 4156 if (sortKey1[i] != sortKey2[i]) { 4157 log_err("Full sort keys are different. Should be equal."); 4158 } 4159 } 4160 } else { 4161 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2); 4162 } 4163 /* End of full sort keys */ 4164 4165 /* Start of partial sort keys */ 4166 /* Partial sort key1 */ 4167 uiter_setString(&uiter1, data1, data1Len); 4168 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status); 4169 /* Partial sort key2 */ 4170 uiter_setString(&uiter2, data2, data2Len); 4171 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status); 4172 if (U_SUCCESS(status) && keySize1 == keySize2) { 4173 for (j = 0; j < keySize1; j++) { 4174 if (sortKey1[j] != sortKey2[j]) { 4175 log_err("Partial sort keys are different. Should be equal"); 4176 } 4177 } 4178 } else { 4179 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2); 4180 } 4181 /* End of partial sort keys */ 4182 4183 /* Start of strcoll */ 4184 /* Use ucol_strcoll() to determine ordering */ 4185 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); 4186 if (strcollresult != UCOL_EQUAL) { 4187 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); 4188 } 4189 4190 ucol_close(ucol); 4191} 4192 4193/* Convenient struct for running collation tests */ 4194typedef struct { 4195 const UChar source[MAX_TOKEN_LEN]; /* String on left */ 4196 const UChar target[MAX_TOKEN_LEN]; /* String on right */ 4197 UCollationResult result; /* -1, 0 or +1, depending on collation */ 4198} OneTestCase; 4199 4200/* 4201 * Utility function to test one collation test case. 4202 * @param testcases Array of test cases. 4203 * @param n_testcases Size of the array testcases. 4204 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats. 4205 * @param n_rules Size of the array str_rules. 4206 */ 4207static void doTestOneTestCase(const OneTestCase testcases[], 4208 int n_testcases, 4209 const char* str_rules[], 4210 int n_rules) 4211{ 4212 int rule_no, testcase_no; 4213 UChar rule[500]; 4214 int32_t length = 0; 4215 UErrorCode status = U_ZERO_ERROR; 4216 UParseError parse_error; 4217 UCollator *myCollation; 4218 4219 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 4220 4221 length = u_unescape(str_rules[rule_no], rule, 500); 4222 if (length == 0) { 4223 log_err("ERROR: The rule cannot be unescaped: %s\n"); 4224 return; 4225 } 4226 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 4227 if(U_FAILURE(status)){ 4228 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 4229 log_info(" offset=%d \"%s\" | \"%s\"\n", 4230 parse_error.offset, 4231 aescstrdup(parse_error.preContext, -1), 4232 aescstrdup(parse_error.postContext, -1)); 4233 return; 4234 } 4235 log_verbose("Testing the <<* syntax\n"); 4236 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4237 ucol_setStrength(myCollation, UCOL_TERTIARY); 4238 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { 4239 doTest(myCollation, 4240 testcases[testcase_no].source, 4241 testcases[testcase_no].target, 4242 testcases[testcase_no].result 4243 ); 4244 } 4245 ucol_close(myCollation); 4246 } 4247} 4248 4249const static OneTestCase rangeTestcases[] = { 4250 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */ 4251 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */ 4252 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */ 4253 4254 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */ 4255 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */ 4256 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */ 4257 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */ 4258 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */ 4259 4260 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */ 4261 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */ 4262 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */ 4263 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */ 4264 4265 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */ 4266 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */ 4267 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */ 4268 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */ 4269 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */ 4270 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */ 4271 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */ 4272 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */ 4273}; 4274 4275static int nRangeTestcases = LEN(rangeTestcases); 4276 4277const static OneTestCase rangeTestcasesSupplemental[] = { 4278 { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */ 4279 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */ 4280 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */ 4281 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */ 4282 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 4283 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 4284 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10001 */ 4285}; 4286 4287static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); 4288 4289const static OneTestCase rangeTestcasesQwerty[] = { 4290 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */ 4291 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */ 4292 4293 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */ 4294 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */ 4295 4296 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */ 4297 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */ 4298 4299 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */ 4300 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */ 4301 4302 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, 4303 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */ 4304 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b}, 4305 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */ 4306}; 4307 4308static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty); 4309 4310static void TestSameStrengthList(void) 4311{ 4312 const char* strRules[] = { 4313 /* Normal */ 4314 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3", 4315 4316 /* Lists */ 4317 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123", 4318 }; 4319 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 4320} 4321 4322static void TestSameStrengthListQuoted(void) 4323{ 4324 const char* strRules[] = { 4325 /* Lists with quoted characters */ 4326 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123", 4327 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123", 4328 4329 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033", 4330 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'", 4331 4332 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033", 4333 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'", 4334 }; 4335 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 4336} 4337 4338static void TestSameStrengthListSupplemental(void) 4339{ 4340 const char* strRules[] = { 4341 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002", 4342 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", 4343 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002", 4344 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", 4345 }; 4346 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 4347} 4348 4349static void TestSameStrengthListQwerty(void) 4350{ 4351 const char* strRules[] = { 4352 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 4353 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 4354 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", 4355 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064", 4356 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064", 4357 4358 /* Quoted characters also will work if two quoted characters are not consecutive. */ 4359 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064", 4360 4361 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */ 4362 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/ 4363 4364 }; 4365 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 4366} 4367 4368static void TestSameStrengthListQuotedQwerty(void) 4369{ 4370 const char* strRules[] = { 4371 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 4372 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 4373 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */ 4374 4375 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */ 4376 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */ 4377 }; 4378 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 4379} 4380 4381static void TestSameStrengthListRanges(void) 4382{ 4383 const char* strRules[] = { 4384 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", 4385 }; 4386 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 4387} 4388 4389static void TestSameStrengthListSupplementalRanges(void) 4390{ 4391 const char* strRules[] = { 4392 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */ 4393 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002", 4394 }; 4395 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 4396} 4397 4398static void TestSpecialCharacters(void) 4399{ 4400 const char* strRules[] = { 4401 /* Normal */ 4402 "&';'<'+'<','<'-'<'&'<'*'", 4403 4404 /* List */ 4405 "&';'<*'+,-&*'", 4406 4407 /* Range */ 4408 "&';'<*'+'-'-&*'", 4409 }; 4410 4411 const static OneTestCase specialCharacterStrings[] = { 4412 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */ 4413 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */ 4414 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */ 4415 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */ 4416 }; 4417 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules)); 4418} 4419 4420static void TestPrivateUseCharacters(void) 4421{ 4422 const char* strRules[] = { 4423 /* Normal */ 4424 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'", 4425 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", 4426 }; 4427 4428 const static OneTestCase privateUseCharacterStrings[] = { 4429 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 4430 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 4431 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 4432 { {0xe2da}, {0xe2db}, UCOL_LESS }, 4433 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 4434 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 4435 }; 4436 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 4437} 4438 4439static void TestPrivateUseCharactersInList(void) 4440{ 4441 const char* strRules[] = { 4442 /* List */ 4443 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'", 4444 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */ 4445 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d", 4446 }; 4447 4448 const static OneTestCase privateUseCharacterStrings[] = { 4449 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 4450 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 4451 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 4452 { {0xe2da}, {0xe2db}, UCOL_LESS }, 4453 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 4454 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 4455 }; 4456 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 4457} 4458 4459static void TestPrivateUseCharactersInRange(void) 4460{ 4461 const char* strRules[] = { 4462 /* Range */ 4463 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'", 4464 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d", 4465 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */ 4466 }; 4467 4468 const static OneTestCase privateUseCharacterStrings[] = { 4469 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 4470 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 4471 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 4472 { {0xe2da}, {0xe2db}, UCOL_LESS }, 4473 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 4474 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 4475 }; 4476 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 4477} 4478 4479static void TestInvalidListsAndRanges(void) 4480{ 4481 const char* invalidRules[] = { 4482 /* Range not in starred expression */ 4483 "&\\ufffe<\\uffff-\\U00010002", 4484 4485 /* Range without start */ 4486 "&a<*-c", 4487 4488 /* Range without end */ 4489 "&a<*b-", 4490 4491 /* More than one hyphen */ 4492 "&a<*b-g-l", 4493 4494 /* Range in the wrong order */ 4495 "&a<*k-b", 4496 4497 }; 4498 4499 UChar rule[500]; 4500 UErrorCode status = U_ZERO_ERROR; 4501 UParseError parse_error; 4502 int n_rules = LEN(invalidRules); 4503 int rule_no; 4504 int length; 4505 UCollator *myCollation; 4506 4507 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 4508 4509 length = u_unescape(invalidRules[rule_no], rule, 500); 4510 if (length == 0) { 4511 log_err("ERROR: The rule cannot be unescaped: %s\n"); 4512 return; 4513 } 4514 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 4515 (void)myCollation; /* Suppress set but not used warning. */ 4516 if(!U_FAILURE(status)){ 4517 log_err("ERROR: Could not cause a failure as expected: \n"); 4518 } 4519 status = U_ZERO_ERROR; 4520 } 4521} 4522 4523/* 4524 * This test ensures that characters placed before a character in a different script have the same lead byte 4525 * in their collation key before and after script reordering. 4526 */ 4527static void TestBeforeRuleWithScriptReordering(void) 4528{ 4529 UParseError error; 4530 UErrorCode status = U_ZERO_ERROR; 4531 UCollator *myCollation; 4532 char srules[500] = "&[before 1]\\u03b1 < \\u0e01"; 4533 UChar rules[500]; 4534 uint32_t rulesLength = 0; 4535 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 4536 UCollationResult collResult; 4537 4538 uint8_t baseKey[256]; 4539 uint32_t baseKeyLength; 4540 uint8_t beforeKey[256]; 4541 uint32_t beforeKeyLength; 4542 4543 UChar base[] = { 0x03b1 }; /* base */ 4544 int32_t baseLen = sizeof(base)/sizeof(*base); 4545 4546 UChar before[] = { 0x0e01 }; /* ko kai */ 4547 int32_t beforeLen = sizeof(before)/sizeof(*before); 4548 4549 /*UChar *data[] = { before, base }; 4550 genericRulesStarter(srules, data, 2);*/ 4551 4552 log_verbose("Testing the &[before 1] rule with [reorder grek]\n"); 4553 4554 (void)beforeKeyLength; /* Suppress set but not used warnings. */ 4555 (void)baseKeyLength; 4556 4557 /* build collator */ 4558 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n"); 4559 4560 rulesLength = u_unescape(srules, rules, LEN(rules)); 4561 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status); 4562 if(U_FAILURE(status)) { 4563 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 4564 return; 4565 } 4566 4567 /* check collation results - before rule applied but not script reordering */ 4568 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 4569 if (collResult != UCOL_GREATER) { 4570 log_err("Collation result not correct before script reordering = %d\n", collResult); 4571 } 4572 4573 /* check the lead byte of the collation keys before script reordering */ 4574 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 4575 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 4576 if (baseKey[0] != beforeKey[0]) { 4577 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 4578 } 4579 4580 /* reorder the scripts */ 4581 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status); 4582 if(U_FAILURE(status)) { 4583 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 4584 return; 4585 } 4586 4587 /* check collation results - before rule applied and after script reordering */ 4588 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 4589 if (collResult != UCOL_GREATER) { 4590 log_err("Collation result not correct after script reordering = %d\n", collResult); 4591 } 4592 4593 /* check the lead byte of the collation keys after script reordering */ 4594 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 4595 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 4596 if (baseKey[0] != beforeKey[0]) { 4597 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 4598 } 4599 4600 ucol_close(myCollation); 4601} 4602 4603/* 4604 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering. 4605 */ 4606static void TestNonLeadBytesDuringCollationReordering(void) 4607{ 4608 UErrorCode status = U_ZERO_ERROR; 4609 UCollator *myCollation; 4610 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 4611 4612 uint8_t baseKey[256]; 4613 uint32_t baseKeyLength; 4614 uint8_t reorderKey[256]; 4615 uint32_t reorderKeyLength; 4616 4617 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 }; 4618 4619 uint32_t i; 4620 4621 4622 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 4623 4624 /* build collator tertiary */ 4625 myCollation = ucol_open("", &status); 4626 ucol_setStrength(myCollation, UCOL_TERTIARY); 4627 if(U_FAILURE(status)) { 4628 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 4629 return; 4630 } 4631 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 4632 4633 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 4634 if(U_FAILURE(status)) { 4635 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 4636 return; 4637 } 4638 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 4639 4640 if (baseKeyLength != reorderKeyLength) { 4641 log_err("Key lengths not the same during reordering.\n"); 4642 return; 4643 } 4644 4645 for (i = 1; i < baseKeyLength; i++) { 4646 if (baseKey[i] != reorderKey[i]) { 4647 log_err("Collation key bytes not the same at position %d.\n", i); 4648 return; 4649 } 4650 } 4651 ucol_close(myCollation); 4652 4653 /* build collator quaternary */ 4654 myCollation = ucol_open("", &status); 4655 ucol_setStrength(myCollation, UCOL_QUATERNARY); 4656 if(U_FAILURE(status)) { 4657 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 4658 return; 4659 } 4660 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 4661 4662 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 4663 if(U_FAILURE(status)) { 4664 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 4665 return; 4666 } 4667 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 4668 4669 if (baseKeyLength != reorderKeyLength) { 4670 log_err("Key lengths not the same during reordering.\n"); 4671 return; 4672 } 4673 4674 for (i = 1; i < baseKeyLength; i++) { 4675 if (baseKey[i] != reorderKey[i]) { 4676 log_err("Collation key bytes not the same at position %d.\n", i); 4677 return; 4678 } 4679 } 4680 ucol_close(myCollation); 4681} 4682 4683/* 4684 * Test reordering API. 4685 */ 4686static void TestReorderingAPI(void) 4687{ 4688 UErrorCode status = U_ZERO_ERROR; 4689 UCollator *myCollation; 4690 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 4691 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; 4692 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 4693 UCollationResult collResult; 4694 int32_t retrievedReorderCodesLength; 4695 int32_t retrievedReorderCodes[10]; 4696 UChar greekString[] = { 0x03b1 }; 4697 UChar punctuationString[] = { 0x203e }; 4698 int loopIndex; 4699 4700 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 4701 4702 /* build collator tertiary */ 4703 myCollation = ucol_open("", &status); 4704 ucol_setStrength(myCollation, UCOL_TERTIARY); 4705 if(U_FAILURE(status)) { 4706 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 4707 return; 4708 } 4709 4710 /* set the reorderding */ 4711 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 4712 if (U_FAILURE(status)) { 4713 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 4714 return; 4715 } 4716 4717 /* get the reordering */ 4718 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 4719 if (status != U_BUFFER_OVERFLOW_ERROR) { 4720 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 4721 return; 4722 } 4723 status = U_ZERO_ERROR; 4724 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 4725 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 4726 return; 4727 } 4728 /* now let's really get it */ 4729 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 4730 if (U_FAILURE(status)) { 4731 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 4732 return; 4733 } 4734 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 4735 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 4736 return; 4737 } 4738 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 4739 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 4740 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 4741 return; 4742 } 4743 } 4744 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 4745 if (collResult != UCOL_LESS) { 4746 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 4747 return; 4748 } 4749 4750 /* clear the reordering */ 4751 ucol_setReorderCodes(myCollation, NULL, 0, &status); 4752 if (U_FAILURE(status)) { 4753 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 4754 return; 4755 } 4756 4757 /* get the reordering again */ 4758 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 4759 if (retrievedReorderCodesLength != 0) { 4760 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 4761 return; 4762 } 4763 4764 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 4765 if (collResult != UCOL_GREATER) { 4766 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 4767 return; 4768 } 4769 4770 /* test for error condition on duplicate reorder codes */ 4771 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status); 4772 if (!U_FAILURE(status)) { 4773 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n"); 4774 return; 4775 } 4776 4777 status = U_ZERO_ERROR; 4778 /* test for reorder codes after a reset code */ 4779 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status); 4780 if (!U_FAILURE(status)) { 4781 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n"); 4782 return; 4783 } 4784 4785 ucol_close(myCollation); 4786} 4787 4788/* 4789 * Test reordering API. 4790 */ 4791static void TestReorderingAPIWithRuleCreatedCollator(void) 4792{ 4793 UErrorCode status = U_ZERO_ERROR; 4794 UCollator *myCollation; 4795 UChar rules[90]; 4796 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK}; 4797 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 4798 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT}; 4799 UCollationResult collResult; 4800 int32_t retrievedReorderCodesLength; 4801 int32_t retrievedReorderCodes[10]; 4802 static const UChar greekString[] = { 0x03b1 }; 4803 static const UChar punctuationString[] = { 0x203e }; 4804 static const UChar hanString[] = { 0x65E5, 0x672C }; 4805 int loopIndex; 4806 4807 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 4808 4809 /* build collator from rules */ 4810 u_uastrcpy(rules, "[reorder Hani Grek]"); 4811 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status); 4812 if(U_FAILURE(status)) { 4813 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 4814 return; 4815 } 4816 4817 /* get the reordering */ 4818 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 4819 if (U_FAILURE(status)) { 4820 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 4821 return; 4822 } 4823 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { 4824 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); 4825 return; 4826 } 4827 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 4828 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { 4829 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 4830 return; 4831 } 4832 } 4833 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString)); 4834 if (collResult != UCOL_GREATER) { 4835 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 4836 return; 4837 } 4838 4839 /* set the reordering */ 4840 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 4841 if (U_FAILURE(status)) { 4842 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 4843 return; 4844 } 4845 4846 /* get the reordering */ 4847 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 4848 if (status != U_BUFFER_OVERFLOW_ERROR) { 4849 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 4850 return; 4851 } 4852 status = U_ZERO_ERROR; 4853 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 4854 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 4855 return; 4856 } 4857 /* now let's really get it */ 4858 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 4859 if (U_FAILURE(status)) { 4860 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 4861 return; 4862 } 4863 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 4864 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 4865 return; 4866 } 4867 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 4868 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 4869 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 4870 return; 4871 } 4872 } 4873 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 4874 if (collResult != UCOL_LESS) { 4875 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 4876 return; 4877 } 4878 4879 /* clear the reordering */ 4880 ucol_setReorderCodes(myCollation, NULL, 0, &status); 4881 if (U_FAILURE(status)) { 4882 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 4883 return; 4884 } 4885 4886 /* get the reordering again */ 4887 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 4888 if (retrievedReorderCodesLength != 0) { 4889 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 4890 return; 4891 } 4892 4893 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 4894 if (collResult != UCOL_GREATER) { 4895 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 4896 return; 4897 } 4898 4899 /* reset the reordering */ 4900 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status); 4901 if (U_FAILURE(status)) { 4902 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status)); 4903 return; 4904 } 4905 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 4906 if (U_FAILURE(status)) { 4907 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 4908 return; 4909 } 4910 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { 4911 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); 4912 return; 4913 } 4914 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 4915 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { 4916 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 4917 return; 4918 } 4919 } 4920 4921 ucol_close(myCollation); 4922} 4923 4924static int compareUScriptCodes(const void * a, const void * b) 4925{ 4926 return ( *(int32_t*)a - *(int32_t*)b ); 4927} 4928 4929static void TestEquivalentReorderingScripts(void) { 4930 UErrorCode status = U_ZERO_ERROR; 4931 int32_t equivalentScripts[50]; 4932 int32_t equivalentScriptsLength; 4933 int loopIndex; 4934 int32_t equivalentScriptsResult[] = { 4935 USCRIPT_BOPOMOFO, 4936 USCRIPT_LISU, 4937 USCRIPT_LYCIAN, 4938 USCRIPT_CARIAN, 4939 USCRIPT_LYDIAN, 4940 USCRIPT_YI, 4941 USCRIPT_OLD_ITALIC, 4942 USCRIPT_GOTHIC, 4943 USCRIPT_DESERET, 4944 USCRIPT_SHAVIAN, 4945 USCRIPT_OSMANYA, 4946 USCRIPT_LINEAR_B, 4947 USCRIPT_CYPRIOT, 4948 USCRIPT_OLD_SOUTH_ARABIAN, 4949 USCRIPT_AVESTAN, 4950 USCRIPT_IMPERIAL_ARAMAIC, 4951 USCRIPT_INSCRIPTIONAL_PARTHIAN, 4952 USCRIPT_INSCRIPTIONAL_PAHLAVI, 4953 USCRIPT_UGARITIC, 4954 USCRIPT_OLD_PERSIAN, 4955 USCRIPT_CUNEIFORM, 4956 USCRIPT_EGYPTIAN_HIEROGLYPHS, 4957 USCRIPT_PHONETIC_POLLARD, 4958 USCRIPT_SORA_SOMPENG, 4959 USCRIPT_MEROITIC_CURSIVE, 4960 USCRIPT_MEROITIC_HIEROGLYPHS 4961 }; 4962 4963 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes); 4964 4965 /* UScript.GOTHIC */ 4966 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); 4967 if (U_FAILURE(status)) { 4968 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 4969 return; 4970 } 4971 /* 4972 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); 4973 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); 4974 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 4975 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]); 4976 } 4977 */ 4978 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 4979 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 4980 return; 4981 } 4982 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 4983 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 4984 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 4985 return; 4986 } 4987 } 4988 4989 /* UScript.SHAVIAN */ 4990 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status); 4991 if (U_FAILURE(status)) { 4992 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 4993 return; 4994 } 4995 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 4996 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 4997 return; 4998 } 4999 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 5000 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 5001 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 5002 return; 5003 } 5004 } 5005} 5006 5007static void TestReorderingAcrossCloning(void) 5008{ 5009 UErrorCode status = U_ZERO_ERROR; 5010 UCollator *myCollation; 5011 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 5012 UCollator *clonedCollation; 5013 int32_t retrievedReorderCodesLength; 5014 int32_t retrievedReorderCodes[10]; 5015 int loopIndex; 5016 5017 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 5018 5019 /* build collator tertiary */ 5020 myCollation = ucol_open("", &status); 5021 ucol_setStrength(myCollation, UCOL_TERTIARY); 5022 if(U_FAILURE(status)) { 5023 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5024 return; 5025 } 5026 5027 /* set the reorderding */ 5028 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5029 if (U_FAILURE(status)) { 5030 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5031 return; 5032 } 5033 5034 /* clone the collator */ 5035 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status); 5036 if (U_FAILURE(status)) { 5037 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status)); 5038 return; 5039 } 5040 5041 /* get the reordering */ 5042 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 5043 if (U_FAILURE(status)) { 5044 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 5045 return; 5046 } 5047 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 5048 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 5049 return; 5050 } 5051 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 5052 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 5053 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 5054 return; 5055 } 5056 } 5057 5058 /*uprv_free(buffer);*/ 5059 ucol_close(myCollation); 5060 ucol_close(clonedCollation); 5061} 5062 5063/* 5064 * Utility function to test one collation reordering test case set. 5065 * @param testcases Array of test cases. 5066 * @param n_testcases Size of the array testcases. 5067 * @param reorderTokens Array of reordering codes. 5068 * @param reorderTokensLen Size of the array reorderTokens. 5069 */ 5070static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen) 5071{ 5072 uint32_t testCaseNum; 5073 UErrorCode status = U_ZERO_ERROR; 5074 UCollator *myCollation; 5075 5076 myCollation = ucol_open("", &status); 5077 if (U_FAILURE(status)) { 5078 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5079 return; 5080 } 5081 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status); 5082 if(U_FAILURE(status)) { 5083 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 5084 return; 5085 } 5086 5087 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { 5088 doTest(myCollation, 5089 testCases[testCaseNum].source, 5090 testCases[testCaseNum].target, 5091 testCases[testCaseNum].result 5092 ); 5093 } 5094 ucol_close(myCollation); 5095} 5096 5097static void TestGreekFirstReorder(void) 5098{ 5099 const char* strRules[] = { 5100 "[reorder Grek]" 5101 }; 5102 5103 const int32_t apiRules[] = { 5104 USCRIPT_GREEK 5105 }; 5106 5107 const static OneTestCase privateUseCharacterStrings[] = { 5108 { {0x0391}, {0x0391}, UCOL_EQUAL }, 5109 { {0x0041}, {0x0391}, UCOL_GREATER }, 5110 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER }, 5111 { {0x0060}, {0x0391}, UCOL_LESS }, 5112 { {0x0391}, {0xe2dc}, UCOL_LESS }, 5113 { {0x0391}, {0x0060}, UCOL_GREATER }, 5114 }; 5115 5116 /* Test rules creation */ 5117 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5118 5119 /* Test collation reordering API */ 5120 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 5121} 5122 5123static void TestGreekLastReorder(void) 5124{ 5125 const char* strRules[] = { 5126 "[reorder Zzzz Grek]" 5127 }; 5128 5129 const int32_t apiRules[] = { 5130 USCRIPT_UNKNOWN, USCRIPT_GREEK 5131 }; 5132 5133 const static OneTestCase privateUseCharacterStrings[] = { 5134 { {0x0391}, {0x0391}, UCOL_EQUAL }, 5135 { {0x0041}, {0x0391}, UCOL_LESS }, 5136 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS }, 5137 { {0x0060}, {0x0391}, UCOL_LESS }, 5138 { {0x0391}, {0xe2dc}, UCOL_GREATER }, 5139 }; 5140 5141 /* Test rules creation */ 5142 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5143 5144 /* Test collation reordering API */ 5145 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 5146} 5147 5148static void TestNonScriptReorder(void) 5149{ 5150 const char* strRules[] = { 5151 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]" 5152 }; 5153 5154 const int32_t apiRules[] = { 5155 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, 5156 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN, 5157 UCOL_REORDER_CODE_CURRENCY 5158 }; 5159 5160 const static OneTestCase privateUseCharacterStrings[] = { 5161 { {0x0391}, {0x0041}, UCOL_LESS }, 5162 { {0x0041}, {0x0391}, UCOL_GREATER }, 5163 { {0x0060}, {0x0041}, UCOL_LESS }, 5164 { {0x0060}, {0x0391}, UCOL_GREATER }, 5165 { {0x0024}, {0x0041}, UCOL_GREATER }, 5166 }; 5167 5168 /* Test rules creation */ 5169 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5170 5171 /* Test collation reordering API */ 5172 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 5173} 5174 5175static void TestHaniReorder(void) 5176{ 5177 const char* strRules[] = { 5178 "[reorder Hani]" 5179 }; 5180 const int32_t apiRules[] = { 5181 USCRIPT_HAN 5182 }; 5183 5184 const static OneTestCase privateUseCharacterStrings[] = { 5185 { {0x4e00}, {0x0041}, UCOL_LESS }, 5186 { {0x4e00}, {0x0060}, UCOL_GREATER }, 5187 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 5188 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 5189 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 5190 { {0xfa27}, {0x0041}, UCOL_LESS }, 5191 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 5192 }; 5193 5194 /* Test rules creation */ 5195 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5196 5197 /* Test collation reordering API */ 5198 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 5199} 5200 5201static void TestHaniReorderWithOtherRules(void) 5202{ 5203 const char* strRules[] = { 5204 "[reorder Hani] &b<a" 5205 }; 5206 /*const int32_t apiRules[] = { 5207 USCRIPT_HAN 5208 };*/ 5209 5210 const static OneTestCase privateUseCharacterStrings[] = { 5211 { {0x4e00}, {0x0041}, UCOL_LESS }, 5212 { {0x4e00}, {0x0060}, UCOL_GREATER }, 5213 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 5214 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 5215 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 5216 { {0xfa27}, {0x0041}, UCOL_LESS }, 5217 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 5218 { {0x0062}, {0x0061}, UCOL_LESS }, 5219 }; 5220 5221 /* Test rules creation */ 5222 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5223} 5224 5225static void TestMultipleReorder(void) 5226{ 5227 const char* strRules[] = { 5228 "[reorder Grek Zzzz DIGIT Latn Hani]" 5229 }; 5230 5231 const int32_t apiRules[] = { 5232 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN 5233 }; 5234 5235 const static OneTestCase collationTestCases[] = { 5236 { {0x0391}, {0x0041}, UCOL_LESS}, 5237 { {0x0031}, {0x0041}, UCOL_LESS}, 5238 { {0x0041}, {0x4e00}, UCOL_LESS}, 5239 }; 5240 5241 /* Test rules creation */ 5242 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules)); 5243 5244 /* Test collation reordering API */ 5245 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules)); 5246} 5247 5248/* 5249 * Test that covers issue reported in ticket 8814 5250 */ 5251static void TestReorderWithNumericCollation(void) 5252{ 5253 UErrorCode status = U_ZERO_ERROR; 5254 UCollator *myCollation; 5255 UCollator *myReorderCollation; 5256 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS}; 5257 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 }; 5258 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */ 5259 UChar fortyS[] = { 0x0053 }; 5260 UChar fortyThreeP[] = { 0x0050 }; 5261 uint8_t fortyS_sortKey[128]; 5262 int32_t fortyS_sortKey_Length; 5263 uint8_t fortyThreeP_sortKey[128]; 5264 int32_t fortyThreeP_sortKey_Length; 5265 uint8_t fortyS_sortKey_reorder[128]; 5266 int32_t fortyS_sortKey_reorder_Length; 5267 uint8_t fortyThreeP_sortKey_reorder[128]; 5268 int32_t fortyThreeP_sortKey_reorder_Length; 5269 UCollationResult collResult; 5270 UCollationResult collResultReorder; 5271 5272 log_verbose("Testing reordering with and without numeric collation\n"); 5273 5274 /* build collator tertiary with numeric */ 5275 myCollation = ucol_open("", &status); 5276 /* 5277 ucol_setStrength(myCollation, UCOL_TERTIARY); 5278 */ 5279 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 5280 if(U_FAILURE(status)) { 5281 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5282 return; 5283 } 5284 5285 /* build collator tertiary with numeric and reordering */ 5286 myReorderCollation = ucol_open("", &status); 5287 /* 5288 ucol_setStrength(myReorderCollation, UCOL_TERTIARY); 5289 */ 5290 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 5291 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status); 5292 if(U_FAILURE(status)) { 5293 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5294 return; 5295 } 5296 5297 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128); 5298 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128); 5299 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128); 5300 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128); 5301 5302 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) { 5303 log_err_status(status, "ERROR: couldn't generate sort keys\n"); 5304 return; 5305 } 5306 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 5307 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 5308 /* 5309 fprintf(stderr, "\tcollResult = %x\n", collResult); 5310 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder); 5311 fprintf(stderr, "\nfortyS\n"); 5312 for (i = 0; i < fortyS_sortKey_Length; i++) { 5313 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]); 5314 } 5315 fprintf(stderr, "\nfortyThreeP\n"); 5316 for (i = 0; i < fortyThreeP_sortKey_Length; i++) { 5317 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]); 5318 } 5319 */ 5320 if (collResult != collResultReorder) { 5321 log_err_status(status, "ERROR: collation results should have been the same.\n"); 5322 return; 5323 } 5324 5325 ucol_close(myCollation); 5326 ucol_close(myReorderCollation); 5327} 5328 5329static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b) 5330{ 5331 for (; *a == *b; ++a, ++b) { 5332 if (*a == 0) { 5333 return 0; 5334 } 5335 } 5336 return (*a < *b ? -1 : 1); 5337} 5338 5339static void TestImportRulesDeWithPhonebook(void) 5340{ 5341 const char* normalRules[] = { 5342 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc", 5343 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc", 5344 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc", 5345 }; 5346 const OneTestCase normalTests[] = { 5347 { {0x00e6}, {0x00c6}, UCOL_LESS}, 5348 { {0x00fc}, {0x00dc}, UCOL_GREATER}, 5349 }; 5350 5351 const char* importRules[] = { 5352 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]", 5353 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", 5354 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", 5355 }; 5356 const OneTestCase importTests[] = { 5357 { {0x00e6}, {0x00c6}, UCOL_LESS}, 5358 { {0x00fc}, {0x00dc}, UCOL_LESS}, 5359 }; 5360 5361 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules)); 5362 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules)); 5363} 5364 5365#if 0 5366static void TestImportRulesFiWithEor(void) 5367{ 5368 /* DUCET. */ 5369 const char* defaultRules[] = { 5370 "&a<b", /* Dummy rule. */ 5371 }; 5372 5373 const OneTestCase defaultTests[] = { 5374 { {0x0110}, {0x00F0}, UCOL_LESS}, 5375 { {0x00a3}, {0x00a5}, UCOL_LESS}, 5376 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 5377 }; 5378 5379 /* European Ordering rules: ignore currency characters. */ 5380 const char* eorRules[] = { 5381 "[import root-u-co-eor]", 5382 }; 5383 5384 const OneTestCase eorTests[] = { 5385 { {0x0110}, {0x00F0}, UCOL_LESS}, 5386 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 5387 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 5388 }; 5389 5390 const char* fiStdRules[] = { 5391 "[import fi-u-co-standard]", 5392 }; 5393 5394 const OneTestCase fiStdTests[] = { 5395 { {0x0110}, {0x00F0}, UCOL_GREATER}, 5396 { {0x00a3}, {0x00a5}, UCOL_LESS}, 5397 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 5398 }; 5399 5400 /* Both European Ordering Rules and Fi Standard Rules. */ 5401 const char* eorFiStdRules[] = { 5402 "[import root-u-co-eor][import fi-u-co-standard]", 5403 }; 5404 5405 /* This is essentially same as the one before once fi.txt is updated with import. */ 5406 const char* fiEorRules[] = { 5407 "[import fi-u-co-eor]", 5408 }; 5409 5410 const OneTestCase fiEorTests[] = { 5411 { {0x0110}, {0x00F0}, UCOL_GREATER}, 5412 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 5413 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 5414 }; 5415 5416 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 5417 doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules)); 5418 doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules)); 5419 doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules)); 5420 5421 log_knownIssue("8962", NULL); 5422 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule: 5423 eor{ 5424 Sequence{ 5425 "[import root-u-co-eor][import fi-u-co-standard]" 5426 } 5427 Version{"21.0"} 5428 } 5429 */ 5430 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */ 5431 5432} 5433#endif 5434 5435#if 0 5436/* 5437 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless 5438 * the resource files are built with -includeUnihanColl option. 5439 * TODO: Uncomment this function and make it work when unihan rules are built by default. 5440 */ 5441static void TestImportRulesCJKWithUnihan(void) 5442{ 5443 /* DUCET. */ 5444 const char* defaultRules[] = { 5445 "&a<b", /* Dummy rule. */ 5446 }; 5447 5448 const OneTestCase defaultTests[] = { 5449 { {0x3402}, {0x4e1e}, UCOL_GREATER}, 5450 }; 5451 5452 /* European Ordering rules: ignore currency characters. */ 5453 const char* unihanRules[] = { 5454 "[import ko-u-co-unihan]", 5455 }; 5456 5457 const OneTestCase unihanTests[] = { 5458 { {0x3402}, {0x4e1e}, UCOL_LESS}, 5459 }; 5460 5461 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 5462 doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules)); 5463 5464} 5465#endif 5466 5467static void TestImport(void) 5468{ 5469 UCollator* vicoll; 5470 UCollator* escoll; 5471 UCollator* viescoll; 5472 UCollator* importviescoll; 5473 UParseError error; 5474 UErrorCode status = U_ZERO_ERROR; 5475 UChar* virules; 5476 int32_t viruleslength; 5477 UChar* esrules; 5478 int32_t esruleslength; 5479 UChar* viesrules; 5480 int32_t viesruleslength; 5481 char srules[500] = "[import vi][import es]"; 5482 UChar rules[500]; 5483 uint32_t length = 0; 5484 int32_t itemCount; 5485 int32_t i, k; 5486 UChar32 start; 5487 UChar32 end; 5488 UChar str[500]; 5489 int32_t strLength; 5490 5491 uint8_t sk1[500]; 5492 uint8_t sk2[500]; 5493 5494 UBool b; 5495 USet* tailoredSet; 5496 USet* importTailoredSet; 5497 5498 5499 vicoll = ucol_open("vi", &status); 5500 if(U_FAILURE(status)){ 5501 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status)); 5502 return; 5503 } 5504 5505 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); 5506 escoll = ucol_open("es", &status); 5507 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); 5508 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*)); 5509 viesrules[0] = 0; 5510 u_strcat(viesrules, virules); 5511 u_strcat(viesrules, esrules); 5512 viesruleslength = viruleslength + esruleslength; 5513 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 5514 5515 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 5516 length = u_unescape(srules, rules, 500); 5517 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 5518 if(U_FAILURE(status)){ 5519 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5520 return; 5521 } 5522 5523 tailoredSet = ucol_getTailoredSet(viescoll, &status); 5524 importTailoredSet = ucol_getTailoredSet(importviescoll, &status); 5525 5526 if(!uset_equals(tailoredSet, importTailoredSet)){ 5527 log_err("Tailored sets not equal"); 5528 } 5529 5530 uset_close(importTailoredSet); 5531 5532 itemCount = uset_getItemCount(tailoredSet); 5533 5534 for( i = 0; i < itemCount; i++){ 5535 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 5536 if(strLength < 2){ 5537 for (; start <= end; start++){ 5538 k = 0; 5539 U16_APPEND(str, k, 500, start, b); 5540 (void)b; /* Suppress set but not used warning. */ 5541 ucol_getSortKey(viescoll, str, 1, sk1, 500); 5542 ucol_getSortKey(importviescoll, str, 1, sk2, 500); 5543 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 5544 log_err("Sort key for %s not equal\n", str); 5545 break; 5546 } 5547 } 5548 }else{ 5549 ucol_getSortKey(viescoll, str, strLength, sk1, 500); 5550 ucol_getSortKey(importviescoll, str, strLength, sk2, 500); 5551 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 5552 log_err("ZZSort key for %s not equal\n", str); 5553 break; 5554 } 5555 5556 } 5557 } 5558 5559 uset_close(tailoredSet); 5560 5561 uprv_free(viesrules); 5562 5563 ucol_close(vicoll); 5564 ucol_close(escoll); 5565 ucol_close(viescoll); 5566 ucol_close(importviescoll); 5567} 5568 5569static void TestImportWithType(void) 5570{ 5571 UCollator* vicoll; 5572 UCollator* decoll; 5573 UCollator* videcoll; 5574 UCollator* importvidecoll; 5575 UParseError error; 5576 UErrorCode status = U_ZERO_ERROR; 5577 const UChar* virules; 5578 int32_t viruleslength; 5579 const UChar* derules; 5580 int32_t deruleslength; 5581 UChar* viderules; 5582 int32_t videruleslength; 5583 const char srules[500] = "[import vi][import de-u-co-phonebk]"; 5584 UChar rules[500]; 5585 uint32_t length = 0; 5586 int32_t itemCount; 5587 int32_t i, k; 5588 UChar32 start; 5589 UChar32 end; 5590 UChar str[500]; 5591 int32_t strLength; 5592 5593 uint8_t sk1[500]; 5594 uint8_t sk2[500]; 5595 5596 USet* tailoredSet; 5597 USet* importTailoredSet; 5598 5599 vicoll = ucol_open("vi", &status); 5600 if(U_FAILURE(status)){ 5601 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5602 return; 5603 } 5604 virules = ucol_getRules(vicoll, &viruleslength); 5605 /* decoll = ucol_open("de@collation=phonebook", &status); */ 5606 decoll = ucol_open("de-u-co-phonebk", &status); 5607 if(U_FAILURE(status)){ 5608 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5609 return; 5610 } 5611 5612 5613 derules = ucol_getRules(decoll, &deruleslength); 5614 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*)); 5615 viderules[0] = 0; 5616 u_strcat(viderules, virules); 5617 u_strcat(viderules, derules); 5618 videruleslength = viruleslength + deruleslength; 5619 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 5620 5621 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 5622 length = u_unescape(srules, rules, 500); 5623 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 5624 if(U_FAILURE(status)){ 5625 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5626 return; 5627 } 5628 5629 tailoredSet = ucol_getTailoredSet(videcoll, &status); 5630 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status); 5631 5632 if(!uset_equals(tailoredSet, importTailoredSet)){ 5633 log_err("Tailored sets not equal"); 5634 } 5635 5636 uset_close(importTailoredSet); 5637 5638 itemCount = uset_getItemCount(tailoredSet); 5639 5640 for( i = 0; i < itemCount; i++){ 5641 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 5642 if(strLength < 2){ 5643 for (; start <= end; start++){ 5644 k = 0; 5645 U16_APPEND_UNSAFE(str, k, start); 5646 ucol_getSortKey(videcoll, str, 1, sk1, 500); 5647 ucol_getSortKey(importvidecoll, str, 1, sk2, 500); 5648 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 5649 log_err("Sort key for %s not equal\n", str); 5650 break; 5651 } 5652 } 5653 }else{ 5654 ucol_getSortKey(videcoll, str, strLength, sk1, 500); 5655 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500); 5656 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 5657 log_err("Sort key for %s not equal\n", str); 5658 break; 5659 } 5660 5661 } 5662 } 5663 5664 uset_close(tailoredSet); 5665 5666 uprv_free(viderules); 5667 5668 ucol_close(videcoll); 5669 ucol_close(importvidecoll); 5670 ucol_close(vicoll); 5671 ucol_close(decoll); 5672} 5673 5674/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */ 5675static const UChar longUpperStr1[]= { /* 155 chars */ 5676 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 5677 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52, 5678 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E, 5679 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C, 5680 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E, 5681 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20, 5682 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45, 5683 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32, 5684 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62, 5685 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61 5686}; 5687 5688/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */ 5689static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */ 5690 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 5691 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 5692 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 5693 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 5694 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20 5695}; 5696 5697/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */ 5698static const UChar longUpperStr3[]= { /* 324 chars */ 5699 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5700 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5701 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5702 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5703 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5704 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5705 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5706 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5707 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5708 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5709 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 5710 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20 5711}; 5712 5713#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0])) 5714 5715typedef struct { 5716 const UChar * longUpperStrPtr; 5717 int32_t longUpperStrLen; 5718} LongUpperStrItem; 5719 5720/* String pointers must be in reverse collation order of the corresponding strings */ 5721static const LongUpperStrItem longUpperStrItems[] = { 5722 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, 5723 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, 5724 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, 5725 { NULL, 0 } 5726}; 5727 5728enum { kCollKeyLenMax = 850 }; /* may change with collation changes */ 5729 5730/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */ 5731static void TestCaseLevelBufferOverflow(void) 5732{ 5733 UErrorCode status = U_ZERO_ERROR; 5734 UCollator * ucol = ucol_open("root", &status); 5735 if ( U_SUCCESS(status) ) { 5736 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); 5737 if ( U_SUCCESS(status) ) { 5738 const LongUpperStrItem * itemPtr; 5739 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax]; 5740 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) { 5741 int32_t sortKeyLen; 5742 if (itemPtr > longUpperStrItems) { 5743 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA); 5744 } 5745 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax); 5746 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) { 5747 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen); 5748 break; 5749 } 5750 if ( itemPtr > longUpperStrItems ) { 5751 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB); 5752 if (compareResult >= 0) { 5753 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult); 5754 } 5755 } 5756 } 5757 } else { 5758 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status)); 5759 } 5760 ucol_close(ucol); 5761 } else { 5762 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status)); 5763 } 5764} 5765 5766/* Test for #10595 */ 5767static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */ 5768#define KEY_PART_SIZE 16 5769 5770static void TestNextSortKeyPartJaIdentical(void) 5771{ 5772 UErrorCode status = U_ZERO_ERROR; 5773 UCollator *coll; 5774 uint8_t keyPart[KEY_PART_SIZE]; 5775 UCharIterator iter; 5776 uint32_t state[2] = {0, 0}; 5777 int32_t keyPartLen; 5778 5779 coll = ucol_open("ja", &status); 5780 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 5781 if (U_FAILURE(status)) { 5782 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status)); 5783 return; 5784 } 5785 5786 uiter_setString(&iter, testJapaneseName, 5); 5787 keyPartLen = KEY_PART_SIZE; 5788 while (keyPartLen == KEY_PART_SIZE) { 5789 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status); 5790 if (U_FAILURE(status)) { 5791 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status)); 5792 break; 5793 } 5794 } 5795 5796 ucol_close(coll); 5797} 5798 5799#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 5800 5801void addMiscCollTest(TestNode** root) 5802{ 5803 TEST(TestRuleOptions); 5804 TEST(TestBeforePrefixFailure); 5805 TEST(TestContractionClosure); 5806 TEST(TestPrefixCompose); 5807 TEST(TestStrCollIdenticalPrefix); 5808 TEST(TestPrefix); 5809 TEST(TestNewJapanese); 5810 /*TEST(TestLimitations);*/ 5811 TEST(TestNonChars); 5812 TEST(TestExtremeCompression); 5813 TEST(TestSurrogates); 5814 TEST(TestVariableTopSetting); 5815 TEST(TestMaxVariable); 5816 TEST(TestBocsuCoverage); 5817 TEST(TestCyrillicTailoring); 5818 TEST(TestCase); 5819 TEST(IncompleteCntTest); 5820 TEST(BlackBirdTest); 5821 TEST(FunkyATest); 5822 TEST(BillFairmanTest); 5823 TEST(TestChMove); 5824 TEST(TestImplicitTailoring); 5825 TEST(TestFCDProblem); 5826 TEST(TestEmptyRule); 5827 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 5828 TEST(TestJ815); 5829 /*TEST(TestJ831);*/ /* we changed lv locale */ 5830 TEST(TestBefore); 5831 TEST(TestHangulTailoring); 5832 TEST(TestUCARules); 5833 TEST(TestIncrementalNormalize); 5834 TEST(TestComposeDecompose); 5835 TEST(TestCompressOverlap); 5836 TEST(TestContraction); 5837 TEST(TestExpansion); 5838 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 5839 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 5840 TEST(TestOptimize); 5841 TEST(TestSuppressContractions); 5842 TEST(Alexis2); 5843 TEST(TestHebrewUCA); 5844 TEST(TestPartialSortKeyTermination); 5845 TEST(TestSettings); 5846 TEST(TestEquals); 5847 TEST(TestJ2726); 5848 TEST(NullRule); 5849 TEST(TestNumericCollation); 5850 TEST(TestTibetanConformance); 5851 TEST(TestPinyinProblem); 5852 TEST(TestSeparateTrees); 5853 TEST(TestBeforePinyin); 5854 TEST(TestBeforeTightening); 5855 /*TEST(TestMoreBefore);*/ 5856 TEST(TestTailorNULL); 5857 TEST(TestUpperFirstQuaternary); 5858 TEST(TestJ4960); 5859 TEST(TestJ5223); 5860 TEST(TestJ5232); 5861 TEST(TestJ5367); 5862 TEST(TestHiragana); 5863 TEST(TestSortKeyConsistency); 5864 TEST(TestVI5913); /* VI, RO tailored rules */ 5865 TEST(TestCroatianSortKey); 5866 TEST(TestTailor6179); 5867 TEST(TestUCAPrecontext); 5868 TEST(TestOutOfBuffer5468); 5869 TEST(TestSameStrengthList); 5870 5871 TEST(TestSameStrengthListQuoted); 5872 TEST(TestSameStrengthListSupplemental); 5873 TEST(TestSameStrengthListQwerty); 5874 TEST(TestSameStrengthListQuotedQwerty); 5875 TEST(TestSameStrengthListRanges); 5876 TEST(TestSameStrengthListSupplementalRanges); 5877 TEST(TestSpecialCharacters); 5878 TEST(TestPrivateUseCharacters); 5879 TEST(TestPrivateUseCharactersInList); 5880 TEST(TestPrivateUseCharactersInRange); 5881 TEST(TestInvalidListsAndRanges); 5882 TEST(TestImportRulesDeWithPhonebook); 5883 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */ 5884 /* TEST(TestImportRulesCJKWithUnihan); */ 5885 TEST(TestImport); 5886 TEST(TestImportWithType); 5887 5888 TEST(TestBeforeRuleWithScriptReordering); 5889 TEST(TestNonLeadBytesDuringCollationReordering); 5890 TEST(TestReorderingAPI); 5891 TEST(TestReorderingAPIWithRuleCreatedCollator); 5892 TEST(TestEquivalentReorderingScripts); 5893 TEST(TestGreekFirstReorder); 5894 TEST(TestGreekLastReorder); 5895 TEST(TestNonScriptReorder); 5896 TEST(TestHaniReorder); 5897 TEST(TestHaniReorderWithOtherRules); 5898 TEST(TestMultipleReorder); 5899 TEST(TestReorderingAcrossCloning); 5900 TEST(TestReorderWithNumericCollation); 5901 5902 TEST(TestCaseLevelBufferOverflow); 5903 TEST(TestNextSortKeyPartJaIdentical); 5904} 5905 5906#endif /* #if !UCONFIG_NO_COLLATION */ 5907