1/* 2********************************************************************** 3* Copyright (c) 2013-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6*/ 7 8#include <string.h> 9#include "unicode/localpointer.h" 10#include "unicode/uperf.h" 11#include "unicode/ucol.h" 12#include "unicode/coll.h" 13#include "unicode/uiter.h" 14#include "unicode/ustring.h" 15#include "unicode/sortkey.h" 16#include "uarrsort.h" 17#include "uoptions.h" 18#include "ustr_imp.h" 19 20#define COMPACT_ARRAY(CompactArrays, UNIT) \ 21struct CompactArrays{\ 22 CompactArrays(const CompactArrays & );\ 23 CompactArrays & operator=(const CompactArrays & );\ 24 int32_t count;/*total number of the strings*/ \ 25 int32_t * index;/*relative offset in data*/ \ 26 UNIT * data; /*the real space to hold strings*/ \ 27 \ 28 ~CompactArrays(){free(index);free(data);} \ 29 CompactArrays() : count(0), index(NULL), data(NULL) { \ 30 index = (int32_t *) realloc(index, sizeof(int32_t)); \ 31 index[0] = 0; \ 32 } \ 33 void append_one(int32_t theLen){ /*include terminal NULL*/ \ 34 count++; \ 35 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \ 36 index[count] = index[count - 1] + theLen; \ 37 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \ 38 } \ 39 UNIT * last(){return data + index[count - 1];} \ 40 const UNIT * dataOf(int32_t i) const {return data + index[i];} \ 41 int32_t lengthOf(int i) const {return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \ 42}; 43 44COMPACT_ARRAY(CA_uchar, UChar) 45COMPACT_ARRAY(CA_char, char) 46 47#define MAX_TEST_STRINGS_FOR_PERMUTING 1000 48 49// C API test cases 50 51// 52// Test case taking a single test data array, calling ucol_strcoll by permuting the test data 53// 54class Strcoll : public UPerfFunction 55{ 56public: 57 Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen); 58 ~Strcoll(); 59 virtual void call(UErrorCode* status); 60 virtual long getOperationsPerIteration(); 61 62private: 63 const UCollator *coll; 64 const CA_uchar *source; 65 UBool useLen; 66 int32_t maxTestStrings; 67}; 68 69Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen) 70 : coll(coll), 71 source(source), 72 useLen(useLen) 73{ 74 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 75} 76 77Strcoll::~Strcoll() 78{ 79} 80 81void Strcoll::call(UErrorCode* status) 82{ 83 if (U_FAILURE(*status)) return; 84 85 // call strcoll for permutation 86 int32_t divisor = source->count / maxTestStrings; 87 int32_t srcLen, tgtLen; 88 int32_t cmp = 0; 89 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) { 90 if (i % divisor) continue; 91 numTestStringsI++; 92 srcLen = useLen ? source->lengthOf(i) : -1; 93 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) { 94 if (j % divisor) continue; 95 numTestStringsJ++; 96 tgtLen = useLen ? source->lengthOf(j) : -1; 97 cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen); 98 } 99 } 100 // At the end, cmp must be 0 101 if (cmp != 0) { 102 *status = U_INTERNAL_PROGRAM_ERROR; 103 } 104} 105 106long Strcoll::getOperationsPerIteration() 107{ 108 return maxTestStrings * maxTestStrings; 109} 110 111// 112// Test case taking two test data arrays, calling ucol_strcoll for strings at a same index 113// 114class Strcoll_2 : public UPerfFunction 115{ 116public: 117 Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen); 118 ~Strcoll_2(); 119 virtual void call(UErrorCode* status); 120 virtual long getOperationsPerIteration(); 121 122private: 123 const UCollator *coll; 124 const CA_uchar *source; 125 const CA_uchar *target; 126 UBool useLen; 127}; 128 129Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen) 130 : coll(coll), 131 source(source), 132 target(target), 133 useLen(useLen) 134{ 135} 136 137Strcoll_2::~Strcoll_2() 138{ 139} 140 141void Strcoll_2::call(UErrorCode* status) 142{ 143 if (U_FAILURE(*status)) return; 144 145 // call strcoll for two strings at the same index 146 if (source->count < target->count) { 147 *status = U_ILLEGAL_ARGUMENT_ERROR; 148 } else { 149 for (int32_t i = 0; i < source->count; i++) { 150 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 151 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 152 ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen); 153 } 154 } 155} 156 157long Strcoll_2::getOperationsPerIteration() 158{ 159 return source->count; 160} 161 162 163// 164// Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data 165// 166class StrcollUTF8 : public UPerfFunction 167{ 168public: 169 StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen); 170 ~StrcollUTF8(); 171 virtual void call(UErrorCode* status); 172 virtual long getOperationsPerIteration(); 173 174private: 175 const UCollator *coll; 176 const CA_char *source; 177 UBool useLen; 178 int32_t maxTestStrings; 179}; 180 181StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen) 182 : coll(coll), 183 source(source), 184 useLen(useLen) 185{ 186 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 187} 188 189StrcollUTF8::~StrcollUTF8() 190{ 191} 192 193void StrcollUTF8::call(UErrorCode* status) 194{ 195 if (U_FAILURE(*status)) return; 196 197 // call strcollUTF8 for permutation 198 int32_t divisor = source->count / maxTestStrings; 199 int32_t srcLen, tgtLen; 200 int32_t cmp = 0; 201 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) { 202 if (i % divisor) continue; 203 numTestStringsI++; 204 srcLen = useLen ? source->lengthOf(i) : -1; 205 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) { 206 if (j % divisor) continue; 207 numTestStringsJ++; 208 tgtLen = useLen ? source->lengthOf(j) : -1; 209 cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status); 210 } 211 } 212 // At the end, cmp must be 0 213 if (cmp != 0) { 214 *status = U_INTERNAL_PROGRAM_ERROR; 215 } 216} 217 218long StrcollUTF8::getOperationsPerIteration() 219{ 220 return maxTestStrings * maxTestStrings; 221} 222 223// 224// Test case taking two test data arrays, calling ucol_strcoll for strings at a same index 225// 226class StrcollUTF8_2 : public UPerfFunction 227{ 228public: 229 StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen); 230 ~StrcollUTF8_2(); 231 virtual void call(UErrorCode* status); 232 virtual long getOperationsPerIteration(); 233 234private: 235 const UCollator *coll; 236 const CA_char *source; 237 const CA_char *target; 238 UBool useLen; 239}; 240 241StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen) 242 : coll(coll), 243 source(source), 244 target(target), 245 useLen(useLen) 246{ 247} 248 249StrcollUTF8_2::~StrcollUTF8_2() 250{ 251} 252 253void StrcollUTF8_2::call(UErrorCode* status) 254{ 255 if (U_FAILURE(*status)) return; 256 257 // call strcoll for two strings at the same index 258 if (source->count < target->count) { 259 *status = U_ILLEGAL_ARGUMENT_ERROR; 260 } else { 261 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 262 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 263 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 264 ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status); 265 } 266 } 267} 268 269long StrcollUTF8_2::getOperationsPerIteration() 270{ 271 return source->count; 272} 273 274// 275// Test case taking a single test data array, calling ucol_getSortKey for each 276// 277class GetSortKey : public UPerfFunction 278{ 279public: 280 GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen); 281 ~GetSortKey(); 282 virtual void call(UErrorCode* status); 283 virtual long getOperationsPerIteration(); 284 285private: 286 const UCollator *coll; 287 const CA_uchar *source; 288 UBool useLen; 289}; 290 291GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen) 292 : coll(coll), 293 source(source), 294 useLen(useLen) 295{ 296} 297 298GetSortKey::~GetSortKey() 299{ 300} 301 302#define KEY_BUF_SIZE 512 303 304void GetSortKey::call(UErrorCode* status) 305{ 306 if (U_FAILURE(*status)) return; 307 308 uint8_t key[KEY_BUF_SIZE]; 309 int32_t len; 310 311 if (useLen) { 312 for (int32_t i = 0; i < source->count; i++) { 313 len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE); 314 } 315 } else { 316 for (int32_t i = 0; i < source->count; i++) { 317 len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE); 318 } 319 } 320} 321 322long GetSortKey::getOperationsPerIteration() 323{ 324 return source->count; 325} 326 327// 328// Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the 329// given buffer size 330// 331class NextSortKeyPart : public UPerfFunction 332{ 333public: 334 NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1); 335 ~NextSortKeyPart(); 336 virtual void call(UErrorCode* status); 337 virtual long getOperationsPerIteration(); 338 virtual long getEventsPerIteration(); 339 340private: 341 const UCollator *coll; 342 const CA_uchar *source; 343 int32_t bufSize; 344 int32_t maxIteration; 345 long events; 346}; 347 348// Note: maxIteration = -1 -> repeat until the end of collation key 349NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */) 350 : coll(coll), 351 source(source), 352 bufSize(bufSize), 353 maxIteration(maxIteration), 354 events(0) 355{ 356} 357 358NextSortKeyPart::~NextSortKeyPart() 359{ 360} 361 362void NextSortKeyPart::call(UErrorCode* status) 363{ 364 if (U_FAILURE(*status)) return; 365 366 uint8_t *part = (uint8_t *)malloc(bufSize); 367 uint32_t state[2]; 368 UCharIterator iter; 369 370 events = 0; 371 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) { 372 uiter_setString(&iter, source->dataOf(i), source->lengthOf(i)); 373 state[0] = 0; 374 state[1] = 0; 375 int32_t partLen = bufSize; 376 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) { 377 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status); 378 events++; 379 } 380 } 381 free(part); 382} 383 384long NextSortKeyPart::getOperationsPerIteration() 385{ 386 return source->count; 387} 388 389long NextSortKeyPart::getEventsPerIteration() 390{ 391 return events; 392} 393 394// 395// Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the 396// given buffer size 397// 398class NextSortKeyPartUTF8 : public UPerfFunction 399{ 400public: 401 NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1); 402 ~NextSortKeyPartUTF8(); 403 virtual void call(UErrorCode* status); 404 virtual long getOperationsPerIteration(); 405 virtual long getEventsPerIteration(); 406 407private: 408 const UCollator *coll; 409 const CA_char *source; 410 int32_t bufSize; 411 int32_t maxIteration; 412 long events; 413}; 414 415// Note: maxIteration = -1 -> repeat until the end of collation key 416NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */) 417 : coll(coll), 418 source(source), 419 bufSize(bufSize), 420 maxIteration(maxIteration), 421 events(0) 422{ 423} 424 425NextSortKeyPartUTF8::~NextSortKeyPartUTF8() 426{ 427} 428 429void NextSortKeyPartUTF8::call(UErrorCode* status) 430{ 431 if (U_FAILURE(*status)) return; 432 433 uint8_t *part = (uint8_t *)malloc(bufSize); 434 uint32_t state[2]; 435 UCharIterator iter; 436 437 events = 0; 438 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) { 439 uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i)); 440 state[0] = 0; 441 state[1] = 0; 442 int32_t partLen = bufSize; 443 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) { 444 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status); 445 events++; 446 } 447 } 448 free(part); 449} 450 451long NextSortKeyPartUTF8::getOperationsPerIteration() 452{ 453 return source->count; 454} 455 456long NextSortKeyPartUTF8::getEventsPerIteration() 457{ 458 return events; 459} 460 461// CPP API test cases 462 463// 464// Test case taking a single test data array, calling Collator::compare by permuting the test data 465// 466class CppCompare : public UPerfFunction 467{ 468public: 469 CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen); 470 ~CppCompare(); 471 virtual void call(UErrorCode* status); 472 virtual long getOperationsPerIteration(); 473 474private: 475 const Collator *coll; 476 const CA_uchar *source; 477 UBool useLen; 478 int32_t maxTestStrings; 479}; 480 481CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen) 482 : coll(coll), 483 source(source), 484 useLen(useLen) 485{ 486 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 487} 488 489CppCompare::~CppCompare() 490{ 491} 492 493void CppCompare::call(UErrorCode* status) { 494 if (U_FAILURE(*status)) return; 495 496 // call compare for permutation of test data 497 int32_t divisor = source->count / maxTestStrings; 498 int32_t srcLen, tgtLen; 499 int32_t cmp = 0; 500 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) { 501 if (i % divisor) continue; 502 numTestStringsI++; 503 srcLen = useLen ? source->lengthOf(i) : -1; 504 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) { 505 if (j % divisor) continue; 506 numTestStringsJ++; 507 tgtLen = useLen ? source->lengthOf(j) : -1; 508 cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen); 509 } 510 } 511 // At the end, cmp must be 0 512 if (cmp != 0) { 513 *status = U_INTERNAL_PROGRAM_ERROR; 514 } 515} 516 517long CppCompare::getOperationsPerIteration() 518{ 519 return maxTestStrings * maxTestStrings; 520} 521 522// 523// Test case taking two test data arrays, calling Collator::compare for strings at a same index 524// 525class CppCompare_2 : public UPerfFunction 526{ 527public: 528 CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen); 529 ~CppCompare_2(); 530 virtual void call(UErrorCode* status); 531 virtual long getOperationsPerIteration(); 532 533private: 534 const Collator *coll; 535 const CA_uchar *source; 536 const CA_uchar *target; 537 UBool useLen; 538}; 539 540CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen) 541 : coll(coll), 542 source(source), 543 target(target), 544 useLen(useLen) 545{ 546} 547 548CppCompare_2::~CppCompare_2() 549{ 550} 551 552void CppCompare_2::call(UErrorCode* status) { 553 if (U_FAILURE(*status)) return; 554 555 // call strcoll for two strings at the same index 556 if (source->count < target->count) { 557 *status = U_ILLEGAL_ARGUMENT_ERROR; 558 } else { 559 for (int32_t i = 0; i < source->count; i++) { 560 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 561 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 562 coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen); 563 } 564 } 565} 566 567long CppCompare_2::getOperationsPerIteration() 568{ 569 return source->count; 570} 571 572 573// 574// Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data 575// 576class CppCompareUTF8 : public UPerfFunction 577{ 578public: 579 CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen); 580 ~CppCompareUTF8(); 581 virtual void call(UErrorCode* status); 582 virtual long getOperationsPerIteration(); 583 584private: 585 const Collator *coll; 586 const CA_char *source; 587 UBool useLen; 588 int32_t maxTestStrings; 589}; 590 591CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen) 592 : coll(coll), 593 source(source), 594 useLen(useLen) 595{ 596 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 597} 598 599CppCompareUTF8::~CppCompareUTF8() 600{ 601} 602 603void CppCompareUTF8::call(UErrorCode* status) { 604 if (U_FAILURE(*status)) return; 605 606 // call compareUTF8 for all permutations 607 int32_t divisor = source->count / maxTestStrings; 608 StringPiece src, tgt; 609 int32_t cmp = 0; 610 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) { 611 if (i % divisor) continue; 612 numTestStringsI++; 613 614 if (useLen) { 615 src.set(source->dataOf(i), source->lengthOf(i)); 616 } else { 617 src.set(source->dataOf(i)); 618 } 619 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) { 620 if (j % divisor) continue; 621 numTestStringsJ++; 622 623 if (useLen) { 624 tgt.set(source->dataOf(i), source->lengthOf(i)); 625 } else { 626 tgt.set(source->dataOf(i)); 627 } 628 cmp += coll->compareUTF8(src, tgt, *status); 629 } 630 } 631 // At the end, cmp must be 0 632 if (cmp != 0) { 633 *status = U_INTERNAL_PROGRAM_ERROR; 634 } 635} 636 637long CppCompareUTF8::getOperationsPerIteration() 638{ 639 return maxTestStrings * maxTestStrings; 640} 641 642 643// 644// Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index 645// 646class CppCompareUTF8_2 : public UPerfFunction 647{ 648public: 649 CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen); 650 ~CppCompareUTF8_2(); 651 virtual void call(UErrorCode* status); 652 virtual long getOperationsPerIteration(); 653 654private: 655 const Collator *coll; 656 const CA_char *source; 657 const CA_char *target; 658 UBool useLen; 659}; 660 661CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen) 662 : coll(coll), 663 source(source), 664 target(target), 665 useLen(useLen) 666{ 667} 668 669CppCompareUTF8_2::~CppCompareUTF8_2() 670{ 671} 672 673void CppCompareUTF8_2::call(UErrorCode* status) { 674 if (U_FAILURE(*status)) return; 675 676 // call strcoll for two strings at the same index 677 StringPiece src, tgt; 678 if (source->count < target->count) { 679 *status = U_ILLEGAL_ARGUMENT_ERROR; 680 } else { 681 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 682 if (useLen) { 683 src.set(source->dataOf(i), source->lengthOf(i)); 684 tgt.set(target->dataOf(i), target->lengthOf(i)); 685 } else { 686 src.set(source->dataOf(i)); 687 tgt.set(target->dataOf(i)); 688 } 689 coll->compareUTF8(src, tgt, *status); 690 } 691 } 692} 693 694long CppCompareUTF8_2::getOperationsPerIteration() 695{ 696 return source->count; 697} 698 699 700// 701// Test case taking a single test data array, calling Collator::getCollationKey for each 702// 703class CppGetCollationKey : public UPerfFunction 704{ 705public: 706 CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen); 707 ~CppGetCollationKey(); 708 virtual void call(UErrorCode* status); 709 virtual long getOperationsPerIteration(); 710 711private: 712 const Collator *coll; 713 const CA_uchar *source; 714 UBool useLen; 715}; 716 717CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen) 718 : coll(coll), 719 source(source), 720 useLen(useLen) 721{ 722} 723 724CppGetCollationKey::~CppGetCollationKey() 725{ 726} 727 728void CppGetCollationKey::call(UErrorCode* status) 729{ 730 if (U_FAILURE(*status)) return; 731 732 CollationKey key; 733 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 734 coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status); 735 } 736} 737 738long CppGetCollationKey::getOperationsPerIteration() { 739 return source->count; 740} 741 742namespace { 743 744struct CollatorAndCounter { 745 CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {} 746 CollatorAndCounter(const Collator& coll, const UCollator *ucoll) 747 : coll(coll), ucoll(ucoll), counter(0) {} 748 const Collator& coll; 749 const UCollator *ucoll; 750 int32_t counter; 751}; 752 753int32_t U_CALLCONV 754UniStrCollatorComparator(const void* context, const void* left, const void* right) { 755 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 756 const UnicodeString& leftString = **(const UnicodeString**)left; 757 const UnicodeString& rightString = **(const UnicodeString**)right; 758 UErrorCode errorCode = U_ZERO_ERROR; 759 ++cc.counter; 760 return cc.coll.compare(leftString, rightString, errorCode); 761} 762 763} // namespace 764 765class CollPerfFunction : public UPerfFunction { 766public: 767 CollPerfFunction(const Collator& coll, const UCollator *ucoll) 768 : coll(coll), ucoll(ucoll), ops(0) {} 769 virtual ~CollPerfFunction(); 770 /** Calls call() to set the ops field, and returns that. */ 771 virtual long getOperationsPerIteration(); 772 773protected: 774 const Collator& coll; 775 const UCollator *ucoll; 776 int32_t ops; 777}; 778 779CollPerfFunction::~CollPerfFunction() {} 780 781long CollPerfFunction::getOperationsPerIteration() { 782 UErrorCode errorCode = U_ZERO_ERROR; 783 call(&errorCode); 784 return U_SUCCESS(errorCode) ? ops : 0; 785} 786 787class UniStrCollPerfFunction : public CollPerfFunction { 788public: 789 UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 790 : CollPerfFunction(coll, ucoll), d16(data16), 791 source(new UnicodeString*[d16->count]) { 792 for (int32_t i = 0; i < d16->count; ++i) { 793 source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i)); 794 } 795 } 796 virtual ~UniStrCollPerfFunction(); 797 798protected: 799 const CA_uchar* d16; 800 UnicodeString** source; 801}; 802 803UniStrCollPerfFunction::~UniStrCollPerfFunction() { 804 for (int32_t i = 0; i < d16->count; ++i) { 805 delete source[i]; 806 } 807 delete[] source; 808} 809 810// 811// Test case sorting an array of UnicodeString pointers. 812// 813class UniStrSort : public UniStrCollPerfFunction { 814public: 815 UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 816 : UniStrCollPerfFunction(coll, ucoll, data16), 817 dest(new UnicodeString*[d16->count]) {} 818 virtual ~UniStrSort(); 819 virtual void call(UErrorCode* status); 820 821private: 822 UnicodeString** dest; // aliases only 823}; 824 825UniStrSort::~UniStrSort() { 826 delete[] dest; 827} 828 829void UniStrSort::call(UErrorCode* status) { 830 if (U_FAILURE(*status)) return; 831 832 CollatorAndCounter cc(coll); 833 int32_t count = d16->count; 834 memcpy(dest, source, count * sizeof(UnicodeString *)); 835 uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *), 836 UniStrCollatorComparator, &cc, TRUE, status); 837 ops = cc.counter; 838} 839 840namespace { 841 842int32_t U_CALLCONV 843StringPieceCollatorComparator(const void* context, const void* left, const void* right) { 844 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 845 const StringPiece& leftString = *(const StringPiece*)left; 846 const StringPiece& rightString = *(const StringPiece*)right; 847 UErrorCode errorCode = U_ZERO_ERROR; 848 ++cc.counter; 849 return cc.coll.compareUTF8(leftString, rightString, errorCode); 850} 851 852int32_t U_CALLCONV 853StringPieceUCollatorComparator(const void* context, const void* left, const void* right) { 854 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 855 const StringPiece& leftString = *(const StringPiece*)left; 856 const StringPiece& rightString = *(const StringPiece*)right; 857 UErrorCode errorCode = U_ZERO_ERROR; 858 ++cc.counter; 859 return ucol_strcollUTF8(cc.ucoll, 860 leftString.data(), leftString.length(), 861 rightString.data(), rightString.length(), &errorCode); 862} 863 864} // namespace 865 866class StringPieceCollPerfFunction : public CollPerfFunction { 867public: 868 StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 869 : CollPerfFunction(coll, ucoll), d8(data8), 870 source(new StringPiece[d8->count]) { 871 for (int32_t i = 0; i < d8->count; ++i) { 872 source[i].set(d8->dataOf(i), d8->lengthOf(i)); 873 } 874 } 875 virtual ~StringPieceCollPerfFunction(); 876 877protected: 878 const CA_char* d8; 879 StringPiece* source; 880}; 881 882StringPieceCollPerfFunction::~StringPieceCollPerfFunction() { 883 delete[] source; 884} 885 886class StringPieceSort : public StringPieceCollPerfFunction { 887public: 888 StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 889 : StringPieceCollPerfFunction(coll, ucoll, data8), 890 dest(new StringPiece[d8->count]) {} 891 virtual ~StringPieceSort(); 892 893protected: 894 StringPiece* dest; 895}; 896 897StringPieceSort::~StringPieceSort() { 898 delete[] dest; 899} 900 901// 902// Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8(). 903// 904class StringPieceSortCpp : public StringPieceSort { 905public: 906 StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 907 : StringPieceSort(coll, ucoll, data8) {} 908 virtual ~StringPieceSortCpp(); 909 virtual void call(UErrorCode* status); 910}; 911 912StringPieceSortCpp::~StringPieceSortCpp() {} 913 914void StringPieceSortCpp::call(UErrorCode* status) { 915 if (U_FAILURE(*status)) return; 916 917 CollatorAndCounter cc(coll); 918 int32_t count = d8->count; 919 memcpy(dest, source, count * sizeof(StringPiece)); 920 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece), 921 StringPieceCollatorComparator, &cc, TRUE, status); 922 ops = cc.counter; 923} 924 925// 926// Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8(). 927// 928class StringPieceSortC : public StringPieceSort { 929public: 930 StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 931 : StringPieceSort(coll, ucoll, data8) {} 932 virtual ~StringPieceSortC(); 933 virtual void call(UErrorCode* status); 934}; 935 936StringPieceSortC::~StringPieceSortC() {} 937 938void StringPieceSortC::call(UErrorCode* status) { 939 if (U_FAILURE(*status)) return; 940 941 CollatorAndCounter cc(coll, ucoll); 942 int32_t count = d8->count; 943 memcpy(dest, source, count * sizeof(StringPiece)); 944 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece), 945 StringPieceUCollatorComparator, &cc, TRUE, status); 946 ops = cc.counter; 947} 948 949// 950// Test case performing binary searches in a sorted array of UnicodeString pointers. 951// 952class UniStrBinSearch : public UniStrCollPerfFunction { 953public: 954 UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 955 : UniStrCollPerfFunction(coll, ucoll, data16) {} 956 virtual ~UniStrBinSearch(); 957 virtual void call(UErrorCode* status); 958}; 959 960UniStrBinSearch::~UniStrBinSearch() {} 961 962void UniStrBinSearch::call(UErrorCode* status) { 963 if (U_FAILURE(*status)) return; 964 965 CollatorAndCounter cc(coll); 966 int32_t count = d16->count; 967 for (int32_t i = 0; i < count; ++i) { 968 (void)uprv_stableBinarySearch((char *)source, count, 969 source + i, (int32_t)sizeof(UnicodeString *), 970 UniStrCollatorComparator, &cc); 971 } 972 ops = cc.counter; 973} 974 975class StringPieceBinSearch : public StringPieceCollPerfFunction { 976public: 977 StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 978 : StringPieceCollPerfFunction(coll, ucoll, data8) {} 979 virtual ~StringPieceBinSearch(); 980}; 981 982StringPieceBinSearch::~StringPieceBinSearch() {} 983 984// 985// Test case performing binary searches in a sorted array of UTF-8 StringPiece's 986// with Collator::compareUTF8(). 987// 988class StringPieceBinSearchCpp : public StringPieceBinSearch { 989public: 990 StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 991 : StringPieceBinSearch(coll, ucoll, data8) {} 992 virtual ~StringPieceBinSearchCpp(); 993 virtual void call(UErrorCode* status); 994}; 995 996StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {} 997 998void StringPieceBinSearchCpp::call(UErrorCode* status) { 999 if (U_FAILURE(*status)) return; 1000 1001 CollatorAndCounter cc(coll); 1002 int32_t count = d8->count; 1003 for (int32_t i = 0; i < count; ++i) { 1004 (void)uprv_stableBinarySearch((char *)source, count, 1005 source + i, (int32_t)sizeof(StringPiece), 1006 StringPieceCollatorComparator, &cc); 1007 } 1008 ops = cc.counter; 1009} 1010 1011// 1012// Test case performing binary searches in a sorted array of UTF-8 StringPiece's 1013// with ucol_strcollUTF8(). 1014// 1015class StringPieceBinSearchC : public StringPieceBinSearch { 1016public: 1017 StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 1018 : StringPieceBinSearch(coll, ucoll, data8) {} 1019 virtual ~StringPieceBinSearchC(); 1020 virtual void call(UErrorCode* status); 1021}; 1022 1023StringPieceBinSearchC::~StringPieceBinSearchC() {} 1024 1025void StringPieceBinSearchC::call(UErrorCode* status) { 1026 if (U_FAILURE(*status)) return; 1027 1028 CollatorAndCounter cc(coll, ucoll); 1029 int32_t count = d8->count; 1030 for (int32_t i = 0; i < count; ++i) { 1031 (void)uprv_stableBinarySearch((char *)source, count, 1032 source + i, (int32_t)sizeof(StringPiece), 1033 StringPieceUCollatorComparator, &cc); 1034 } 1035 ops = cc.counter; 1036} 1037 1038 1039class CollPerf2Test : public UPerfTest 1040{ 1041public: 1042 CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status); 1043 ~CollPerf2Test(); 1044 virtual UPerfFunction* runIndexedTest( 1045 int32_t index, UBool exec, const char *&name, char *par = NULL); 1046 1047private: 1048 UCollator* coll; 1049 Collator* collObj; 1050 1051 int32_t count; 1052 CA_uchar* data16; 1053 CA_char* data8; 1054 1055 CA_uchar* modData16; 1056 CA_char* modData8; 1057 1058 CA_uchar* sortedData16; 1059 CA_char* sortedData8; 1060 1061 CA_uchar* randomData16; 1062 CA_char* randomData8; 1063 1064 const CA_uchar* getData16(UErrorCode &status); 1065 const CA_char* getData8(UErrorCode &status); 1066 1067 const CA_uchar* getModData16(UErrorCode &status); 1068 const CA_char* getModData8(UErrorCode &status); 1069 1070 const CA_uchar* getSortedData16(UErrorCode &status); 1071 const CA_char* getSortedData8(UErrorCode &status); 1072 1073 const CA_uchar* getRandomData16(UErrorCode &status); 1074 const CA_char* getRandomData8(UErrorCode &status); 1075 1076 static CA_uchar* sortData16( 1077 const CA_uchar* d16, 1078 UComparator *cmp, const void *context, 1079 UErrorCode &status); 1080 static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status); 1081 1082 UPerfFunction* TestStrcoll(); 1083 UPerfFunction* TestStrcollNull(); 1084 UPerfFunction* TestStrcollSimilar(); 1085 1086 UPerfFunction* TestStrcollUTF8(); 1087 UPerfFunction* TestStrcollUTF8Null(); 1088 UPerfFunction* TestStrcollUTF8Similar(); 1089 1090 UPerfFunction* TestGetSortKey(); 1091 UPerfFunction* TestGetSortKeyNull(); 1092 1093 UPerfFunction* TestNextSortKeyPart_4All(); 1094 UPerfFunction* TestNextSortKeyPart_4x2(); 1095 UPerfFunction* TestNextSortKeyPart_4x4(); 1096 UPerfFunction* TestNextSortKeyPart_4x8(); 1097 UPerfFunction* TestNextSortKeyPart_32All(); 1098 UPerfFunction* TestNextSortKeyPart_32x2(); 1099 1100 UPerfFunction* TestNextSortKeyPartUTF8_4All(); 1101 UPerfFunction* TestNextSortKeyPartUTF8_4x2(); 1102 UPerfFunction* TestNextSortKeyPartUTF8_4x4(); 1103 UPerfFunction* TestNextSortKeyPartUTF8_4x8(); 1104 UPerfFunction* TestNextSortKeyPartUTF8_32All(); 1105 UPerfFunction* TestNextSortKeyPartUTF8_32x2(); 1106 1107 UPerfFunction* TestCppCompare(); 1108 UPerfFunction* TestCppCompareNull(); 1109 UPerfFunction* TestCppCompareSimilar(); 1110 1111 UPerfFunction* TestCppCompareUTF8(); 1112 UPerfFunction* TestCppCompareUTF8Null(); 1113 UPerfFunction* TestCppCompareUTF8Similar(); 1114 1115 UPerfFunction* TestCppGetCollationKey(); 1116 UPerfFunction* TestCppGetCollationKeyNull(); 1117 1118 UPerfFunction* TestUniStrSort(); 1119 UPerfFunction* TestStringPieceSortCpp(); 1120 UPerfFunction* TestStringPieceSortC(); 1121 1122 UPerfFunction* TestUniStrBinSearch(); 1123 UPerfFunction* TestStringPieceBinSearchCpp(); 1124 UPerfFunction* TestStringPieceBinSearchC(); 1125}; 1126 1127CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) : 1128 UPerfTest(argc, argv, status), 1129 coll(NULL), 1130 collObj(NULL), 1131 count(0), 1132 data16(NULL), 1133 data8(NULL), 1134 modData16(NULL), 1135 modData8(NULL), 1136 sortedData16(NULL), 1137 sortedData8(NULL), 1138 randomData16(NULL), 1139 randomData8(NULL) 1140{ 1141 if (U_FAILURE(status)) { 1142 return; 1143 } 1144 1145 if (locale == NULL){ 1146 locale = "en_US"; // set default locale 1147 } 1148 1149 // Set up an ICU collator 1150 coll = ucol_open(locale, &status); 1151 collObj = Collator::createInstance(locale, status); 1152 1153 // Keyword support should be actually a part of ICU collator, see ICU ticket #8260. 1154 char keyBuffer[256]; 1155 UColAttributeValue val; 1156 if (uloc_getKeywordValue(locale, "strength", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1157 if (strcmp(keyBuffer, "primary") == 0) { 1158 val = UCOL_PRIMARY; 1159 } else if (strcmp(keyBuffer, "secondary") == 0) { 1160 val = UCOL_SECONDARY; 1161 } else if (strcmp(keyBuffer, "tertiary") == 0) { 1162 val = UCOL_TERTIARY; 1163 } else if (strcmp(keyBuffer, "quaternary") == 0) { 1164 val = UCOL_QUATERNARY; 1165 } else if (strcmp(keyBuffer, "identical") == 0) { 1166 val = UCOL_IDENTICAL; 1167 } else { 1168 status = U_ILLEGAL_ARGUMENT_ERROR; 1169 } 1170 if (U_SUCCESS(status)) { 1171 ucol_setAttribute(coll, UCOL_STRENGTH, val, &status); 1172 collObj->setAttribute(UCOL_STRENGTH, val, status); 1173 } 1174 } 1175 if (uloc_getKeywordValue(locale, "alternate", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1176 if (strcmp(keyBuffer, "non-ignorable") == 0) { 1177 val = UCOL_NON_IGNORABLE; 1178 } else if (strcmp(keyBuffer, "shifted") == 0) { 1179 val = UCOL_SHIFTED; 1180 } else { 1181 status = U_ILLEGAL_ARGUMENT_ERROR; 1182 } 1183 if (U_SUCCESS(status)) { 1184 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, val, &status); 1185 collObj->setAttribute(UCOL_ALTERNATE_HANDLING, val, status); 1186 } 1187 } 1188 if (uloc_getKeywordValue(locale, "backwards", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1189 if (strcmp(keyBuffer, "on") == 0) { 1190 val = UCOL_ON; 1191 } else if (strcmp(keyBuffer, "off") == 0) { 1192 val = UCOL_OFF; 1193 } else { 1194 status = U_ILLEGAL_ARGUMENT_ERROR; 1195 } 1196 if (U_SUCCESS(status)) { 1197 ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, val, &status); 1198 collObj->setAttribute(UCOL_FRENCH_COLLATION, val, status); 1199 } 1200 } 1201 if (uloc_getKeywordValue(locale, "normalization", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1202 if (strcmp(keyBuffer, "on") == 0) { 1203 val = UCOL_ON; 1204 } else if (strcmp(keyBuffer, "off") == 0) { 1205 val = UCOL_OFF; 1206 } else { 1207 status = U_ILLEGAL_ARGUMENT_ERROR; 1208 } 1209 if (U_SUCCESS(status)) { 1210 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, val, &status); 1211 collObj->setAttribute(UCOL_NORMALIZATION_MODE, val, status); 1212 } 1213 } 1214 if (uloc_getKeywordValue(locale, "caseLevel", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1215 if (strcmp(keyBuffer, "on") == 0) { 1216 val = UCOL_ON; 1217 } else if (strcmp(keyBuffer, "off") == 0) { 1218 val = UCOL_OFF; 1219 } else { 1220 status = U_ILLEGAL_ARGUMENT_ERROR; 1221 } 1222 if (U_SUCCESS(status)) { 1223 ucol_setAttribute(coll, UCOL_CASE_LEVEL, val, &status); 1224 collObj->setAttribute(UCOL_CASE_LEVEL, val, status); 1225 } 1226 } 1227 if (uloc_getKeywordValue(locale, "caseFirst", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1228 if (strcmp(keyBuffer, "upper") == 0) { 1229 val = UCOL_UPPER_FIRST; 1230 } else if (strcmp(keyBuffer, "lower") == 0) { 1231 val = UCOL_LOWER_FIRST; 1232 } else if (strcmp(keyBuffer, "off") == 0) { 1233 val = UCOL_OFF; 1234 } else { 1235 status = U_ILLEGAL_ARGUMENT_ERROR; 1236 } 1237 if (U_SUCCESS(status)) { 1238 ucol_setAttribute(coll, UCOL_CASE_FIRST, val, &status); 1239 collObj->setAttribute(UCOL_CASE_FIRST, val, status); 1240 } 1241 } 1242 if (uloc_getKeywordValue(locale, "hiraganaQuaternary", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1243 if (strcmp(keyBuffer, "on") == 0) { 1244 val = UCOL_ON; 1245 } else if (strcmp(keyBuffer, "off") == 0) { 1246 val = UCOL_OFF; 1247 } else { 1248 status = U_ILLEGAL_ARGUMENT_ERROR; 1249 } 1250 if (U_SUCCESS(status)) { 1251 ucol_setAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, val, &status); 1252 collObj->setAttribute(UCOL_HIRAGANA_QUATERNARY_MODE, val, status); 1253 } 1254 } 1255 if (uloc_getKeywordValue(locale, "numeric", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1256 if (strcmp(keyBuffer, "on") == 0) { 1257 val = UCOL_ON; 1258 } else if (strcmp(keyBuffer, "off") == 0) { 1259 val = UCOL_OFF; 1260 } else { 1261 status = U_ILLEGAL_ARGUMENT_ERROR; 1262 } 1263 if (U_SUCCESS(status)) { 1264 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, val, &status); 1265 collObj->setAttribute(UCOL_NUMERIC_COLLATION, val, status); 1266 } 1267 } 1268 if (uloc_getKeywordValue(locale, "variableTop", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1269 // no support for now 1270 status = U_UNSUPPORTED_ERROR; 1271 } 1272 if (uloc_getKeywordValue(locale, "reorder", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1273 // no support for now 1274 status = U_UNSUPPORTED_ERROR; 1275 } 1276} 1277 1278CollPerf2Test::~CollPerf2Test() 1279{ 1280 ucol_close(coll); 1281 delete collObj; 1282 1283 delete data16; 1284 delete data8; 1285 delete modData16; 1286 delete modData8; 1287 delete sortedData16; 1288 delete sortedData8; 1289 delete randomData16; 1290 delete randomData8; 1291} 1292 1293#define MAX_NUM_DATA 10000 1294 1295const CA_uchar* CollPerf2Test::getData16(UErrorCode &status) 1296{ 1297 if (U_FAILURE(status)) return NULL; 1298 if (data16) return data16; 1299 1300 CA_uchar* d16 = new CA_uchar(); 1301 const UChar *line = NULL; 1302 int32_t len = 0; 1303 int32_t numData = 0; 1304 1305 for (;;) { 1306 line = ucbuf_readline(ucharBuf, &len, &status); 1307 if (line == NULL || U_FAILURE(status)) break; 1308 1309 // Refer to the source code of ucbuf_readline() 1310 // 1. 'len' includes the line terminal symbols 1311 // 2. The length of the line terminal symbols is only one character 1312 // 3. The Windows CR LF line terminal symbols will be converted to CR 1313 1314 if (len == 1 || line[0] == 0x23 /* '#' */) { 1315 continue; // skip empty/comment line 1316 } else { 1317 d16->append_one(len); 1318 u_memcpy(d16->last(), line, len); 1319 1320 numData++; 1321 if (numData >= MAX_NUM_DATA) break; 1322 } 1323 } 1324 1325 if (U_SUCCESS(status)) { 1326 data16 = d16; 1327 } else { 1328 delete d16; 1329 } 1330 1331 return data16; 1332} 1333 1334const CA_char* CollPerf2Test::getData8(UErrorCode &status) 1335{ 1336 if (U_FAILURE(status)) return NULL; 1337 if (data8) return data8; 1338 return data8 = getData8FromData16(getData16(status), status); 1339} 1340 1341const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status) 1342{ 1343 if (U_FAILURE(status)) return NULL; 1344 if (modData16) return modData16; 1345 1346 const CA_uchar* d16 = getData16(status); 1347 if (U_FAILURE(status)) return NULL; 1348 1349 CA_uchar* modData16 = new CA_uchar(); 1350 1351 for (int32_t i = 0; i < d16->count; i++) { 1352 const UChar *s = d16->dataOf(i); 1353 int32_t len = d16->lengthOf(i) + 1; // including NULL terminator 1354 1355 modData16->append_one(len); 1356 u_memcpy(modData16->last(), s, len); 1357 1358 // replacing the last character with a different character 1359 UChar *lastChar = &modData16->last()[len -2]; 1360 for (int32_t j = i + 1; j != i; j++) { 1361 if (j >= d16->count) { 1362 j = 0; 1363 } 1364 const UChar *s1 = d16->dataOf(j); 1365 UChar lastChar1 = s1[d16->lengthOf(j) - 1]; 1366 if (*lastChar != lastChar1) { 1367 *lastChar = lastChar1; 1368 break; 1369 } 1370 } 1371 } 1372 1373 return modData16; 1374} 1375 1376const CA_char* CollPerf2Test::getModData8(UErrorCode &status) 1377{ 1378 if (U_FAILURE(status)) return NULL; 1379 if (modData8) return modData8; 1380 return modData8 = getData8FromData16(getModData16(status), status); 1381} 1382 1383namespace { 1384 1385struct ArrayAndColl { 1386 ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {} 1387 const CA_uchar* d16; 1388 const Collator& coll; 1389}; 1390 1391int32_t U_CALLCONV 1392U16CollatorComparator(const void* context, const void* left, const void* right) { 1393 const ArrayAndColl& ac = *(const ArrayAndColl*)context; 1394 const CA_uchar* d16 = ac.d16; 1395 int32_t leftIndex = *(const int32_t*)left; 1396 int32_t rightIndex = *(const int32_t*)right; 1397 UErrorCode errorCode = U_ZERO_ERROR; 1398 return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex), 1399 d16->dataOf(rightIndex), d16->lengthOf(rightIndex), 1400 errorCode); 1401} 1402 1403int32_t U_CALLCONV 1404U16HashComparator(const void* context, const void* left, const void* right) { 1405 const CA_uchar* d16 = (const CA_uchar*)context; 1406 int32_t leftIndex = *(const int32_t*)left; 1407 int32_t rightIndex = *(const int32_t*)right; 1408 int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex)); 1409 int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex)); 1410 return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1; 1411} 1412 1413} // namespace 1414 1415const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) { 1416 if (U_FAILURE(status)) return NULL; 1417 if (sortedData16) return sortedData16; 1418 1419 ArrayAndColl ac(getData16(status), *collObj); 1420 return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status); 1421} 1422 1423const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) { 1424 if (U_FAILURE(status)) return NULL; 1425 if (sortedData8) return sortedData8; 1426 return sortedData8 = getData8FromData16(getSortedData16(status), status); 1427} 1428 1429const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) { 1430 if (U_FAILURE(status)) return NULL; 1431 if (randomData16) return randomData16; 1432 1433 // Sort the strings by their hash codes, which should be a reasonably pseudo-random order. 1434 const CA_uchar* d16 = getData16(status); 1435 return randomData16 = sortData16(d16, U16HashComparator, d16, status); 1436} 1437 1438const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) { 1439 if (U_FAILURE(status)) return NULL; 1440 if (randomData8) return randomData8; 1441 return randomData8 = getData8FromData16(getRandomData16(status), status); 1442} 1443 1444CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16, 1445 UComparator *cmp, const void *context, 1446 UErrorCode &status) { 1447 if (U_FAILURE(status)) return NULL; 1448 1449 LocalArray<int32_t> indexes(new int32_t[d16->count]); 1450 for (int32_t i = 0; i < d16->count; ++i) { 1451 indexes[i] = i; 1452 } 1453 uprv_sortArray(indexes.getAlias(), d16->count, 4, cmp, context, TRUE, &status); 1454 if (U_FAILURE(status)) return NULL; 1455 1456 // Copy the strings in sorted order into a new array. 1457 LocalPointer<CA_uchar> newD16(new CA_uchar()); 1458 for (int32_t i = 0; i < d16->count; i++) { 1459 const UChar* s = d16->dataOf(i); 1460 int32_t len = d16->lengthOf(i); 1461 int32_t capacity = len + 1; // including NULL terminator 1462 newD16->append_one(capacity); 1463 u_memcpy(newD16->last(), s, capacity); 1464 } 1465 1466 if (U_SUCCESS(status)) { 1467 return newD16.orphan(); 1468 } else { 1469 return NULL; 1470 } 1471} 1472 1473CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) { 1474 if (U_FAILURE(status)) return NULL; 1475 1476 // UTF-16 -> UTF-8 conversion 1477 LocalPointer<CA_char> d8(new CA_char()); 1478 for (int32_t i = 0; i < d16->count; i++) { 1479 const UChar *s16 = d16->dataOf(i); 1480 int32_t length16 = d16->lengthOf(i); 1481 1482 // get length in UTF-8 1483 int32_t length8; 1484 u_strToUTF8(NULL, 0, &length8, s16, length16, &status); 1485 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){ 1486 status = U_ZERO_ERROR; 1487 } else { 1488 break; 1489 } 1490 int32_t capacity8 = length8 + 1; // plus terminal NULL 1491 d8->append_one(capacity8); 1492 1493 // convert to UTF-8 1494 u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status); 1495 if (U_FAILURE(status)) break; 1496 } 1497 1498 if (U_SUCCESS(status)) { 1499 return d8.orphan(); 1500 } else { 1501 return NULL; 1502 } 1503} 1504 1505UPerfFunction* 1506CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/) 1507{ 1508 (void)par; 1509 TESTCASE_AUTO_BEGIN; 1510 1511 TESTCASE_AUTO(TestStrcoll); 1512 TESTCASE_AUTO(TestStrcollNull); 1513 TESTCASE_AUTO(TestStrcollSimilar); 1514 1515 TESTCASE_AUTO(TestStrcollUTF8); 1516 TESTCASE_AUTO(TestStrcollUTF8Null); 1517 TESTCASE_AUTO(TestStrcollUTF8Similar); 1518 1519 TESTCASE_AUTO(TestGetSortKey); 1520 TESTCASE_AUTO(TestGetSortKeyNull); 1521 1522 TESTCASE_AUTO(TestNextSortKeyPart_4All); 1523 TESTCASE_AUTO(TestNextSortKeyPart_4x4); 1524 TESTCASE_AUTO(TestNextSortKeyPart_4x8); 1525 TESTCASE_AUTO(TestNextSortKeyPart_32All); 1526 TESTCASE_AUTO(TestNextSortKeyPart_32x2); 1527 1528 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All); 1529 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4); 1530 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8); 1531 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All); 1532 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2); 1533 1534 TESTCASE_AUTO(TestCppCompare); 1535 TESTCASE_AUTO(TestCppCompareNull); 1536 TESTCASE_AUTO(TestCppCompareSimilar); 1537 1538 TESTCASE_AUTO(TestCppCompareUTF8); 1539 TESTCASE_AUTO(TestCppCompareUTF8Null); 1540 TESTCASE_AUTO(TestCppCompareUTF8Similar); 1541 1542 TESTCASE_AUTO(TestCppGetCollationKey); 1543 TESTCASE_AUTO(TestCppGetCollationKeyNull); 1544 1545 TESTCASE_AUTO(TestUniStrSort); 1546 TESTCASE_AUTO(TestStringPieceSortCpp); 1547 TESTCASE_AUTO(TestStringPieceSortC); 1548 1549 TESTCASE_AUTO(TestUniStrBinSearch); 1550 TESTCASE_AUTO(TestStringPieceBinSearchCpp); 1551 TESTCASE_AUTO(TestStringPieceBinSearchC); 1552 1553 TESTCASE_AUTO_END; 1554 return NULL; 1555} 1556 1557 1558 1559UPerfFunction* CollPerf2Test::TestStrcoll() 1560{ 1561 UErrorCode status = U_ZERO_ERROR; 1562 Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */); 1563 if (U_FAILURE(status)) { 1564 delete testCase; 1565 return NULL; 1566 } 1567 return testCase; 1568} 1569 1570UPerfFunction* CollPerf2Test::TestStrcollNull() 1571{ 1572 UErrorCode status = U_ZERO_ERROR; 1573 Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */); 1574 if (U_FAILURE(status)) { 1575 delete testCase; 1576 return NULL; 1577 } 1578 return testCase; 1579} 1580 1581UPerfFunction* CollPerf2Test::TestStrcollSimilar() 1582{ 1583 UErrorCode status = U_ZERO_ERROR; 1584 Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */); 1585 if (U_FAILURE(status)) { 1586 delete testCase; 1587 return NULL; 1588 } 1589 return testCase; 1590} 1591 1592UPerfFunction* CollPerf2Test::TestStrcollUTF8() 1593{ 1594 UErrorCode status = U_ZERO_ERROR; 1595 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */); 1596 if (U_FAILURE(status)) { 1597 delete testCase; 1598 return NULL; 1599 } 1600 return testCase; 1601} 1602 1603UPerfFunction* CollPerf2Test::TestStrcollUTF8Null() 1604{ 1605 UErrorCode status = U_ZERO_ERROR; 1606 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */); 1607 if (U_FAILURE(status)) { 1608 delete testCase; 1609 return NULL; 1610 } 1611 return testCase; 1612} 1613 1614UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar() 1615{ 1616 UErrorCode status = U_ZERO_ERROR; 1617 StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */); 1618 if (U_FAILURE(status)) { 1619 delete testCase; 1620 return NULL; 1621 } 1622 return testCase; 1623} 1624 1625UPerfFunction* CollPerf2Test::TestGetSortKey() 1626{ 1627 UErrorCode status = U_ZERO_ERROR; 1628 GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */); 1629 if (U_FAILURE(status)) { 1630 delete testCase; 1631 return NULL; 1632 } 1633 return testCase; 1634} 1635 1636UPerfFunction* CollPerf2Test::TestGetSortKeyNull() 1637{ 1638 UErrorCode status = U_ZERO_ERROR; 1639 GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */); 1640 if (U_FAILURE(status)) { 1641 delete testCase; 1642 return NULL; 1643 } 1644 return testCase; 1645} 1646 1647UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All() 1648{ 1649 UErrorCode status = U_ZERO_ERROR; 1650 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */); 1651 if (U_FAILURE(status)) { 1652 delete testCase; 1653 return NULL; 1654 } 1655 return testCase; 1656} 1657 1658UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4() 1659{ 1660 UErrorCode status = U_ZERO_ERROR; 1661 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */); 1662 if (U_FAILURE(status)) { 1663 delete testCase; 1664 return NULL; 1665 } 1666 return testCase; 1667} 1668 1669UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8() 1670{ 1671 UErrorCode status = U_ZERO_ERROR; 1672 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */); 1673 if (U_FAILURE(status)) { 1674 delete testCase; 1675 return NULL; 1676 } 1677 return testCase; 1678} 1679 1680UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All() 1681{ 1682 UErrorCode status = U_ZERO_ERROR; 1683 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */); 1684 if (U_FAILURE(status)) { 1685 delete testCase; 1686 return NULL; 1687 } 1688 return testCase; 1689} 1690 1691UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2() 1692{ 1693 UErrorCode status = U_ZERO_ERROR; 1694 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */); 1695 if (U_FAILURE(status)) { 1696 delete testCase; 1697 return NULL; 1698 } 1699 return testCase; 1700} 1701 1702UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All() 1703{ 1704 UErrorCode status = U_ZERO_ERROR; 1705 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */); 1706 if (U_FAILURE(status)) { 1707 delete testCase; 1708 return NULL; 1709 } 1710 return testCase; 1711} 1712 1713UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4() 1714{ 1715 UErrorCode status = U_ZERO_ERROR; 1716 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */); 1717 if (U_FAILURE(status)) { 1718 delete testCase; 1719 return NULL; 1720 } 1721 return testCase; 1722} 1723 1724UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8() 1725{ 1726 UErrorCode status = U_ZERO_ERROR; 1727 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */); 1728 if (U_FAILURE(status)) { 1729 delete testCase; 1730 return NULL; 1731 } 1732 return testCase; 1733} 1734 1735UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All() 1736{ 1737 UErrorCode status = U_ZERO_ERROR; 1738 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */); 1739 if (U_FAILURE(status)) { 1740 delete testCase; 1741 return NULL; 1742 } 1743 return testCase; 1744} 1745 1746UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2() 1747{ 1748 UErrorCode status = U_ZERO_ERROR; 1749 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */); 1750 if (U_FAILURE(status)) { 1751 delete testCase; 1752 return NULL; 1753 } 1754 return testCase; 1755} 1756 1757UPerfFunction* CollPerf2Test::TestCppCompare() 1758{ 1759 UErrorCode status = U_ZERO_ERROR; 1760 CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */); 1761 if (U_FAILURE(status)) { 1762 delete testCase; 1763 return NULL; 1764 } 1765 return testCase; 1766} 1767 1768UPerfFunction* CollPerf2Test::TestCppCompareNull() 1769{ 1770 UErrorCode status = U_ZERO_ERROR; 1771 CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */); 1772 if (U_FAILURE(status)) { 1773 delete testCase; 1774 return NULL; 1775 } 1776 return testCase; 1777} 1778 1779UPerfFunction* CollPerf2Test::TestCppCompareSimilar() 1780{ 1781 UErrorCode status = U_ZERO_ERROR; 1782 CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */); 1783 if (U_FAILURE(status)) { 1784 delete testCase; 1785 return NULL; 1786 } 1787 return testCase; 1788} 1789 1790UPerfFunction* CollPerf2Test::TestCppCompareUTF8() 1791{ 1792 UErrorCode status = U_ZERO_ERROR; 1793 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */); 1794 if (U_FAILURE(status)) { 1795 delete testCase; 1796 return NULL; 1797 } 1798 return testCase; 1799} 1800 1801UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null() 1802{ 1803 UErrorCode status = U_ZERO_ERROR; 1804 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */); 1805 if (U_FAILURE(status)) { 1806 delete testCase; 1807 return NULL; 1808 } 1809 return testCase; 1810} 1811 1812UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar() 1813{ 1814 UErrorCode status = U_ZERO_ERROR; 1815 CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */); 1816 if (U_FAILURE(status)) { 1817 delete testCase; 1818 return NULL; 1819 } 1820 return testCase; 1821} 1822 1823UPerfFunction* CollPerf2Test::TestCppGetCollationKey() 1824{ 1825 UErrorCode status = U_ZERO_ERROR; 1826 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */); 1827 if (U_FAILURE(status)) { 1828 delete testCase; 1829 return NULL; 1830 } 1831 return testCase; 1832} 1833 1834UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull() 1835{ 1836 UErrorCode status = U_ZERO_ERROR; 1837 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */); 1838 if (U_FAILURE(status)) { 1839 delete testCase; 1840 return NULL; 1841 } 1842 return testCase; 1843} 1844 1845UPerfFunction* CollPerf2Test::TestUniStrSort() { 1846 UErrorCode status = U_ZERO_ERROR; 1847 UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status)); 1848 if (U_FAILURE(status)) { 1849 delete testCase; 1850 return NULL; 1851 } 1852 return testCase; 1853} 1854 1855UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() { 1856 UErrorCode status = U_ZERO_ERROR; 1857 UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status)); 1858 if (U_FAILURE(status)) { 1859 delete testCase; 1860 return NULL; 1861 } 1862 return testCase; 1863} 1864 1865UPerfFunction* CollPerf2Test::TestStringPieceSortC() { 1866 UErrorCode status = U_ZERO_ERROR; 1867 UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status)); 1868 if (U_FAILURE(status)) { 1869 delete testCase; 1870 return NULL; 1871 } 1872 return testCase; 1873} 1874 1875UPerfFunction* CollPerf2Test::TestUniStrBinSearch() { 1876 UErrorCode status = U_ZERO_ERROR; 1877 UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status)); 1878 if (U_FAILURE(status)) { 1879 delete testCase; 1880 return NULL; 1881 } 1882 return testCase; 1883} 1884 1885UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() { 1886 UErrorCode status = U_ZERO_ERROR; 1887 UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status)); 1888 if (U_FAILURE(status)) { 1889 delete testCase; 1890 return NULL; 1891 } 1892 return testCase; 1893} 1894 1895UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() { 1896 UErrorCode status = U_ZERO_ERROR; 1897 UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status)); 1898 if (U_FAILURE(status)) { 1899 delete testCase; 1900 return NULL; 1901 } 1902 return testCase; 1903} 1904 1905 1906int main(int argc, const char *argv[]) 1907{ 1908 UErrorCode status = U_ZERO_ERROR; 1909 CollPerf2Test test(argc, argv, status); 1910 1911 if (U_FAILURE(status)){ 1912 printf("The error is %s\n", u_errorName(status)); 1913 //TODO: print usage here 1914 return status; 1915 } 1916 1917 if (test.run() == FALSE){ 1918 fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n"); 1919 return -1; 1920 } 1921 return 0; 1922} 1923