1/*
2**********************************************************************
3* Copyright (c) 2013-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6*/
7
8#include <string.h>
9#include "unicode/localpointer.h"
10#include "unicode/uperf.h"
11#include "unicode/ucol.h"
12#include "unicode/coll.h"
13#include "unicode/uiter.h"
14#include "unicode/ustring.h"
15#include "unicode/sortkey.h"
16#include "uarrsort.h"
17#include "uoptions.h"
18#include "ustr_imp.h"
19
/*
 * COMPACT_ARRAY(Name, UNIT) defines a struct Name that keeps a sequence of
 * strings of UNIT back to back in one heap buffer, together with a table of
 * start offsets. Typical use: call append_one(n) to reserve n units
 * (terminator included) and then write the string through last().
 * Results of malloc/realloc are not checked.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays { \
    /* copy operations are declared but not defined here */ \
    CompactArrays(const CompactArrays &); \
    CompactArrays & operator=(const CompactArrays &); \
    \
    int32_t   count; /* number of strings appended so far */ \
    int32_t * index; /* count + 1 offsets into data; index[i] is where string i starts */ \
    UNIT    * data;  /* storage for all strings, each followed by its terminator */ \
    \
    CompactArrays() \
        : count(0), \
          index((int32_t *) malloc(sizeof(int32_t))), \
          data(NULL) { \
        index[0] = 0; \
    } \
    ~CompactArrays() { \
        free(data); \
        free(index); \
    } \
    /* Reserves room for one more string of theLen units (terminator included). */ \
    void append_one(int32_t theLen) { \
        const int32_t slot = ++count; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (slot + 1)); \
        index[slot] = index[slot - 1] + theLen; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * index[slot]); \
    } \
    /* Start of the most recently appended string. */ \
    UNIT * last() { \
        return &data[index[count - 1]]; \
    } \
    /* Start of string i. */ \
    const UNIT * dataOf(int32_t i) const { \
        return &data[index[i]]; \
    } \
    /* Length of string i, not counting its terminator. */ \
    int32_t lengthOf(int i) const { \
        const int32_t begin = index[i]; \
        const int32_t end = index[i + 1]; \
        return end - begin - 1; \
    } \
};
43
// Concrete compact-array types: CA_uchar holds UChar units, CA_char holds char units.
COMPACT_ARRAY(CA_uchar, UChar)
COMPACT_ARRAY(CA_char, char)

// Upper bound on the number of strings the permuting test cases sample from their input.
#define MAX_TEST_STRINGS_FOR_PERMUTING 1000
48
49// C API test cases
50
51//
52// Test case taking a single test data array, calling ucol_strcoll by permuting the test data
53//
54class Strcoll : public UPerfFunction
55{
56public:
57    Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
58    ~Strcoll();
59    virtual void call(UErrorCode* status);
60    virtual long getOperationsPerIteration();
61
62private:
63    const UCollator *coll;
64    const CA_uchar *source;
65    UBool useLen;
66    int32_t maxTestStrings;
67};
68
69Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
70    :   coll(coll),
71        source(source),
72        useLen(useLen)
73{
74    maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
75}
76
77Strcoll::~Strcoll()
78{
79}
80
81void Strcoll::call(UErrorCode* status)
82{
83    if (U_FAILURE(*status)) return;
84
85    // call strcoll for permutation
86    int32_t divisor = source->count / maxTestStrings;
87    int32_t srcLen, tgtLen;
88    int32_t cmp = 0;
89    for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
90        if (i % divisor) continue;
91        numTestStringsI++;
92        srcLen = useLen ? source->lengthOf(i) : -1;
93        for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
94            if (j % divisor) continue;
95            numTestStringsJ++;
96            tgtLen = useLen ? source->lengthOf(j) : -1;
97            cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
98        }
99    }
100    // At the end, cmp must be 0
101    if (cmp != 0) {
102        *status = U_INTERNAL_PROGRAM_ERROR;
103    }
104}
105
106long Strcoll::getOperationsPerIteration()
107{
108    return maxTestStrings * maxTestStrings;
109}
110
111//
112// Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
113//
114class Strcoll_2 : public UPerfFunction
115{
116public:
117    Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
118    ~Strcoll_2();
119    virtual void call(UErrorCode* status);
120    virtual long getOperationsPerIteration();
121
122private:
123    const UCollator *coll;
124    const CA_uchar *source;
125    const CA_uchar *target;
126    UBool useLen;
127};
128
129Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
130    :   coll(coll),
131        source(source),
132        target(target),
133        useLen(useLen)
134{
135}
136
137Strcoll_2::~Strcoll_2()
138{
139}
140
141void Strcoll_2::call(UErrorCode* status)
142{
143    if (U_FAILURE(*status)) return;
144
145    // call strcoll for two strings at the same index
146    if (source->count < target->count) {
147        *status = U_ILLEGAL_ARGUMENT_ERROR;
148    } else {
149        for (int32_t i = 0; i < source->count; i++) {
150            int32_t srcLen = useLen ? source->lengthOf(i) : -1;
151            int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
152            ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
153        }
154    }
155}
156
157long Strcoll_2::getOperationsPerIteration()
158{
159    return source->count;
160}
161
162
163//
164// Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
165//
166class StrcollUTF8 : public UPerfFunction
167{
168public:
169    StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
170    ~StrcollUTF8();
171    virtual void call(UErrorCode* status);
172    virtual long getOperationsPerIteration();
173
174private:
175    const UCollator *coll;
176    const CA_char *source;
177    UBool useLen;
178    int32_t maxTestStrings;
179};
180
181StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
182    :   coll(coll),
183        source(source),
184        useLen(useLen)
185{
186    maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
187}
188
189StrcollUTF8::~StrcollUTF8()
190{
191}
192
193void StrcollUTF8::call(UErrorCode* status)
194{
195    if (U_FAILURE(*status)) return;
196
197    // call strcollUTF8 for permutation
198    int32_t divisor = source->count / maxTestStrings;
199    int32_t srcLen, tgtLen;
200    int32_t cmp = 0;
201    for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
202        if (i % divisor) continue;
203        numTestStringsI++;
204        srcLen = useLen ? source->lengthOf(i) : -1;
205        for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
206            if (j % divisor) continue;
207            numTestStringsJ++;
208            tgtLen = useLen ? source->lengthOf(j) : -1;
209            cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
210        }
211    }
212    // At the end, cmp must be 0
213    if (cmp != 0) {
214        *status = U_INTERNAL_PROGRAM_ERROR;
215    }
216}
217
218long StrcollUTF8::getOperationsPerIteration()
219{
220    return maxTestStrings * maxTestStrings;
221}
222
223//
// Test case taking two test data arrays, calling ucol_strcollUTF8 for strings at the same index
225//
226class StrcollUTF8_2 : public UPerfFunction
227{
228public:
229    StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
230    ~StrcollUTF8_2();
231    virtual void call(UErrorCode* status);
232    virtual long getOperationsPerIteration();
233
234private:
235    const UCollator *coll;
236    const CA_char *source;
237    const CA_char *target;
238    UBool useLen;
239};
240
241StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
242    :   coll(coll),
243        source(source),
244        target(target),
245        useLen(useLen)
246{
247}
248
249StrcollUTF8_2::~StrcollUTF8_2()
250{
251}
252
253void StrcollUTF8_2::call(UErrorCode* status)
254{
255    if (U_FAILURE(*status)) return;
256
257    // call strcoll for two strings at the same index
258    if (source->count < target->count) {
259        *status = U_ILLEGAL_ARGUMENT_ERROR;
260    } else {
261        for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
262            int32_t srcLen = useLen ? source->lengthOf(i) : -1;
263            int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
264            ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
265        }
266    }
267}
268
269long StrcollUTF8_2::getOperationsPerIteration()
270{
271    return source->count;
272}
273
274//
275// Test case taking a single test data array, calling ucol_getSortKey for each
276//
277class GetSortKey : public UPerfFunction
278{
279public:
280    GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
281    ~GetSortKey();
282    virtual void call(UErrorCode* status);
283    virtual long getOperationsPerIteration();
284
285private:
286    const UCollator *coll;
287    const CA_uchar *source;
288    UBool useLen;
289};
290
291GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
292    :   coll(coll),
293        source(source),
294        useLen(useLen)
295{
296}
297
298GetSortKey::~GetSortKey()
299{
300}
301
302#define KEY_BUF_SIZE 512
303
304void GetSortKey::call(UErrorCode* status)
305{
306    if (U_FAILURE(*status)) return;
307
308    uint8_t key[KEY_BUF_SIZE];
309    int32_t len;
310
311    if (useLen) {
312        for (int32_t i = 0; i < source->count; i++) {
313            len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
314        }
315    } else {
316        for (int32_t i = 0; i < source->count; i++) {
317            len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
318        }
319    }
320}
321
322long GetSortKey::getOperationsPerIteration()
323{
324    return source->count;
325}
326
327//
328// Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
329// given buffer size
330//
331class NextSortKeyPart : public UPerfFunction
332{
333public:
334    NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
335    ~NextSortKeyPart();
336    virtual void call(UErrorCode* status);
337    virtual long getOperationsPerIteration();
338    virtual long getEventsPerIteration();
339
340private:
341    const UCollator *coll;
342    const CA_uchar *source;
343    int32_t bufSize;
344    int32_t maxIteration;
345    long events;
346};
347
348// Note: maxIteration = -1 -> repeat until the end of collation key
349NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
350    :   coll(coll),
351        source(source),
352        bufSize(bufSize),
353        maxIteration(maxIteration),
354        events(0)
355{
356}
357
358NextSortKeyPart::~NextSortKeyPart()
359{
360}
361
362void NextSortKeyPart::call(UErrorCode* status)
363{
364    if (U_FAILURE(*status)) return;
365
366    uint8_t *part = (uint8_t *)malloc(bufSize);
367    uint32_t state[2];
368    UCharIterator iter;
369
370    events = 0;
371    for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
372        uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
373        state[0] = 0;
374        state[1] = 0;
375        int32_t partLen = bufSize;
376        for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
377            partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
378            events++;
379        }
380    }
381    free(part);
382}
383
384long NextSortKeyPart::getOperationsPerIteration()
385{
386    return source->count;
387}
388
389long NextSortKeyPart::getEventsPerIteration()
390{
391    return events;
392}
393
394//
395// Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
396// given buffer size
397//
398class NextSortKeyPartUTF8 : public UPerfFunction
399{
400public:
401    NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
402    ~NextSortKeyPartUTF8();
403    virtual void call(UErrorCode* status);
404    virtual long getOperationsPerIteration();
405    virtual long getEventsPerIteration();
406
407private:
408    const UCollator *coll;
409    const CA_char *source;
410    int32_t bufSize;
411    int32_t maxIteration;
412    long events;
413};
414
415// Note: maxIteration = -1 -> repeat until the end of collation key
416NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
417    :   coll(coll),
418        source(source),
419        bufSize(bufSize),
420        maxIteration(maxIteration),
421        events(0)
422{
423}
424
425NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
426{
427}
428
429void NextSortKeyPartUTF8::call(UErrorCode* status)
430{
431    if (U_FAILURE(*status)) return;
432
433    uint8_t *part = (uint8_t *)malloc(bufSize);
434    uint32_t state[2];
435    UCharIterator iter;
436
437    events = 0;
438    for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
439        uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
440        state[0] = 0;
441        state[1] = 0;
442        int32_t partLen = bufSize;
443        for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
444            partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
445            events++;
446        }
447    }
448    free(part);
449}
450
451long NextSortKeyPartUTF8::getOperationsPerIteration()
452{
453    return source->count;
454}
455
456long NextSortKeyPartUTF8::getEventsPerIteration()
457{
458    return events;
459}
460
461// CPP API test cases
462
463//
464// Test case taking a single test data array, calling Collator::compare by permuting the test data
465//
466class CppCompare : public UPerfFunction
467{
468public:
469    CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
470    ~CppCompare();
471    virtual void call(UErrorCode* status);
472    virtual long getOperationsPerIteration();
473
474private:
475    const Collator *coll;
476    const CA_uchar *source;
477    UBool useLen;
478    int32_t maxTestStrings;
479};
480
481CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
482    :   coll(coll),
483        source(source),
484        useLen(useLen)
485{
486    maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
487}
488
489CppCompare::~CppCompare()
490{
491}
492
493void CppCompare::call(UErrorCode* status) {
494    if (U_FAILURE(*status)) return;
495
496    // call compare for permutation of test data
497    int32_t divisor = source->count / maxTestStrings;
498    int32_t srcLen, tgtLen;
499    int32_t cmp = 0;
500    for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
501        if (i % divisor) continue;
502        numTestStringsI++;
503        srcLen = useLen ? source->lengthOf(i) : -1;
504        for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
505            if (j % divisor) continue;
506            numTestStringsJ++;
507            tgtLen = useLen ? source->lengthOf(j) : -1;
508            cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
509        }
510    }
511    // At the end, cmp must be 0
512    if (cmp != 0) {
513        *status = U_INTERNAL_PROGRAM_ERROR;
514    }
515}
516
517long CppCompare::getOperationsPerIteration()
518{
519    return maxTestStrings * maxTestStrings;
520}
521
522//
523// Test case taking two test data arrays, calling Collator::compare for strings at a same index
524//
525class CppCompare_2 : public UPerfFunction
526{
527public:
528    CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
529    ~CppCompare_2();
530    virtual void call(UErrorCode* status);
531    virtual long getOperationsPerIteration();
532
533private:
534    const Collator *coll;
535    const CA_uchar *source;
536    const CA_uchar *target;
537    UBool useLen;
538};
539
540CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
541    :   coll(coll),
542        source(source),
543        target(target),
544        useLen(useLen)
545{
546}
547
548CppCompare_2::~CppCompare_2()
549{
550}
551
552void CppCompare_2::call(UErrorCode* status) {
553    if (U_FAILURE(*status)) return;
554
555    // call strcoll for two strings at the same index
556    if (source->count < target->count) {
557        *status = U_ILLEGAL_ARGUMENT_ERROR;
558    } else {
559        for (int32_t i = 0; i < source->count; i++) {
560            int32_t srcLen = useLen ? source->lengthOf(i) : -1;
561            int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
562            coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
563        }
564    }
565}
566
567long CppCompare_2::getOperationsPerIteration()
568{
569    return source->count;
570}
571
572
573//
574// Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
575//
576class CppCompareUTF8 : public UPerfFunction
577{
578public:
579    CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
580    ~CppCompareUTF8();
581    virtual void call(UErrorCode* status);
582    virtual long getOperationsPerIteration();
583
584private:
585    const Collator *coll;
586    const CA_char *source;
587    UBool useLen;
588    int32_t maxTestStrings;
589};
590
591CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
592    :   coll(coll),
593        source(source),
594        useLen(useLen)
595{
596    maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
597}
598
599CppCompareUTF8::~CppCompareUTF8()
600{
601}
602
603void CppCompareUTF8::call(UErrorCode* status) {
604    if (U_FAILURE(*status)) return;
605
606    // call compareUTF8 for all permutations
607    int32_t divisor = source->count / maxTestStrings;
608    StringPiece src, tgt;
609    int32_t cmp = 0;
610    for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
611        if (i % divisor) continue;
612        numTestStringsI++;
613
614        if (useLen) {
615            src.set(source->dataOf(i), source->lengthOf(i));
616        } else {
617            src.set(source->dataOf(i));
618        }
619        for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
620            if (j % divisor) continue;
621            numTestStringsJ++;
622
623            if (useLen) {
624                tgt.set(source->dataOf(i), source->lengthOf(i));
625            } else {
626                tgt.set(source->dataOf(i));
627            }
628            cmp += coll->compareUTF8(src, tgt, *status);
629        }
630    }
631    // At the end, cmp must be 0
632    if (cmp != 0) {
633        *status = U_INTERNAL_PROGRAM_ERROR;
634    }
635}
636
637long CppCompareUTF8::getOperationsPerIteration()
638{
639    return maxTestStrings * maxTestStrings;
640}
641
642
643//
644// Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
645//
646class CppCompareUTF8_2 : public UPerfFunction
647{
648public:
649    CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
650    ~CppCompareUTF8_2();
651    virtual void call(UErrorCode* status);
652    virtual long getOperationsPerIteration();
653
654private:
655    const Collator *coll;
656    const CA_char *source;
657    const CA_char *target;
658    UBool useLen;
659};
660
661CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
662    :   coll(coll),
663        source(source),
664        target(target),
665        useLen(useLen)
666{
667}
668
669CppCompareUTF8_2::~CppCompareUTF8_2()
670{
671}
672
673void CppCompareUTF8_2::call(UErrorCode* status) {
674    if (U_FAILURE(*status)) return;
675
676    // call strcoll for two strings at the same index
677    StringPiece src, tgt;
678    if (source->count < target->count) {
679        *status = U_ILLEGAL_ARGUMENT_ERROR;
680    } else {
681        for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
682            if (useLen) {
683                src.set(source->dataOf(i), source->lengthOf(i));
684                tgt.set(target->dataOf(i), target->lengthOf(i));
685            } else {
686                src.set(source->dataOf(i));
687                tgt.set(target->dataOf(i));
688            }
689            coll->compareUTF8(src, tgt, *status);
690        }
691    }
692}
693
694long CppCompareUTF8_2::getOperationsPerIteration()
695{
696    return source->count;
697}
698
699
700//
701// Test case taking a single test data array, calling Collator::getCollationKey for each
702//
703class CppGetCollationKey : public UPerfFunction
704{
705public:
706    CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
707    ~CppGetCollationKey();
708    virtual void call(UErrorCode* status);
709    virtual long getOperationsPerIteration();
710
711private:
712    const Collator *coll;
713    const CA_uchar *source;
714    UBool useLen;
715};
716
717CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
718    :   coll(coll),
719        source(source),
720        useLen(useLen)
721{
722}
723
724CppGetCollationKey::~CppGetCollationKey()
725{
726}
727
728void CppGetCollationKey::call(UErrorCode* status)
729{
730    if (U_FAILURE(*status)) return;
731
732    CollationKey key;
733    for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
734        coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
735    }
736}
737
738long CppGetCollationKey::getOperationsPerIteration() {
739    return source->count;
740}
741
742namespace {
743
744struct CollatorAndCounter {
745    CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
746    CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
747            : coll(coll), ucoll(ucoll), counter(0) {}
748    const Collator& coll;
749    const UCollator *ucoll;
750    int32_t counter;
751};
752
753int32_t U_CALLCONV
754UniStrCollatorComparator(const void* context, const void* left, const void* right) {
755    CollatorAndCounter& cc = *(CollatorAndCounter*)context;
756    const UnicodeString& leftString = **(const UnicodeString**)left;
757    const UnicodeString& rightString = **(const UnicodeString**)right;
758    UErrorCode errorCode = U_ZERO_ERROR;
759    ++cc.counter;
760    return cc.coll.compare(leftString, rightString, errorCode);
761}
762
763}  // namespace
764
765class CollPerfFunction : public UPerfFunction {
766public:
767    CollPerfFunction(const Collator& coll, const UCollator *ucoll)
768            : coll(coll), ucoll(ucoll), ops(0) {}
769    virtual ~CollPerfFunction();
770    /** Calls call() to set the ops field, and returns that. */
771    virtual long getOperationsPerIteration();
772
773protected:
774    const Collator& coll;
775    const UCollator *ucoll;
776    int32_t ops;
777};
778
779CollPerfFunction::~CollPerfFunction() {}
780
781long CollPerfFunction::getOperationsPerIteration() {
782    UErrorCode errorCode = U_ZERO_ERROR;
783    call(&errorCode);
784    return U_SUCCESS(errorCode) ? ops : 0;
785}
786
787class UniStrCollPerfFunction : public CollPerfFunction {
788public:
789    UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
790            : CollPerfFunction(coll, ucoll), d16(data16),
791              source(new UnicodeString*[d16->count]) {
792        for (int32_t i = 0; i < d16->count; ++i) {
793            source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
794        }
795    }
796    virtual ~UniStrCollPerfFunction();
797
798protected:
799    const CA_uchar* d16;
800    UnicodeString** source;
801};
802
803UniStrCollPerfFunction::~UniStrCollPerfFunction() {
804    for (int32_t i = 0; i < d16->count; ++i) {
805        delete source[i];
806    }
807    delete[] source;
808}
809
810//
811// Test case sorting an array of UnicodeString pointers.
812//
813class UniStrSort : public UniStrCollPerfFunction {
814public:
815    UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
816            : UniStrCollPerfFunction(coll, ucoll, data16),
817              dest(new UnicodeString*[d16->count]) {}
818    virtual ~UniStrSort();
819    virtual void call(UErrorCode* status);
820
821private:
822    UnicodeString** dest;  // aliases only
823};
824
825UniStrSort::~UniStrSort() {
826    delete[] dest;
827}
828
829void UniStrSort::call(UErrorCode* status) {
830    if (U_FAILURE(*status)) return;
831
832    CollatorAndCounter cc(coll);
833    int32_t count = d16->count;
834    memcpy(dest, source, count * sizeof(UnicodeString *));
835    uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
836                   UniStrCollatorComparator, &cc, TRUE, status);
837    ops = cc.counter;
838}
839
840namespace {
841
842int32_t U_CALLCONV
843StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
844    CollatorAndCounter& cc = *(CollatorAndCounter*)context;
845    const StringPiece& leftString = *(const StringPiece*)left;
846    const StringPiece& rightString = *(const StringPiece*)right;
847    UErrorCode errorCode = U_ZERO_ERROR;
848    ++cc.counter;
849    return cc.coll.compareUTF8(leftString, rightString, errorCode);
850}
851
852int32_t U_CALLCONV
853StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
854    CollatorAndCounter& cc = *(CollatorAndCounter*)context;
855    const StringPiece& leftString = *(const StringPiece*)left;
856    const StringPiece& rightString = *(const StringPiece*)right;
857    UErrorCode errorCode = U_ZERO_ERROR;
858    ++cc.counter;
859    return ucol_strcollUTF8(cc.ucoll,
860                            leftString.data(), leftString.length(),
861                            rightString.data(), rightString.length(), &errorCode);
862}
863
864}  // namespace
865
866class StringPieceCollPerfFunction : public CollPerfFunction {
867public:
868    StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
869            : CollPerfFunction(coll, ucoll), d8(data8),
870              source(new StringPiece[d8->count]) {
871        for (int32_t i = 0; i < d8->count; ++i) {
872            source[i].set(d8->dataOf(i), d8->lengthOf(i));
873        }
874    }
875    virtual ~StringPieceCollPerfFunction();
876
877protected:
878    const CA_char* d8;
879    StringPiece* source;
880};
881
882StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
883    delete[] source;
884}
885
886class StringPieceSort : public StringPieceCollPerfFunction {
887public:
888    StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
889            : StringPieceCollPerfFunction(coll, ucoll, data8),
890              dest(new StringPiece[d8->count]) {}
891    virtual ~StringPieceSort();
892
893protected:
894    StringPiece* dest;
895};
896
897StringPieceSort::~StringPieceSort() {
898    delete[] dest;
899}
900
901//
902// Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
903//
904class StringPieceSortCpp : public StringPieceSort {
905public:
906    StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
907            : StringPieceSort(coll, ucoll, data8) {}
908    virtual ~StringPieceSortCpp();
909    virtual void call(UErrorCode* status);
910};
911
912StringPieceSortCpp::~StringPieceSortCpp() {}
913
914void StringPieceSortCpp::call(UErrorCode* status) {
915    if (U_FAILURE(*status)) return;
916
917    CollatorAndCounter cc(coll);
918    int32_t count = d8->count;
919    memcpy(dest, source, count * sizeof(StringPiece));
920    uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
921                   StringPieceCollatorComparator, &cc, TRUE, status);
922    ops = cc.counter;
923}
924
925//
926// Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
927//
928class StringPieceSortC : public StringPieceSort {
929public:
930    StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
931            : StringPieceSort(coll, ucoll, data8) {}
932    virtual ~StringPieceSortC();
933    virtual void call(UErrorCode* status);
934};
935
936StringPieceSortC::~StringPieceSortC() {}
937
938void StringPieceSortC::call(UErrorCode* status) {
939    if (U_FAILURE(*status)) return;
940
941    CollatorAndCounter cc(coll, ucoll);
942    int32_t count = d8->count;
943    memcpy(dest, source, count * sizeof(StringPiece));
944    uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
945                   StringPieceUCollatorComparator, &cc, TRUE, status);
946    ops = cc.counter;
947}
948
949//
950// Test case performing binary searches in a sorted array of UnicodeString pointers.
951//
952class UniStrBinSearch : public UniStrCollPerfFunction {
953public:
954    UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
955            : UniStrCollPerfFunction(coll, ucoll, data16) {}
956    virtual ~UniStrBinSearch();
957    virtual void call(UErrorCode* status);
958};
959
960UniStrBinSearch::~UniStrBinSearch() {}
961
962void UniStrBinSearch::call(UErrorCode* status) {
963    if (U_FAILURE(*status)) return;
964
965    CollatorAndCounter cc(coll);
966    int32_t count = d16->count;
967    for (int32_t i = 0; i < count; ++i) {
968        (void)uprv_stableBinarySearch((char *)source, count,
969                                      source + i, (int32_t)sizeof(UnicodeString *),
970                                      UniStrCollatorComparator, &cc);
971    }
972    ops = cc.counter;
973}
974
975class StringPieceBinSearch : public StringPieceCollPerfFunction {
976public:
977    StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
978            : StringPieceCollPerfFunction(coll, ucoll, data8) {}
979    virtual ~StringPieceBinSearch();
980};
981
982StringPieceBinSearch::~StringPieceBinSearch() {}
983
984//
985// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
986// with Collator::compareUTF8().
987//
988class StringPieceBinSearchCpp : public StringPieceBinSearch {
989public:
990    StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
991            : StringPieceBinSearch(coll, ucoll, data8) {}
992    virtual ~StringPieceBinSearchCpp();
993    virtual void call(UErrorCode* status);
994};
995
996StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
997
998void StringPieceBinSearchCpp::call(UErrorCode* status) {
999    if (U_FAILURE(*status)) return;
1000
1001    CollatorAndCounter cc(coll);
1002    int32_t count = d8->count;
1003    for (int32_t i = 0; i < count; ++i) {
1004        (void)uprv_stableBinarySearch((char *)source, count,
1005                                      source + i, (int32_t)sizeof(StringPiece),
1006                                      StringPieceCollatorComparator, &cc);
1007    }
1008    ops = cc.counter;
1009}
1010
1011//
1012// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
1013// with ucol_strcollUTF8().
1014//
1015class StringPieceBinSearchC : public StringPieceBinSearch {
1016public:
1017    StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
1018            : StringPieceBinSearch(coll, ucoll, data8) {}
1019    virtual ~StringPieceBinSearchC();
1020    virtual void call(UErrorCode* status);
1021};
1022
1023StringPieceBinSearchC::~StringPieceBinSearchC() {}
1024
1025void StringPieceBinSearchC::call(UErrorCode* status) {
1026    if (U_FAILURE(*status)) return;
1027
1028    CollatorAndCounter cc(coll, ucoll);
1029    int32_t count = d8->count;
1030    for (int32_t i = 0; i < count; ++i) {
1031        (void)uprv_stableBinarySearch((char *)source, count,
1032                                      source + i, (int32_t)sizeof(StringPiece),
1033                                      StringPieceUCollatorComparator, &cc);
1034    }
1035    ops = cc.counter;
1036}
1037
1038
1039class CollPerf2Test : public UPerfTest
1040{
1041public:
1042    CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
1043    ~CollPerf2Test();
1044    virtual UPerfFunction* runIndexedTest(
1045        int32_t index, UBool exec, const char *&name, char *par = NULL);
1046
1047private:
1048    UCollator* coll;
1049    Collator* collObj;
1050
1051    int32_t count;
1052    CA_uchar* data16;
1053    CA_char* data8;
1054
1055    CA_uchar* modData16;
1056    CA_char* modData8;
1057
1058    CA_uchar* sortedData16;
1059    CA_char* sortedData8;
1060
1061    CA_uchar* randomData16;
1062    CA_char* randomData8;
1063
1064    const CA_uchar* getData16(UErrorCode &status);
1065    const CA_char* getData8(UErrorCode &status);
1066
1067    const CA_uchar* getModData16(UErrorCode &status);
1068    const CA_char* getModData8(UErrorCode &status);
1069
1070    const CA_uchar* getSortedData16(UErrorCode &status);
1071    const CA_char* getSortedData8(UErrorCode &status);
1072
1073    const CA_uchar* getRandomData16(UErrorCode &status);
1074    const CA_char* getRandomData8(UErrorCode &status);
1075
1076    static CA_uchar* sortData16(
1077            const CA_uchar* d16,
1078            UComparator *cmp, const void *context,
1079            UErrorCode &status);
1080    static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);
1081
1082    UPerfFunction* TestStrcoll();
1083    UPerfFunction* TestStrcollNull();
1084    UPerfFunction* TestStrcollSimilar();
1085
1086    UPerfFunction* TestStrcollUTF8();
1087    UPerfFunction* TestStrcollUTF8Null();
1088    UPerfFunction* TestStrcollUTF8Similar();
1089
1090    UPerfFunction* TestGetSortKey();
1091    UPerfFunction* TestGetSortKeyNull();
1092
1093    UPerfFunction* TestNextSortKeyPart_4All();
1094    UPerfFunction* TestNextSortKeyPart_4x2();
1095    UPerfFunction* TestNextSortKeyPart_4x4();
1096    UPerfFunction* TestNextSortKeyPart_4x8();
1097    UPerfFunction* TestNextSortKeyPart_32All();
1098    UPerfFunction* TestNextSortKeyPart_32x2();
1099
1100    UPerfFunction* TestNextSortKeyPartUTF8_4All();
1101    UPerfFunction* TestNextSortKeyPartUTF8_4x2();
1102    UPerfFunction* TestNextSortKeyPartUTF8_4x4();
1103    UPerfFunction* TestNextSortKeyPartUTF8_4x8();
1104    UPerfFunction* TestNextSortKeyPartUTF8_32All();
1105    UPerfFunction* TestNextSortKeyPartUTF8_32x2();
1106
1107    UPerfFunction* TestCppCompare();
1108    UPerfFunction* TestCppCompareNull();
1109    UPerfFunction* TestCppCompareSimilar();
1110
1111    UPerfFunction* TestCppCompareUTF8();
1112    UPerfFunction* TestCppCompareUTF8Null();
1113    UPerfFunction* TestCppCompareUTF8Similar();
1114
1115    UPerfFunction* TestCppGetCollationKey();
1116    UPerfFunction* TestCppGetCollationKeyNull();
1117
1118    UPerfFunction* TestUniStrSort();
1119    UPerfFunction* TestStringPieceSortCpp();
1120    UPerfFunction* TestStringPieceSortC();
1121
1122    UPerfFunction* TestUniStrBinSearch();
1123    UPerfFunction* TestStringPieceBinSearchCpp();
1124    UPerfFunction* TestStringPieceBinSearchC();
1125};
1126
1127CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1128    UPerfTest(argc, argv, status),
1129    coll(NULL),
1130    collObj(NULL),
1131    count(0),
1132    data16(NULL),
1133    data8(NULL),
1134    modData16(NULL),
1135    modData8(NULL),
1136    sortedData16(NULL),
1137    sortedData8(NULL),
1138    randomData16(NULL),
1139    randomData8(NULL)
1140{
1141    if (U_FAILURE(status)) {
1142        return;
1143    }
1144
1145    if (locale == NULL){
1146        locale = "en_US";   // set default locale
1147    }
1148
1149    //  Set up an ICU collator
1150    coll = ucol_open(locale, &status);
1151    collObj = Collator::createInstance(locale, status);
1152
1153    // Keyword support should be actually a part of ICU collator, see ICU ticket #8260.
1154    char keyBuffer[256];
1155    UColAttributeValue val;
1156    if (uloc_getKeywordValue(locale, "strength", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1157        if (strcmp(keyBuffer, "primary") == 0) {
1158            val = UCOL_PRIMARY;
1159        } else if (strcmp(keyBuffer, "secondary") == 0) {
1160            val = UCOL_SECONDARY;
1161        } else if (strcmp(keyBuffer, "tertiary") == 0) {
1162            val = UCOL_TERTIARY;
1163        } else if (strcmp(keyBuffer, "quaternary") == 0) {
1164            val = UCOL_QUATERNARY;
1165        } else if (strcmp(keyBuffer, "identical") == 0) {
1166            val = UCOL_IDENTICAL;
1167        } else {
1168            status = U_ILLEGAL_ARGUMENT_ERROR;
1169        }
1170        if (U_SUCCESS(status)) {
1171            ucol_setAttribute(coll, UCOL_STRENGTH, val, &status);
1172            collObj->setAttribute(UCOL_STRENGTH, val, status);
1173        }
1174    }
1175    if (uloc_getKeywordValue(locale, "alternate", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1176        if (strcmp(keyBuffer, "non-ignorable") == 0) {
1177            val = UCOL_NON_IGNORABLE;
1178        } else if (strcmp(keyBuffer, "shifted") == 0) {
1179            val = UCOL_SHIFTED;
1180        } else {
1181            status = U_ILLEGAL_ARGUMENT_ERROR;
1182        }
1183        if (U_SUCCESS(status)) {
1184            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, val, &status);
1185            collObj->setAttribute(UCOL_ALTERNATE_HANDLING, val, status);
1186        }
1187    }
1188    if (uloc_getKeywordValue(locale, "backwards", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1189        if (strcmp(keyBuffer, "on") == 0) {
1190            val = UCOL_ON;
1191        } else if (strcmp(keyBuffer, "off") == 0) {
1192            val = UCOL_OFF;
1193        } else {
1194            status = U_ILLEGAL_ARGUMENT_ERROR;
1195        }
1196        if (U_SUCCESS(status)) {
1197            ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, val, &status);
1198            collObj->setAttribute(UCOL_FRENCH_COLLATION, val, status);
1199        }
1200    }
1201    if (uloc_getKeywordValue(locale, "normalization", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1202        if (strcmp(keyBuffer, "on") == 0) {
1203            val = UCOL_ON;
1204        } else if (strcmp(keyBuffer, "off") == 0) {
1205            val = UCOL_OFF;
1206        } else {
1207            status = U_ILLEGAL_ARGUMENT_ERROR;
1208        }
1209        if (U_SUCCESS(status)) {
1210            ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, val, &status);
1211            collObj->setAttribute(UCOL_NORMALIZATION_MODE, val, status);
1212        }
1213    }
1214    if (uloc_getKeywordValue(locale, "caseLevel", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1215        if (strcmp(keyBuffer, "on") == 0) {
1216            val = UCOL_ON;
1217        } else if (strcmp(keyBuffer, "off") == 0) {
1218            val = UCOL_OFF;
1219        } else {
1220            status = U_ILLEGAL_ARGUMENT_ERROR;
1221        }
1222        if (U_SUCCESS(status)) {
1223            ucol_setAttribute(coll, UCOL_CASE_LEVEL, val, &status);
1224            collObj->setAttribute(UCOL_CASE_LEVEL, val, status);
1225        }
1226    }
1227    if (uloc_getKeywordValue(locale, "caseFirst", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1228        if (strcmp(keyBuffer, "upper") == 0) {
1229            val = UCOL_UPPER_FIRST;
1230        } else if (strcmp(keyBuffer, "lower") == 0) {
1231            val = UCOL_LOWER_FIRST;
1232        } else if (strcmp(keyBuffer, "off") == 0) {
1233            val = UCOL_OFF;
1234        } else {
1235            status = U_ILLEGAL_ARGUMENT_ERROR;
1236        }
1237        if (U_SUCCESS(status)) {
1238            ucol_setAttribute(coll, UCOL_CASE_FIRST, val, &status);
1239            collObj->setAttribute(UCOL_CASE_FIRST, val, status);
1240        }
1241    }
1242    if (uloc_getKeywordValue(locale, "hiraganaQuaternary", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1243        if (strcmp(keyBuffer, "on") == 0) {
1244            val = UCOL_ON;
1245        } else if (strcmp(keyBuffer, "off") == 0) {
1246            val = UCOL_OFF;
1247        } else {
1248            status = U_ILLEGAL_ARGUMENT_ERROR;
1249        }
1250        if (U_SUCCESS(status)) {
1251            ucol_setAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, val, &status);
1252            collObj->setAttribute(UCOL_HIRAGANA_QUATERNARY_MODE, val, status);
1253        }
1254    }
1255    if (uloc_getKeywordValue(locale, "numeric", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1256        if (strcmp(keyBuffer, "on") == 0) {
1257            val = UCOL_ON;
1258        } else if (strcmp(keyBuffer, "off") == 0) {
1259            val = UCOL_OFF;
1260        } else {
1261            status = U_ILLEGAL_ARGUMENT_ERROR;
1262        }
1263        if (U_SUCCESS(status)) {
1264            ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, val, &status);
1265            collObj->setAttribute(UCOL_NUMERIC_COLLATION, val, status);
1266        }
1267    }
1268    if (uloc_getKeywordValue(locale, "variableTop", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1269        // no support for now
1270        status = U_UNSUPPORTED_ERROR;
1271    }
1272    if (uloc_getKeywordValue(locale, "reorder", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1273        // no support for now
1274        status = U_UNSUPPORTED_ERROR;
1275    }
1276}
1277
1278CollPerf2Test::~CollPerf2Test()
1279{
1280    ucol_close(coll);
1281    delete collObj;
1282
1283    delete data16;
1284    delete data8;
1285    delete modData16;
1286    delete modData8;
1287    delete sortedData16;
1288    delete sortedData8;
1289    delete randomData16;
1290    delete randomData8;
1291}
1292
1293#define MAX_NUM_DATA 10000
1294
1295const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1296{
1297    if (U_FAILURE(status)) return NULL;
1298    if (data16) return data16;
1299
1300    CA_uchar* d16 = new CA_uchar();
1301    const UChar *line = NULL;
1302    int32_t len = 0;
1303    int32_t numData = 0;
1304
1305    for (;;) {
1306        line = ucbuf_readline(ucharBuf, &len, &status);
1307        if (line == NULL || U_FAILURE(status)) break;
1308
1309        // Refer to the source code of ucbuf_readline()
1310        // 1. 'len' includes the line terminal symbols
1311        // 2. The length of the line terminal symbols is only one character
1312        // 3. The Windows CR LF line terminal symbols will be converted to CR
1313
1314        if (len == 1 || line[0] == 0x23 /* '#' */) {
1315            continue; // skip empty/comment line
1316        } else {
1317            d16->append_one(len);
1318            u_memcpy(d16->last(), line, len);
1319
1320            numData++;
1321            if (numData >= MAX_NUM_DATA) break;
1322        }
1323    }
1324
1325    if (U_SUCCESS(status)) {
1326        data16 = d16;
1327    } else {
1328        delete d16;
1329    }
1330
1331    return data16;
1332}
1333
1334const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1335{
1336    if (U_FAILURE(status)) return NULL;
1337    if (data8) return data8;
1338    return data8 = getData8FromData16(getData16(status), status);
1339}
1340
1341const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1342{
1343    if (U_FAILURE(status)) return NULL;
1344    if (modData16) return modData16;
1345
1346    const CA_uchar* d16 = getData16(status);
1347    if (U_FAILURE(status)) return NULL;
1348
1349    CA_uchar* modData16 = new CA_uchar();
1350
1351    for (int32_t i = 0; i < d16->count; i++) {
1352        const UChar *s = d16->dataOf(i);
1353        int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1354
1355        modData16->append_one(len);
1356        u_memcpy(modData16->last(), s, len);
1357
1358        // replacing the last character with a different character
1359        UChar *lastChar = &modData16->last()[len -2];
1360        for (int32_t j = i + 1; j != i; j++) {
1361            if (j >= d16->count) {
1362                j = 0;
1363            }
1364            const UChar *s1 = d16->dataOf(j);
1365            UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1366            if (*lastChar != lastChar1) {
1367                *lastChar = lastChar1;
1368                break;
1369            }
1370        }
1371    }
1372
1373    return modData16;
1374}
1375
1376const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1377{
1378    if (U_FAILURE(status)) return NULL;
1379    if (modData8) return modData8;
1380    return modData8 = getData8FromData16(getModData16(status), status);
1381}
1382
1383namespace {
1384
1385struct ArrayAndColl {
1386    ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
1387    const CA_uchar* d16;
1388    const Collator& coll;
1389};
1390
1391int32_t U_CALLCONV
1392U16CollatorComparator(const void* context, const void* left, const void* right) {
1393    const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1394    const CA_uchar* d16 = ac.d16;
1395    int32_t leftIndex = *(const int32_t*)left;
1396    int32_t rightIndex = *(const int32_t*)right;
1397    UErrorCode errorCode = U_ZERO_ERROR;
1398    return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1399                           d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1400                           errorCode);
1401}
1402
1403int32_t U_CALLCONV
1404U16HashComparator(const void* context, const void* left, const void* right) {
1405    const CA_uchar* d16 = (const CA_uchar*)context;
1406    int32_t leftIndex = *(const int32_t*)left;
1407    int32_t rightIndex = *(const int32_t*)right;
1408    int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1409    int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1410    return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1411}
1412
1413}  // namespace
1414
1415const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1416    if (U_FAILURE(status)) return NULL;
1417    if (sortedData16) return sortedData16;
1418
1419    ArrayAndColl ac(getData16(status), *collObj);
1420    return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1421}
1422
1423const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1424    if (U_FAILURE(status)) return NULL;
1425    if (sortedData8) return sortedData8;
1426    return sortedData8 = getData8FromData16(getSortedData16(status), status);
1427}
1428
1429const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1430    if (U_FAILURE(status)) return NULL;
1431    if (randomData16) return randomData16;
1432
1433    // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1434    const CA_uchar* d16 = getData16(status);
1435    return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1436}
1437
1438const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1439    if (U_FAILURE(status)) return NULL;
1440    if (randomData8) return randomData8;
1441    return randomData8 = getData8FromData16(getRandomData16(status), status);
1442}
1443
1444CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
1445                                    UComparator *cmp, const void *context,
1446                                    UErrorCode &status) {
1447    if (U_FAILURE(status)) return NULL;
1448
1449    LocalArray<int32_t> indexes(new int32_t[d16->count]);
1450    for (int32_t i = 0; i < d16->count; ++i) {
1451        indexes[i] = i;
1452    }
1453    uprv_sortArray(indexes.getAlias(), d16->count, 4, cmp, context, TRUE, &status);
1454    if (U_FAILURE(status)) return NULL;
1455
1456    // Copy the strings in sorted order into a new array.
1457    LocalPointer<CA_uchar> newD16(new CA_uchar());
1458    for (int32_t i = 0; i < d16->count; i++) {
1459        const UChar* s = d16->dataOf(i);
1460        int32_t len = d16->lengthOf(i);
1461        int32_t capacity = len + 1;  // including NULL terminator
1462        newD16->append_one(capacity);
1463        u_memcpy(newD16->last(), s, capacity);
1464    }
1465
1466    if (U_SUCCESS(status)) {
1467        return newD16.orphan();
1468    } else {
1469        return NULL;
1470    }
1471}
1472
1473CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1474    if (U_FAILURE(status)) return NULL;
1475
1476    // UTF-16 -> UTF-8 conversion
1477    LocalPointer<CA_char> d8(new CA_char());
1478    for (int32_t i = 0; i < d16->count; i++) {
1479        const UChar *s16 = d16->dataOf(i);
1480        int32_t length16 = d16->lengthOf(i);
1481
1482        // get length in UTF-8
1483        int32_t length8;
1484        u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1485        if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1486            status = U_ZERO_ERROR;
1487        } else {
1488            break;
1489        }
1490        int32_t capacity8 = length8 + 1;  // plus terminal NULL
1491        d8->append_one(capacity8);
1492
1493        // convert to UTF-8
1494        u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1495        if (U_FAILURE(status)) break;
1496    }
1497
1498    if (U_SUCCESS(status)) {
1499        return d8.orphan();
1500    } else {
1501        return NULL;
1502    }
1503}
1504
1505UPerfFunction*
1506CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
1507{
1508    (void)par;
1509    TESTCASE_AUTO_BEGIN;
1510
1511    TESTCASE_AUTO(TestStrcoll);
1512    TESTCASE_AUTO(TestStrcollNull);
1513    TESTCASE_AUTO(TestStrcollSimilar);
1514
1515    TESTCASE_AUTO(TestStrcollUTF8);
1516    TESTCASE_AUTO(TestStrcollUTF8Null);
1517    TESTCASE_AUTO(TestStrcollUTF8Similar);
1518
1519    TESTCASE_AUTO(TestGetSortKey);
1520    TESTCASE_AUTO(TestGetSortKeyNull);
1521
1522    TESTCASE_AUTO(TestNextSortKeyPart_4All);
1523    TESTCASE_AUTO(TestNextSortKeyPart_4x4);
1524    TESTCASE_AUTO(TestNextSortKeyPart_4x8);
1525    TESTCASE_AUTO(TestNextSortKeyPart_32All);
1526    TESTCASE_AUTO(TestNextSortKeyPart_32x2);
1527
1528    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
1529    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
1530    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
1531    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
1532    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);
1533
1534    TESTCASE_AUTO(TestCppCompare);
1535    TESTCASE_AUTO(TestCppCompareNull);
1536    TESTCASE_AUTO(TestCppCompareSimilar);
1537
1538    TESTCASE_AUTO(TestCppCompareUTF8);
1539    TESTCASE_AUTO(TestCppCompareUTF8Null);
1540    TESTCASE_AUTO(TestCppCompareUTF8Similar);
1541
1542    TESTCASE_AUTO(TestCppGetCollationKey);
1543    TESTCASE_AUTO(TestCppGetCollationKeyNull);
1544
1545    TESTCASE_AUTO(TestUniStrSort);
1546    TESTCASE_AUTO(TestStringPieceSortCpp);
1547    TESTCASE_AUTO(TestStringPieceSortC);
1548
1549    TESTCASE_AUTO(TestUniStrBinSearch);
1550    TESTCASE_AUTO(TestStringPieceBinSearchCpp);
1551    TESTCASE_AUTO(TestStringPieceBinSearchC);
1552
1553    TESTCASE_AUTO_END;
1554    return NULL;
1555}
1556
1557
1558
1559UPerfFunction* CollPerf2Test::TestStrcoll()
1560{
1561    UErrorCode status = U_ZERO_ERROR;
1562    Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1563    if (U_FAILURE(status)) {
1564        delete testCase;
1565        return NULL;
1566    }
1567    return testCase;
1568}
1569
1570UPerfFunction* CollPerf2Test::TestStrcollNull()
1571{
1572    UErrorCode status = U_ZERO_ERROR;
1573    Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1574    if (U_FAILURE(status)) {
1575        delete testCase;
1576        return NULL;
1577    }
1578    return testCase;
1579}
1580
1581UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1582{
1583    UErrorCode status = U_ZERO_ERROR;
1584    Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1585    if (U_FAILURE(status)) {
1586        delete testCase;
1587        return NULL;
1588    }
1589    return testCase;
1590}
1591
1592UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1593{
1594    UErrorCode status = U_ZERO_ERROR;
1595    StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1596    if (U_FAILURE(status)) {
1597        delete testCase;
1598        return NULL;
1599    }
1600    return testCase;
1601}
1602
1603UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1604{
1605    UErrorCode status = U_ZERO_ERROR;
1606    StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1607    if (U_FAILURE(status)) {
1608        delete testCase;
1609        return NULL;
1610    }
1611    return testCase;
1612}
1613
1614UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1615{
1616    UErrorCode status = U_ZERO_ERROR;
1617    StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1618    if (U_FAILURE(status)) {
1619        delete testCase;
1620        return NULL;
1621    }
1622    return testCase;
1623}
1624
1625UPerfFunction* CollPerf2Test::TestGetSortKey()
1626{
1627    UErrorCode status = U_ZERO_ERROR;
1628    GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1629    if (U_FAILURE(status)) {
1630        delete testCase;
1631        return NULL;
1632    }
1633    return testCase;
1634}
1635
1636UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1637{
1638    UErrorCode status = U_ZERO_ERROR;
1639    GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1640    if (U_FAILURE(status)) {
1641        delete testCase;
1642        return NULL;
1643    }
1644    return testCase;
1645}
1646
1647UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1648{
1649    UErrorCode status = U_ZERO_ERROR;
1650    NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1651    if (U_FAILURE(status)) {
1652        delete testCase;
1653        return NULL;
1654    }
1655    return testCase;
1656}
1657
1658UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1659{
1660    UErrorCode status = U_ZERO_ERROR;
1661    NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1662    if (U_FAILURE(status)) {
1663        delete testCase;
1664        return NULL;
1665    }
1666    return testCase;
1667}
1668
1669UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1670{
1671    UErrorCode status = U_ZERO_ERROR;
1672    NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1673    if (U_FAILURE(status)) {
1674        delete testCase;
1675        return NULL;
1676    }
1677    return testCase;
1678}
1679
1680UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1681{
1682    UErrorCode status = U_ZERO_ERROR;
1683    NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1684    if (U_FAILURE(status)) {
1685        delete testCase;
1686        return NULL;
1687    }
1688    return testCase;
1689}
1690
1691UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1692{
1693    UErrorCode status = U_ZERO_ERROR;
1694    NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1695    if (U_FAILURE(status)) {
1696        delete testCase;
1697        return NULL;
1698    }
1699    return testCase;
1700}
1701
1702UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1703{
1704    UErrorCode status = U_ZERO_ERROR;
1705    NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1706    if (U_FAILURE(status)) {
1707        delete testCase;
1708        return NULL;
1709    }
1710    return testCase;
1711}
1712
1713UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1714{
1715    UErrorCode status = U_ZERO_ERROR;
1716    NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1717    if (U_FAILURE(status)) {
1718        delete testCase;
1719        return NULL;
1720    }
1721    return testCase;
1722}
1723
1724UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1725{
1726    UErrorCode status = U_ZERO_ERROR;
1727    NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1728    if (U_FAILURE(status)) {
1729        delete testCase;
1730        return NULL;
1731    }
1732    return testCase;
1733}
1734
1735UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1736{
1737    UErrorCode status = U_ZERO_ERROR;
1738    NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1739    if (U_FAILURE(status)) {
1740        delete testCase;
1741        return NULL;
1742    }
1743    return testCase;
1744}
1745
1746UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1747{
1748    UErrorCode status = U_ZERO_ERROR;
1749    NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1750    if (U_FAILURE(status)) {
1751        delete testCase;
1752        return NULL;
1753    }
1754    return testCase;
1755}
1756
1757UPerfFunction* CollPerf2Test::TestCppCompare()
1758{
1759    UErrorCode status = U_ZERO_ERROR;
1760    CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1761    if (U_FAILURE(status)) {
1762        delete testCase;
1763        return NULL;
1764    }
1765    return testCase;
1766}
1767
1768UPerfFunction* CollPerf2Test::TestCppCompareNull()
1769{
1770    UErrorCode status = U_ZERO_ERROR;
1771    CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1772    if (U_FAILURE(status)) {
1773        delete testCase;
1774        return NULL;
1775    }
1776    return testCase;
1777}
1778
1779UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1780{
1781    UErrorCode status = U_ZERO_ERROR;
1782    CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1783    if (U_FAILURE(status)) {
1784        delete testCase;
1785        return NULL;
1786    }
1787    return testCase;
1788}
1789
1790UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1791{
1792    UErrorCode status = U_ZERO_ERROR;
1793    CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1794    if (U_FAILURE(status)) {
1795        delete testCase;
1796        return NULL;
1797    }
1798    return testCase;
1799}
1800
1801UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1802{
1803    UErrorCode status = U_ZERO_ERROR;
1804    CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1805    if (U_FAILURE(status)) {
1806        delete testCase;
1807        return NULL;
1808    }
1809    return testCase;
1810}
1811
1812UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1813{
1814    UErrorCode status = U_ZERO_ERROR;
1815    CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1816    if (U_FAILURE(status)) {
1817        delete testCase;
1818        return NULL;
1819    }
1820    return testCase;
1821}
1822
1823UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1824{
1825    UErrorCode status = U_ZERO_ERROR;
1826    CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1827    if (U_FAILURE(status)) {
1828        delete testCase;
1829        return NULL;
1830    }
1831    return testCase;
1832}
1833
1834UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1835{
1836    UErrorCode status = U_ZERO_ERROR;
1837    CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1838    if (U_FAILURE(status)) {
1839        delete testCase;
1840        return NULL;
1841    }
1842    return testCase;
1843}
1844
1845UPerfFunction* CollPerf2Test::TestUniStrSort() {
1846    UErrorCode status = U_ZERO_ERROR;
1847    UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1848    if (U_FAILURE(status)) {
1849        delete testCase;
1850        return NULL;
1851    }
1852    return testCase;
1853}
1854
1855UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1856    UErrorCode status = U_ZERO_ERROR;
1857    UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1858    if (U_FAILURE(status)) {
1859        delete testCase;
1860        return NULL;
1861    }
1862    return testCase;
1863}
1864
1865UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1866    UErrorCode status = U_ZERO_ERROR;
1867    UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1868    if (U_FAILURE(status)) {
1869        delete testCase;
1870        return NULL;
1871    }
1872    return testCase;
1873}
1874
1875UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1876    UErrorCode status = U_ZERO_ERROR;
1877    UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1878    if (U_FAILURE(status)) {
1879        delete testCase;
1880        return NULL;
1881    }
1882    return testCase;
1883}
1884
1885UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1886    UErrorCode status = U_ZERO_ERROR;
1887    UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1888    if (U_FAILURE(status)) {
1889        delete testCase;
1890        return NULL;
1891    }
1892    return testCase;
1893}
1894
1895UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1896    UErrorCode status = U_ZERO_ERROR;
1897    UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1898    if (U_FAILURE(status)) {
1899        delete testCase;
1900        return NULL;
1901    }
1902    return testCase;
1903}
1904
1905
1906int main(int argc, const char *argv[])
1907{
1908    UErrorCode status = U_ZERO_ERROR;
1909    CollPerf2Test test(argc, argv, status);
1910
1911    if (U_FAILURE(status)){
1912        printf("The error is %s\n", u_errorName(status));
1913        //TODO: print usage here
1914        return status;
1915    }
1916
1917    if (test.run() == FALSE){
1918        fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1919        return -1;
1920    }
1921    return 0;
1922}
1923