1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2014, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "uassert.h"
35#include "unicode/parseerr.h"
36#include "unicode/ucnv.h"
37#include "unicode/ures.h"
38#include "unicode/uscript.h"
39#include "unicode/utf16.h"
40#include "uparse.h"
41#include "putilimp.h"
42
43
/* Number of elements in the array `a`. `a` must be a real array, not a pointer. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Second dimension, in UChars, of the fixed-width UChar tables in this file. */
#define MAX_TOKEN_LEN 16
47
48typedef UCollationResult tst_strcoll(void *collator, const int object,
49                        const UChar *source, const int sLen,
50                        const UChar *target, const int tLen);
51
52
53
/*
 * Strings for the first half of IncompleteCntTest. Each entry is expected to
 * compare less than every entry after it under the rules " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB",
  "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};
68
/*
 * Strings for the second half of IncompleteCntTest. Each entry is expected to
 * compare less than every entry after it under the rules
 * " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ",
  "MAR",
  "Z", "DAVIS", "MARK", "DAV",
  "DAVI"
};
80
81static void IncompleteCntTest(void)
82{
83  UErrorCode status = U_ZERO_ERROR;
84  UChar temp[90];
85  UChar t1[90];
86  UChar t2[90];
87
88  UCollator *coll =  NULL;
89  uint32_t i = 0, j = 0;
90  uint32_t size = 0;
91
92  u_uastrcpy(temp, " & Z < ABC < Q < B");
93
94  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95
96  if(U_SUCCESS(status)) {
97    size = sizeof(cnt1)/sizeof(cnt1[0]);
98    for(i = 0; i < size-1; i++) {
99      for(j = i+1; j < size; j++) {
100        UCollationElements *iter;
101        u_uastrcpy(t1, cnt1[i]);
102        u_uastrcpy(t2, cnt1[j]);
103        doTest(coll, t1, t2, UCOL_LESS);
104        /* synwee : added collation element iterator test */
105        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106        if (U_FAILURE(status)) {
107          log_err("Creation of iterator failed\n");
108          break;
109        }
110        backAndForth(iter);
111        ucol_closeElements(iter);
112      }
113    }
114  }
115
116  ucol_close(coll);
117
118
119  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121
122  if(U_SUCCESS(status)) {
123    size = sizeof(cnt2)/sizeof(cnt2[0]);
124    for(i = 0; i < size-1; i++) {
125      for(j = i+1; j < size; j++) {
126        UCollationElements *iter;
127        u_uastrcpy(t1, cnt2[i]);
128        u_uastrcpy(t2, cnt2[j]);
129        doTest(coll, t1, t2, UCOL_LESS);
130
131        /* synwee : added collation element iterator test */
132        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133        if (U_FAILURE(status)) {
134          log_err("Creation of iterator failed\n");
135          break;
136        }
137        backAndForth(iter);
138        ucol_closeElements(iter);
139      }
140    }
141  }
142
143  ucol_close(coll);
144
145
146}
147
/*
 * Strings used by BlackBirdTest with alternate handling set to UCOL_SHIFTED.
 * At quaternary strength each entry is expected to compare less than every
 * later entry.
 */
static const char shifted[][20] = {
  "black bird", "black-bird", "blackbird",
  "black Bird", "black-Bird", "blackBird",
  "black birds", "black-birds", "blackbirds"
};
159
160const static UCollationResult shiftedTert[] = {
161  UCOL_EQUAL,
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_LESS,
165  UCOL_EQUAL,
166  UCOL_EQUAL,
167  UCOL_LESS,
168  UCOL_EQUAL,
169  UCOL_EQUAL
170};
171
/*
 * Strings used by BlackBirdTest with alternate handling set to
 * UCOL_NON_IGNORABLE. Each entry is expected to compare less than every
 * later entry.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird", "blackBird", "blackbirds"
};
183
184static void BlackBirdTest(void) {
185  UErrorCode status = U_ZERO_ERROR;
186  UChar t1[90];
187  UChar t2[90];
188
189  uint32_t i = 0, j = 0;
190  uint32_t size = 0;
191  UCollator *coll = ucol_open("en_US", &status);
192
193  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195
196  if(U_SUCCESS(status)) {
197    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198    for(i = 0; i < size-1; i++) {
199      for(j = i+1; j < size; j++) {
200        u_uastrcpy(t1, nonignorable[i]);
201        u_uastrcpy(t2, nonignorable[j]);
202        doTest(coll, t1, t2, UCOL_LESS);
203      }
204    }
205  }
206
207  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209
210  if(U_SUCCESS(status)) {
211    size = sizeof(shifted)/sizeof(shifted[0]);
212    for(i = 0; i < size-1; i++) {
213      for(j = i+1; j < size; j++) {
214        u_uastrcpy(t1, shifted[i]);
215        u_uastrcpy(t2, shifted[j]);
216        doTest(coll, t1, t2, UCOL_LESS);
217      }
218    }
219  }
220
221  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222  if(U_SUCCESS(status)) {
223    size = sizeof(shifted)/sizeof(shifted[0]);
224    for(i = 1; i < size; i++) {
225      u_uastrcpy(t1, shifted[i-1]);
226      u_uastrcpy(t2, shifted[i]);
227      doTest(coll, t1, t2, shiftedTert[i]);
228    }
229  }
230
231  ucol_close(coll);
232}
233
234const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0000},
238    {0x00C0, 0x0301, 0x0000},
239    /* this would work with forced normalization */
240    {0x00C0, 0x0316, 0x0000}
241};
242
243const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246    {0x00C0, 0},
247    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248    /* this would work with forced normalization */
249    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250};
251
252const static UCollationResult results[] = {
253    UCOL_GREATER,
254    UCOL_EQUAL,
255    UCOL_EQUAL,
256    UCOL_GREATER,
257    UCOL_EQUAL
258};
259
260static void FunkyATest(void)
261{
262
263    int32_t i;
264    UErrorCode status = U_ZERO_ERROR;
265    UCollator  *myCollation;
266    myCollation = ucol_open("en_US", &status);
267    if(U_FAILURE(status)){
268        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269        return;
270    }
271    log_verbose("Testing some A letters, for some reason\n");
272    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273    ucol_setStrength(myCollation, UCOL_TERTIARY);
274    for (i = 0; i < 4 ; i++)
275    {
276        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277    }
278    ucol_close(myCollation);
279}
280
281UColAttributeValue caseFirst[] = {
282    UCOL_OFF,
283    UCOL_LOWER_FIRST,
284    UCOL_UPPER_FIRST
285};
286
287
288UColAttributeValue alternateHandling[] = {
289    UCOL_NON_IGNORABLE,
290    UCOL_SHIFTED
291};
292
293UColAttributeValue caseLevel[] = {
294    UCOL_OFF,
295    UCOL_ON
296};
297
298UColAttributeValue strengths[] = {
299    UCOL_PRIMARY,
300    UCOL_SECONDARY,
301    UCOL_TERTIARY,
302    UCOL_QUATERNARY,
303    UCOL_IDENTICAL
304};
305
#if 0
/* Printable names, parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Prints to stderr the sort key of "Mark Davis" (with its second character
 * replaced by U+00E4) for every combination of caseFirst, alternateHandling,
 * caseLevel and strengths on the "en_US" collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* Index with h, the loop variable of this level; i still holds a stale value here. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* Release the collator opened at the top of the function. */
  ucol_close(coll);
}
#endif
383
384static void BillFairmanTest(void) {
385/*
386** check for actual locale via ICU resource bundles
387**
388** lp points to the original locale ("fr_FR_....")
389*/
390
391    UResourceBundle *lr,*cr;
392    UErrorCode              lec = U_ZERO_ERROR;
393    const char *lp = "fr_FR_you_ll_never_find_this_locale";
394
395    log_verbose("BillFairmanTest\n");
396
397    lr = ures_open(NULL,lp,&lec);
398    if (lr) {
399        cr = ures_getByKey(lr,"collations",0,&lec);
400        if (cr) {
401            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402            if (lp) {
403                if (U_SUCCESS(lec)) {
404                    if(strcmp(lp, "fr") != 0) {
405                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                    }
407                }
408            }
409            ures_close(cr);
410        }
411        ures_close(lr);
412    }
413}
414
/*
 * Strings for TestChMove, in the order the "cs" collator is expected to sort
 * them. The \\u escapes are expanded with u_unescape() at run time.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  /* c with U+030C */
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly",
  "hb", "HB",
  "hx", "HX",
  "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, with and without U+030C */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s, with and without U+030C */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z, with and without U+030C */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
434
435static void TestChMove(void) {
436    UChar t1[256] = {0};
437    UChar t2[256] = {0};
438
439    uint32_t i = 0, j = 0;
440    uint32_t size = 0;
441    UErrorCode status = U_ZERO_ERROR;
442
443    UCollator *coll = ucol_open("cs", &status);
444
445    if(U_SUCCESS(status)) {
446        size = sizeof(chTest)/sizeof(chTest[0]);
447        for(i = 0; i < size-1; i++) {
448            for(j = i+1; j < size; j++) {
449                u_unescape(chTest[i], t1, 256);
450                u_unescape(chTest[j], t2, 256);
451                doTest(coll, t1, t2, UCOL_LESS);
452            }
453        }
454    }
455    else {
456        log_data_err("Can't open collator");
457    }
458    ucol_close(coll);
459}
460
461
462
463
464/*
465const static char impTest[][20] = {
466  "\\u4e00",
467    "a",
468    "A",
469    "b",
470    "B",
471    "\\u4e01"
472};
473*/
474
475
/*
 * Runs genericRulesStarter() on a set of tailorings that position characters
 * relative to CJK ideographs (U+4E00..U+4E03). Each table entry carries the
 * rules, the strings in their expected order, and the number of strings.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      {
        /* Tailor b and c before U+4E00. */
        "&[before 1]\\u4e00 < b < c "
        /* Now, before U+4E00 is c; put d and e after that. */
        "&[before 1]\\u4e00 < d < e",
        { "b", "c", "d", "e", "\\u4e00"},
        5
      },
      {
        "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
        6
      },
      {
        "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"},
        3
      },
      {
        "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"},
        3
      }
  };
  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t t = 0;

  while(t < testCount) {
      genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
      t++;
  }
}
530
531static void TestFCDProblem(void) {
532  UChar t1[256] = {0};
533  UChar t2[256] = {0};
534
535  const char *s1 = "\\u0430\\u0306\\u0325";
536  const char *s2 = "\\u04D1\\u0325";
537
538  UErrorCode status = U_ZERO_ERROR;
539  UCollator *coll = ucol_open("", &status);
540  u_unescape(s1, t1, 256);
541  u_unescape(s2, t2, 256);
542
543  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
544  doTest(coll, t1, t2, UCOL_EQUAL);
545
546  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
547  doTest(coll, t1, t2, UCOL_EQUAL);
548
549  ucol_close(coll);
550}
551
552/*
553The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
554We're only using NFC/NFD in this test.
555*/
556#define NORM_BUFFER_TEST_LEN 18
557typedef struct {
558  UChar32 u;
559  UChar NFC[NORM_BUFFER_TEST_LEN];
560  UChar NFD[NORM_BUFFER_TEST_LEN];
561} tester;
562
563static void TestComposeDecompose(void) {
564    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
565    static const UChar UNICODESET_STR[] = {
566        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
567        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
568        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
569    };
570    int32_t noOfLoc;
571    int32_t i = 0, j = 0;
572
573    UErrorCode status = U_ZERO_ERROR;
574    const char *locName = NULL;
575    uint32_t nfcSize;
576    uint32_t nfdSize;
577    tester **t;
578    uint32_t noCases = 0;
579    UCollator *coll = NULL;
580    UChar32 u = 0;
581    UChar comp[NORM_BUFFER_TEST_LEN];
582    uint32_t len = 0;
583    UCollationElements *iter;
584    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
585    int32_t charsToTestSize;
586
587    noOfLoc = uloc_countAvailable();
588
589    coll = ucol_open("", &status);
590    if (U_FAILURE(status)) {
591        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
592        return;
593    }
594    charsToTestSize = uset_size(charsToTest);
595    if (charsToTestSize <= 0) {
596        log_err("Set was zero. Missing data?\n");
597        return;
598    }
599    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
600    t[0] = (tester *)malloc(sizeof(tester));
601    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
602
603    for(u = 0; u < charsToTestSize; u++) {
604        UChar32 ch = uset_charAt(charsToTest, u);
605        len = 0;
606        U16_APPEND_UNSAFE(comp, len, ch);
607        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
608        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
609
610        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
611          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
612            t[noCases]->u = ch;
613            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
614                u_strncpy(t[noCases]->NFC, comp, len);
615                t[noCases]->NFC[len] = 0;
616            }
617            noCases++;
618            t[noCases] = (tester *)malloc(sizeof(tester));
619            uprv_memset(t[noCases], 0, sizeof(tester));
620        }
621    }
622    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
623    uset_close(charsToTest);
624    charsToTest = NULL;
625
626    for(u=0; u<(UChar32)noCases; u++) {
627        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
628            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
629            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
630        }
631    }
632    /*
633    for(u = 0; u < charsToTestSize; u++) {
634      if(!(u&0xFFFF)) {
635        log_verbose("%08X ", u);
636      }
637      uprv_memset(t[noCases], 0, sizeof(tester));
638      t[noCases]->u = u;
639      len = 0;
640      U16_APPEND_UNSAFE(comp, len, u);
641      comp[len] = 0;
642      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
643      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
644      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
645      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
646    }
647    */
648
649    ucol_close(coll);
650
651    log_verbose("Testing locales, number of cases = %i\n", noCases);
652    for(i = 0; i<noOfLoc; i++) {
653        status = U_ZERO_ERROR;
654        locName = uloc_getAvailable(i);
655        if(hasCollationElements(locName)) {
656            char cName[256];
657            UChar name[256];
658            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
659
660            for(j = 0; j<nameSize; j++) {
661                cName[j] = (char)name[j];
662            }
663            cName[nameSize] = 0;
664            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
665
666            coll = ucol_open(locName, &status);
667            ucol_setStrength(coll, UCOL_IDENTICAL);
668            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
669
670            for(u=0; u<(UChar32)noCases; u++) {
671                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
672                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
673                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
674                    log_verbose("Testing NFC\n");
675                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
676                    backAndForth(iter);
677                    log_verbose("Testing NFD\n");
678                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
679                    backAndForth(iter);
680                }
681            }
682            ucol_closeElements(iter);
683            ucol_close(coll);
684        }
685    }
686    for(u = 0; u <= (UChar32)noCases; u++) {
687        free(t[u]);
688    }
689    free(t);
690}
691
692static void TestEmptyRule(void) {
693  UErrorCode status = U_ZERO_ERROR;
694  UChar rulez[] = { 0 };
695  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
696
697  ucol_close(coll);
698}
699
700static void TestUCARules(void) {
701  UErrorCode status = U_ZERO_ERROR;
702  UChar b[256];
703  UChar *rules = b;
704  uint32_t ruleLen = 0;
705  UCollator *UCAfromRules = NULL;
706  UCollator *coll = ucol_open("", &status);
707  if(status == U_FILE_ACCESS_ERROR) {
708    log_data_err("Is your data around?\n");
709    return;
710  } else if(U_FAILURE(status)) {
711    log_err("Error opening collator\n");
712    return;
713  }
714  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
715
716  log_verbose("TestUCARules\n");
717  if(ruleLen > 256) {
718    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
719    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
720  }
721  log_verbose("Rules length is %d\n", ruleLen);
722  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
723  if(U_SUCCESS(status)) {
724    ucol_close(UCAfromRules);
725  } else {
726    log_verbose("Unable to create a collator from UCARules!\n");
727  }
728/*
729  u_unescape(blah, b, 256);
730  ucol_getSortKey(coll, b, 1, res, 256);
731*/
732  ucol_close(coll);
733  if(rules != b) {
734    free(rules);
735  }
736}
737
738
739/* Pinyin tonal order */
740/*
741    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
742          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
743    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
744    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
745    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
746    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
747      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
748.. (\u00fc)
749
750However, in testing we got the following order:
751    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
752          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
753    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
754.. (\u0113)
755    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
756    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
757    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
758.. (\u01d8)
759      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
760*/
761
/*
 * Runs genericRulesStarter() with rules that place the tone-marked vowels
 * before a, e, i, o and u, plus the tone-marked forms of u-diaeresis after
 * u. expectedOrder lists the strings in the order they should sort.
 */
static void TestBefore(void) {
  static const char *expectedOrder[] = {
      /* a */ "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      /* e */ "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      /* i */ "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      /* o */ "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      /* u */ "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      /* after u */ "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char rules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
780
#if 0
/* superseded by TestBeforePinyin */
/* Runs genericLocaleStarter() for "zh" on the tone-marked vowels below. */
static void TestJ784(void) {
  static const char *data[] = {
      /* a */ "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      /* e */ "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      /* i */ "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      /* o */ "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      /* u */ "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      /* u-diaeresis */ "\\u00fc", "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
796
#if 0
/* superseded by the changes to the lv locale */
/* Runs genericLocaleStarter() for "lv" on I, i, Y, y in that order. */
static void TestJ831(void) {
  static const char *data[] = { "I", "i", "Y", "y" };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
809
/*
 * Checks the ordering of the ae ligature between "ae"/"Ae" and "af"/"Af":
 * once with the "fr" locale collator and once with an explicit
 * backwards-secondary tailoring.
 */
static void TestJ815(void) {
  static const char *data[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };

  genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
}
830
831
832static void TestCase(void)
833{
834    const static UChar gRules[MAX_TOKEN_LEN] =
835    /*" & 0 < 1,\u2461<a,A"*/
836    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
837
838    const static UChar testCase[][MAX_TOKEN_LEN] =
839    {
840        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
841        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
842        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
843        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
844    };
845
846    const static UCollationResult caseTestResults[][9] =
847    {
848        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
850        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
851        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
852    };
853
854    const static UColAttributeValue caseTestAttributes[][2] =
855    {
856        { UCOL_LOWER_FIRST, UCOL_OFF},
857        { UCOL_UPPER_FIRST, UCOL_OFF},
858        { UCOL_LOWER_FIRST, UCOL_ON},
859        { UCOL_UPPER_FIRST, UCOL_ON}
860    };
861    int32_t i,j,k;
862    UErrorCode status = U_ZERO_ERROR;
863    UCollationElements *iter;
864    UCollator  *myCollation;
865    myCollation = ucol_open("en_US", &status);
866
867    if(U_FAILURE(status)){
868        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
869        return;
870    }
871    log_verbose("Testing different case settings\n");
872    ucol_setStrength(myCollation, UCOL_TERTIARY);
873
874    for(k = 0; k<4; k++) {
875      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
876      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
877      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
878      for (i = 0; i < 3 ; i++) {
879        for(j = i+1; j<4; j++) {
880          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
881        }
882      }
883    }
884    ucol_close(myCollation);
885
886    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
887    if(U_FAILURE(status)){
888        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
889        return;
890    }
891    log_verbose("Testing different case settings with custom rules\n");
892    ucol_setStrength(myCollation, UCOL_TERTIARY);
893
894    for(k = 0; k<4; k++) {
895      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
896      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
897      for (i = 0; i < 3 ; i++) {
898        for(j = i+1; j<4; j++) {
899          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
900          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
901          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
902          backAndForth(iter);
903          ucol_closeElements(iter);
904          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
905          backAndForth(iter);
906          ucol_closeElements(iter);
907        }
908      }
909    }
910    ucol_close(myCollation);
911    {
912      const static char *lowerFirst[] = {
913        "h",
914        "H",
915        "ch",
916        "Ch",
917        "CH",
918        "cha",
919        "chA",
920        "Cha",
921        "ChA",
922        "CHa",
923        "CHA",
924        "i",
925        "I"
926      };
927
928      const static char *upperFirst[] = {
929        "H",
930        "h",
931        "CH",
932        "Ch",
933        "ch",
934        "CHA",
935        "CHa",
936        "ChA",
937        "Cha",
938        "chA",
939        "cha",
940        "I",
941        "i"
942      };
943      log_verbose("mixed case test\n");
944      log_verbose("lower first, case level off\n");
945      genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
946      log_verbose("upper first, case level off\n");
947      genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
948      log_verbose("lower first, case level on\n");
949      genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
950      log_verbose("upper first, case level on\n");
951      genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
952    }
953
954}
955
956static void TestIncrementalNormalize(void) {
957
958    /*UChar baseA     =0x61;*/
959    UChar baseA     =0x41;
960/*    UChar baseB     = 0x42;*/
961    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
962    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
963    /*
964        0x316 is combining grave accent below, cc=220
965        0x321 is combining palatalized hook below, cc=202
966        0x300 is combining grave accent, cc=230
967    */
968
969#define MAXSLEN 2000
970    /*int          maxSLen   = 64000;*/
971    int          sLen;
972    int          i;
973
974    UCollator        *coll;
975    UErrorCode       status = U_ZERO_ERROR;
976    UCollationResult result;
977
978    int32_t myQ = getTestOption(QUICK_OPTION);
979
980    if(getTestOption(QUICK_OPTION) < 0) {
981        setTestOption(QUICK_OPTION, 1);
982    }
983
984    {
985        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
986        /*          most buffers along the way.*/
987        UChar            strA[MAXSLEN+1];
988        UChar            strB[MAXSLEN+1];
989
990        coll = ucol_open("en_US", &status);
991        if(status == U_FILE_ACCESS_ERROR) {
992          log_data_err("Is your data around?\n");
993          return;
994        } else if(U_FAILURE(status)) {
995          log_err("Error opening collator\n");
996          return;
997        }
998        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
999
1000        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1001        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1002        /*for (sLen = 1000; sLen<1001; sLen++) {*/
1003        for (sLen = 500; sLen<501; sLen++) {
1004        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1005            strA[0] = baseA;
1006            strB[0] = baseA;
1007            for (i=1; i<=sLen-1; i++) {
1008                strA[i] = ccMix[i % 3];
1009                strB[sLen-i] = ccMix[i % 3];
1010            }
1011            strA[sLen]   = 0;
1012            strB[sLen]   = 0;
1013
1014            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1015            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1016            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1017            doTest(coll, strA, strB, UCOL_EQUAL);
1018        }
1019    }
1020
1021    setTestOption(QUICK_OPTION, myQ);
1022
1023
1024    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1025    /*         of the string.  Checks a couple of edge cases.*/
1026
1027    {
1028        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1029        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1030        ucol_setStrength(coll, UCOL_TERTIARY);
1031        doTest(coll, strA, strB, UCOL_EQUAL);
1032    }
1033
1034    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1035
1036    {
1037      /* New UCA  3.1.1.
1038       * test below used a code point from Desseret, which sorts differently
1039       * than d800 dc00
1040       */
1041        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1042        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1043        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1044        ucol_setStrength(coll, UCOL_TERTIARY);
1045        doTest(coll, strA, strB, UCOL_GREATER);
1046    }
1047
1048    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1049
1050    {
1051        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1052        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1053        char  sortKeyA[50];
1054        char  sortKeyAz[50];
1055        char  sortKeyB[50];
1056        char  sortKeyBz[50];
1057        int   r;
1058
1059        /* there used to be -3 here. Hmmmm.... */
1060        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1061        result = ucol_strcoll(coll, strA, 3, strB, 3);
1062        if (result != UCOL_GREATER) {
1063            log_err("ERROR 1 in test 4\n");
1064        }
1065        result = ucol_strcoll(coll, strA, -1, strB, -1);
1066        if (result != UCOL_EQUAL) {
1067            log_err("ERROR 2 in test 4\n");
1068        }
1069
1070        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1071        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1072        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1073        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1074
1075        r = strcmp(sortKeyA, sortKeyAz);
1076        if (r <= 0) {
1077            log_err("Error 3 in test 4\n");
1078        }
1079        r = strcmp(sortKeyA, sortKeyB);
1080        if (r <= 0) {
1081            log_err("Error 4 in test 4\n");
1082        }
1083        r = strcmp(sortKeyAz, sortKeyBz);
1084        if (r != 0) {
1085            log_err("Error 5 in test 4\n");
1086        }
1087
1088        ucol_setStrength(coll, UCOL_IDENTICAL);
1089        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1090        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1091        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1092        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1093
1094        r = strcmp(sortKeyA, sortKeyAz);
1095        if (r <= 0) {
1096            log_err("Error 6 in test 4\n");
1097        }
1098        r = strcmp(sortKeyA, sortKeyB);
1099        if (r <= 0) {
1100            log_err("Error 7 in test 4\n");
1101        }
1102        r = strcmp(sortKeyAz, sortKeyBz);
1103        if (r != 0) {
1104            log_err("Error 8 in test 4\n");
1105        }
1106        ucol_setStrength(coll, UCOL_TERTIARY);
1107    }
1108
1109
1110    /*  Test 5:  Null characters in non-normal source strings.*/
1111
1112    {
1113        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1114        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1115        char  sortKeyA[50];
1116        char  sortKeyAz[50];
1117        char  sortKeyB[50];
1118        char  sortKeyBz[50];
1119        int   r;
1120
1121        result = ucol_strcoll(coll, strA, 6, strB, 6);
1122        if (result != UCOL_GREATER) {
1123            log_err("ERROR 1 in test 5\n");
1124        }
1125        result = ucol_strcoll(coll, strA, -1, strB, -1);
1126        if (result != UCOL_EQUAL) {
1127            log_err("ERROR 2 in test 5\n");
1128        }
1129
1130        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1131        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1132        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1133        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1134
1135        r = strcmp(sortKeyA, sortKeyAz);
1136        if (r <= 0) {
1137            log_err("Error 3 in test 5\n");
1138        }
1139        r = strcmp(sortKeyA, sortKeyB);
1140        if (r <= 0) {
1141            log_err("Error 4 in test 5\n");
1142        }
1143        r = strcmp(sortKeyAz, sortKeyBz);
1144        if (r != 0) {
1145            log_err("Error 5 in test 5\n");
1146        }
1147
1148        ucol_setStrength(coll, UCOL_IDENTICAL);
1149        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1150        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1151        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1152        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1153
1154        r = strcmp(sortKeyA, sortKeyAz);
1155        if (r <= 0) {
1156            log_err("Error 6 in test 5\n");
1157        }
1158        r = strcmp(sortKeyA, sortKeyB);
1159        if (r <= 0) {
1160            log_err("Error 7 in test 5\n");
1161        }
1162        r = strcmp(sortKeyAz, sortKeyBz);
1163        if (r != 0) {
1164            log_err("Error 8 in test 5\n");
1165        }
1166        ucol_setStrength(coll, UCOL_TERTIARY);
1167    }
1168
1169
1170    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1171
1172    {
1173        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1174        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1175
1176        result = ucol_strcoll(coll, strA, 5, strB, 5);
1177        if (result != UCOL_LESS) {
1178            log_err("Error 1 in test 6\n");
1179        }
1180        result = ucol_strcoll(coll, strA, -1, strB, -1);
1181        if (result != UCOL_EQUAL) {
1182            log_err("Error 2 in test 6\n");
1183        }
1184    }
1185
1186    ucol_close(coll);
1187}
1188
1189
1190
#if 0
/*
 * Disabled test.  For every escaped pattern in caseBitData it asks
 * ucol_uprv_getCaseBits() for the case bits under the collator opened with
 * the empty locale name and compares them with the entry at the same index
 * in results[].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Bail out before touching the collator if it could not be opened. */
  if(U_FAILURE(status)) {
    log_data_err("Couldn't open UCA collator -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator; the original version never closed it. */
  ucol_close(UCA);
}
#endif
1218
1219static void TestHangulTailoring(void) {
1220    static const char *koreanData[] = {
1221        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1222            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1223            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1224            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1225            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1226            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1227    };
1228
1229    const char *rules =
1230        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1231        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1232        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1233        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1234        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1235        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1236
1237
1238  UErrorCode status = U_ZERO_ERROR;
1239  UChar rlz[2048] = { 0 };
1240  uint32_t rlen = u_unescape(rules, rlz, 2048);
1241
1242  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1243  if(status == U_FILE_ACCESS_ERROR) {
1244    log_data_err("Is your data around?\n");
1245    return;
1246  } else if(U_FAILURE(status)) {
1247    log_err("Error opening collator\n");
1248    return;
1249  }
1250
1251  log_verbose("Using start of korean rules\n");
1252
1253  if(U_SUCCESS(status)) {
1254    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1255  } else {
1256    log_err("Unable to open collator with rules %s\n", rules);
1257  }
1258
1259  ucol_close(coll);
1260
1261  log_verbose("Using ko__LOTUS locale\n");
1262  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1263}
1264
/*
 * Middle byte values of the secondary/tertiary sort key compression, as
 * produced by the current implementation (see class CollationKeys).
 * They have to be kept in step with the implementation and may change
 * whenever the sort key compression changes.
 */
enum {
    /* secondary level, range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary-only, range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1275
1276static void TestCompressOverlap(void) {
1277    UChar       secstr[150];
1278    UChar       tertstr[150];
1279    UErrorCode  status = U_ZERO_ERROR;
1280    UCollator  *coll;
1281    uint8_t     result[500];
1282    uint32_t    resultlen;
1283    int         count = 0;
1284    uint8_t    *tempptr;
1285
1286    coll = ucol_open("", &status);
1287
1288    if (U_FAILURE(status)) {
1289        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1290        return;
1291    }
1292    while (count < 149) {
1293        secstr[count] = 0x0020; /* [06, 05, 05] */
1294        tertstr[count] = 0x0020;
1295        count ++;
1296    }
1297
1298    /* top down compression ----------------------------------- */
1299    secstr[count] = 0x0332; /* [, 87, 05] */
1300    tertstr[count] = 0x3000; /* [06, 05, 07] */
1301
1302    /* no compression secstr should have 150 secondary bytes, tertstr should
1303    have 150 tertiary bytes.
1304    with correct compression, secstr should have 6 secondary
1305    bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1306    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1307    (void)resultlen;    /* Suppress set but not used warning. */
1308    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1309    while (*(tempptr + 1) != 1) {
1310        /* the last secondary collation element is not checked since it is not
1311        part of the compression */
1312        if (*tempptr < SEC_COMMON_MIDDLE) {
1313            log_err("Secondary top down compression overlapped\n");
1314        }
1315        tempptr ++;
1316    }
1317
1318    /* tertiary top/bottom/common for en_US is similar to the secondary
1319    top/bottom/common */
1320    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1321    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1322    while (*(tempptr + 1) != 0) {
1323        /* the last secondary collation element is not checked since it is not
1324        part of the compression */
1325        if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1326            log_err("Tertiary top down compression overlapped\n");
1327        }
1328        tempptr ++;
1329    }
1330
1331    /* bottom up compression ------------------------------------- */
1332    secstr[count] = 0;
1333    tertstr[count] = 0;
1334    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1335    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1336    while (*(tempptr + 1) != 1) {
1337        /* the last secondary collation element is not checked since it is not
1338        part of the compression */
1339        if (*tempptr > SEC_COMMON_MIDDLE) {
1340            log_err("Secondary bottom up compression overlapped\n");
1341        }
1342        tempptr ++;
1343    }
1344
1345    /* tertiary top/bottom/common for en_US is similar to the secondary
1346    top/bottom/common */
1347    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1348    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1349    while (*(tempptr + 1) != 0) {
1350        /* the last secondary collation element is not checked since it is not
1351        part of the compression */
1352        if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1353            log_err("Tertiary bottom up compression overlapped\n");
1354        }
1355        tempptr ++;
1356    }
1357
1358    ucol_close(coll);
1359}
1360
/*
 * Runs the same three-string ordering check against the "root" locale and
 * then against each of a series of rule strings involving U+0410.
 */
static void TestCyrillicTailoring(void) {
  static const char *test[] = {
    "\\u0410b",
      "\\u0410\\u0306a",
      "\\u04d0A"
  };
  /* Rule strings, applied one after another in this order. */
  static const char *tailorings[] = {
    "&\\u0410 = \\u0410",
    "&Z < \\u0410",
    "&\\u0410 = \\u0410 < \\u04d0",
    "&Z < \\u0410 < \\u04d0",
    "&\\u0410 = \\u0410 < \\u0410\\u0301",
    "&Z < \\u0410 < \\u0410\\u0301"
  };
  const uint32_t testCount = sizeof(test)/sizeof(test[0]);
  const uint32_t tailoringCount = sizeof(tailorings)/sizeof(tailorings[0]);
  uint32_t t;

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

  genericLocaleStarter("root", test, testCount);
  for(t = 0; t < tailoringCount; t++) {
    genericRulesStarter(tailorings[t], test, testCount);
  }
}
1379
/*
 * Checks two orderings under a rule string that suppresses contractions
 * for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  /* The one rule string shared by both checks. */
  static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressRule, testNoCont, sizeof(testNoCont)/sizeof(testNoCont[0]));
  genericRulesStarter(suppressRule, testNoCont2, sizeof(testNoCont2)/sizeof(testNoCont2[0]));
}
1396
1397static void TestContraction(void) {
1398    const static char *testrules[] = {
1399        "&A = AB / B",
1400        "&A = A\\u0306/\\u0306",
1401        "&c = ch / h"
1402    };
1403    const static UChar testdata[][2] = {
1404        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1405        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1406        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1407    };
1408    const static UChar testdata2[][2] = {
1409        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1410        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1411        {0x0063 /* 'c' */, 0x006C /* 'l' */}
1412    };
1413#if 0
1414    /*
1415     * These pairs of rule strings are not guaranteed to yield the very same mappings.
1416     * In fact, LDML 24 recommends an improved way of creating mappings
1417     * which always yields different mappings for such pairs. See
1418     * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419     */
1420    const static char *testrules3[] = {
1421        "&z < xyz &xyzw << B",
1422        "&z < xyz &xyz << B / w",
1423        "&z < ch &achm << B",
1424        "&z < ch &a << B / chm",
1425        "&\\ud800\\udc00w << B",
1426        "&\\ud800\\udc00 << B / w",
1427        "&a\\ud800\\udc00m << B",
1428        "&a << B / \\ud800\\udc00m",
1429    };
1430#endif
1431
1432    UErrorCode  status   = U_ZERO_ERROR;
1433    UCollator  *coll;
1434    UChar       rule[256] = {0};
1435    uint32_t    rlen     = 0;
1436    int         i;
1437
1438    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1439        UCollationElements *iter1;
1440        int j = 0;
1441        log_verbose("Rule %s for testing\n", testrules[i]);
1442        rlen = u_unescape(testrules[i], rule, 32);
1443        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1444        if (U_FAILURE(status)) {
1445            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1446            return;
1447        }
1448        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1449        if (U_FAILURE(status)) {
1450            log_err("Collation iterator creation failed\n");
1451            return;
1452        }
1453        while (j < 2) {
1454            UCollationElements *iter2 = ucol_openElements(coll,
1455                                                         &(testdata[i][j]),
1456                                                         1, &status);
1457            uint32_t ce;
1458            if (U_FAILURE(status)) {
1459                log_err("Collation iterator creation failed\n");
1460                return;
1461            }
1462            ce = ucol_next(iter2, &status);
1463            while (ce != UCOL_NULLORDER) {
1464                if ((uint32_t)ucol_next(iter1, &status) != ce) {
1465                    log_err("Collation elements in contraction split does not match\n");
1466                    return;
1467                }
1468                ce = ucol_next(iter2, &status);
1469            }
1470            j ++;
1471            ucol_closeElements(iter2);
1472        }
1473        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1474            log_err("Collation elements not exhausted\n");
1475            return;
1476        }
1477        ucol_closeElements(iter1);
1478        ucol_close(coll);
1479    }
1480
1481    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1482    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1483    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1484        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1485                testdata2[0][0], testdata2[0][1], testdata2[1][0],
1486                testdata2[1][1]);
1487        return;
1488    }
1489    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1490        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1491                testdata2[1][0], testdata2[1][1], testdata2[2][0],
1492                testdata2[2][1]);
1493        return;
1494    }
1495    ucol_close(coll);
1496#if 0  /* see above */
1497    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
1498        log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1499        UCollator          *coll1,
1500                           *coll2;
1501        UCollationElements *iter1,
1502                           *iter2;
1503        UChar               ch = 0x0042 /* 'B' */;
1504        uint32_t            ce;
1505        rlen = u_unescape(testrules3[i], rule, 32);
1506        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1507        rlen = u_unescape(testrules3[i + 1], rule, 32);
1508        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509        if (U_FAILURE(status)) {
1510            log_err("Collator creation failed %s\n", testrules[i]);
1511            return;
1512        }
1513        iter1 = ucol_openElements(coll1, &ch, 1, &status);
1514        iter2 = ucol_openElements(coll2, &ch, 1, &status);
1515        if (U_FAILURE(status)) {
1516            log_err("Collation iterator creation failed\n");
1517            return;
1518        }
1519        ce = ucol_next(iter1, &status);
1520        if (U_FAILURE(status)) {
1521            log_err("Retrieving ces failed\n");
1522            return;
1523        }
1524        while (ce != UCOL_NULLORDER) {
1525            uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1526            if (ce == ce2) {
1527                log_verbose("CEs match: %08x\n", ce);
1528            } else {
1529                log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1530                return;
1531            }
1532            ce = ucol_next(iter1, &status);
1533            if (U_FAILURE(status)) {
1534                log_err("Retrieving ces failed\n");
1535                return;
1536            }
1537        }
1538        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1539            log_err("CEs not exhausted\n");
1540            return;
1541        }
1542        ucol_closeElements(iter1);
1543        ucol_closeElements(iter2);
1544        ucol_close(coll1);
1545        ucol_close(coll2);
1546    }
1547#endif
1548}
1549
1550static void TestExpansion(void) {
1551    const static char *testrules[] = {
1552#if 0
1553        /*
1554         * This seems to have tested that M was not mapped to an expansion.
1555         * I believe the old builder just did that because it computed the extension CEs
1556         * at the very end, which was a bug.
1557         * Among other problems, it violated the core tailoring principle
1558         * by making an earlier rule depend on a later one.
1559         * And, of course, if M did not get an expansion, then it was primary different from K,
1560         * unlike what the rule &K<<M says.
1561         */
1562        "&J << K / B & K << M",
1563#endif
1564        "&J << K / B << M"
1565    };
1566    const static UChar testdata[][3] = {
1567        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1568        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1569        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1570        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1571        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1572        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1573    };
1574
1575    UErrorCode  status   = U_ZERO_ERROR;
1576    UCollator  *coll;
1577    UChar       rule[256] = {0};
1578    uint32_t    rlen     = 0;
1579    int         i;
1580
1581    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1582        int j = 0;
1583        log_verbose("Rule %s for testing\n", testrules[i]);
1584        rlen = u_unescape(testrules[i], rule, 32);
1585        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1586        if (U_FAILURE(status)) {
1587            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1588            return;
1589        }
1590
1591        for (j = 0; j < 5; j ++) {
1592            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1593        }
1594        ucol_close(coll);
1595    }
1596}
1597
#if 0
/*
 * Exercises current limitations of the collation engine.
 * It always fails, which is why it is compiled out by default.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rules = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

    log_verbose("recursive expansions\n");
    genericRulesStarter(rules, orderA, countA);
    genericRulesStarter(rules, orderB, countB);
  }
  /* contractions spanning expansions */
  {
    static const char *rules = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rules, orderA, countA);
    genericRulesStarter(rules, orderB, countB);
  }
  /* normalization: nulls in contractions */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue on[] = { UCOL_ON };
    static const UColAttributeValue off[] = { UCOL_OFF };
    uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, off, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, off, attrCount);
  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rules and data are identical to the previous section -- confirm that is intended */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue on[] = { UCOL_ON };
    static const UColAttributeValue off[] = { UCOL_OFF };
    uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, off, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, off, attrCount);
  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rules = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue on[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue off[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
    uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
    uint32_t countC = sizeof(orderC)/sizeof(orderC[0]);

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rules, orderC, countC, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, off, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, off, attrCount);
  }
  /* case level */
  {
    static const char *rules = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue on[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);
    uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
    uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

    log_verbose("case level\n");
    genericRulesStarterWithOptions(rules, orderA, countA, attrs, on, attrCount);
    genericRulesStarterWithOptions(rules, orderB, countB, attrs, on, attrCount);
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
1689
1690static void TestBocsuCoverage(void) {
1691  UErrorCode status = U_ZERO_ERROR;
1692  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1693  UChar       test[256] = {0};
1694  uint32_t    tlen     = u_unescape(testString, test, 32);
1695  uint8_t key[256]     = {0};
1696  uint32_t klen         = 0;
1697
1698  UCollator *coll = ucol_open("", &status);
1699  if(U_SUCCESS(status)) {
1700  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1701
1702  klen = ucol_getSortKey(coll, test, tlen, key, 256);
1703  (void)klen;    /* Suppress set but not used warning. */
1704
1705  ucol_close(coll);
1706  } else {
1707    log_data_err("Couldn't open UCA\n");
1708  }
1709}
1710
1711static void TestVariableTopSetting(void) {
1712  UErrorCode status = U_ZERO_ERROR;
1713  uint32_t varTopOriginal = 0, varTop1, varTop2;
1714  UCollator *coll = ucol_open("", &status);
1715  if(U_SUCCESS(status)) {
1716
1717  static const UChar nul = 0;
1718  static const UChar space = 0x20;
1719  static const UChar dot = 0x2e;  /* punctuation */
1720  static const UChar degree = 0xb0;  /* symbol */
1721  static const UChar dollar = 0x24;  /* currency symbol */
1722  static const UChar zero = 0x30;  /* digit */
1723
1724  varTopOriginal = ucol_getVariableTop(coll, &status);
1725  log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1726  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1727
1728  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1729  varTop2 = ucol_getVariableTop(coll, &status);
1730  log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1731  if(U_FAILURE(status) || varTop1 != varTop2 ||
1732      !ucol_equal(coll, &nul, 0, &space, 1) ||
1733      ucol_equal(coll, &nul, 0, &dot, 1) ||
1734      ucol_equal(coll, &nul, 0, &degree, 1) ||
1735      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1736      ucol_equal(coll, &nul, 0, &zero, 1) ||
1737      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1738    log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1739  }
1740
1741  varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1742  varTop2 = ucol_getVariableTop(coll, &status);
1743  log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1744  if(U_FAILURE(status) || varTop1 != varTop2 ||
1745      !ucol_equal(coll, &nul, 0, &space, 1) ||
1746      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1747      ucol_equal(coll, &nul, 0, &degree, 1) ||
1748      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1749      ucol_equal(coll, &nul, 0, &zero, 1) ||
1750      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1751    log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1752  }
1753
1754  varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1755  varTop2 = ucol_getVariableTop(coll, &status);
1756  log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757  if(U_FAILURE(status) || varTop1 != varTop2 ||
1758      !ucol_equal(coll, &nul, 0, &space, 1) ||
1759      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1761      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762      ucol_equal(coll, &nul, 0, &zero, 1) ||
1763      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1764    log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1765  }
1766
1767  varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768  varTop2 = ucol_getVariableTop(coll, &status);
1769  log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770  if(U_FAILURE(status) || varTop1 != varTop2 ||
1771      !ucol_equal(coll, &nul, 0, &space, 1) ||
1772      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1774      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775      ucol_equal(coll, &nul, 0, &zero, 1) ||
1776      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777    log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1778  }
1779
1780  log_verbose("Testing setting variable top to contractions\n");
1781  {
1782    UChar first[4] = { 0 };
1783    first[0] = 0x0040;
1784    first[1] = 0x0050;
1785    first[2] = 0x0000;
1786
1787    status = U_ZERO_ERROR;
1788    ucol_setVariableTop(coll, first, -1, &status);
1789
1790    if(U_SUCCESS(status)) {
1791      log_err("Invalid contraction succeded in setting variable top!\n");
1792    }
1793
1794  }
1795
1796  log_verbose("Test restoring variable top\n");
1797
1798  status = U_ZERO_ERROR;
1799  ucol_restoreVariableTop(coll, varTopOriginal, &status);
1800  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1801    log_err("Couldn't restore old variable top\n");
1802  }
1803
1804  log_verbose("Testing calling with error set\n");
1805
1806  status = U_INTERNAL_PROGRAM_ERROR;
1807  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1808  varTop2 = ucol_getVariableTop(coll, &status);
1809  ucol_restoreVariableTop(coll, varTop2, &status);
1810  varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1811  varTop2 = ucol_getVariableTop(NULL, &status);
1812  ucol_restoreVariableTop(NULL, varTop2, &status);
1813  if(status != U_INTERNAL_PROGRAM_ERROR) {
1814    log_err("Bad reaction to passed error!\n");
1815  }
1816  ucol_close(coll);
1817  } else {
1818    log_data_err("Couldn't open UCA collator\n");
1819  }
1820}
1821
1822static void TestMaxVariable() {
1823  UErrorCode status = U_ZERO_ERROR;
1824  UColReorderCode oldMax, max;
1825  UCollator *coll;
1826
1827  static const UChar nul = 0;
1828  static const UChar space = 0x20;
1829  static const UChar dot = 0x2e;  /* punctuation */
1830  static const UChar degree = 0xb0;  /* symbol */
1831  static const UChar dollar = 0x24;  /* currency symbol */
1832  static const UChar zero = 0x30;  /* digit */
1833
1834  coll = ucol_open("", &status);
1835  if(U_FAILURE(status)) {
1836    log_data_err("Couldn't open root collator\n");
1837    return;
1838  }
1839
1840  oldMax = ucol_getMaxVariable(coll);
1841  log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845  max = ucol_getMaxVariable(coll);
1846  log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848      !ucol_equal(coll, &nul, 0, &space, 1) ||
1849      ucol_equal(coll, &nul, 0, &dot, 1) ||
1850      ucol_equal(coll, &nul, 0, &degree, 1) ||
1851      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852      ucol_equal(coll, &nul, 0, &zero, 1) ||
1853      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854    log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1855  }
1856
1857  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858  max = ucol_getMaxVariable(coll);
1859  log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861      !ucol_equal(coll, &nul, 0, &space, 1) ||
1862      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863      ucol_equal(coll, &nul, 0, &degree, 1) ||
1864      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865      ucol_equal(coll, &nul, 0, &zero, 1) ||
1866      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1867    log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1868  }
1869
1870  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871  max = ucol_getMaxVariable(coll);
1872  log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874      !ucol_equal(coll, &nul, 0, &space, 1) ||
1875      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1877      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878      ucol_equal(coll, &nul, 0, &zero, 1) ||
1879      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1880    log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1881  }
1882
1883  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884  max = ucol_getMaxVariable(coll);
1885  log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887      !ucol_equal(coll, &nul, 0, &space, 1) ||
1888      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1890      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891      ucol_equal(coll, &nul, 0, &zero, 1) ||
1892      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893    log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1894  }
1895
1896  log_verbose("Test restoring maxVariable\n");
1897  status = U_ZERO_ERROR;
1898  ucol_setMaxVariable(coll, oldMax, &status);
1899  if(oldMax != ucol_getMaxVariable(coll)) {
1900    log_err("Couldn't restore old maxVariable\n");
1901  }
1902
1903  log_verbose("Testing calling with error set\n");
1904  status = U_INTERNAL_PROGRAM_ERROR;
1905  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906  max = ucol_getMaxVariable(coll);
1907  if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908    log_err("Bad reaction to passed error!\n");
1909  }
1910  ucol_close(coll);
1911}
1912
1913static void TestNonChars(void) {
1914  static const char *test[] = {
1915      "\\u0000",  /* ignorable */
1916      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1917      "\\uFDD0", "\\uFDEF",
1918      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1919      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1920      "\\U0003FFFE", "\\U0003FFFF",
1921      "\\U0004FFFE", "\\U0004FFFF",
1922      "\\U0005FFFE", "\\U0005FFFF",
1923      "\\U0006FFFE", "\\U0006FFFF",
1924      "\\U0007FFFE", "\\U0007FFFF",
1925      "\\U0008FFFE", "\\U0008FFFF",
1926      "\\U0009FFFE", "\\U0009FFFF",
1927      "\\U000AFFFE", "\\U000AFFFF",
1928      "\\U000BFFFE", "\\U000BFFFF",
1929      "\\U000CFFFE", "\\U000CFFFF",
1930      "\\U000DFFFE", "\\U000DFFFF",
1931      "\\U000EFFFE", "\\U000EFFFF",
1932      "\\U000FFFFE", "\\U000FFFFF",
1933      "\\U0010FFFE", "\\U0010FFFF",
1934      "\\uFFFF"  /* special character with maximum primary weight */
1935  };
1936  UErrorCode status = U_ZERO_ERROR;
1937  UCollator *coll = ucol_open("en_US", &status);
1938
1939  log_verbose("Test non characters\n");
1940
1941  if(U_SUCCESS(status)) {
1942    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1943  } else {
1944    log_err_status(status, "Unable to open collator\n");
1945  }
1946
1947  ucol_close(coll);
1948}
1949
1950static void TestExtremeCompression(void) {
1951  static char *test[4];
1952  int32_t j = 0, i = 0;
1953
1954  for(i = 0; i<4; i++) {
1955    test[i] = (char *)malloc(2048*sizeof(char));
1956  }
1957
1958  for(j = 20; j < 500; j++) {
1959    for(i = 0; i<4; i++) {
1960      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1961      test[i][j-1] = (char)('a'+i);
1962      test[i][j] = 0;
1963    }
1964    genericLocaleStarter("en_US", (const char **)test, 4);
1965  }
1966
1967
1968  for(i = 0; i<4; i++) {
1969    free(test[i]);
1970  }
1971}
1972
/*
 * Disabled alternative version of TestExtremeCompression; it is excluded from
 * the build by the surrounding #if 0.
 * NOTE(review): as written it passes `status` rather than `&status` to
 * ucol_open() and never uses or closes `coll`; this would have to be fixed
 * before re-enabling it - confirm whether the block is still needed at all.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Each pass overwrites the buffers; only the contents written for the last j reach the test call below. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Only test[0] is rewritten here (the inner loop runs for i == 0 only). */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2002
/*
 * Runs genericRulesStarter() with a tailoring whose rules and test strings
 * contain supplementary code points written as escaped surrogate pairs.
 */
static void TestSurrogates(void) {
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* Strings in the order handed to genericRulesStarter(). */
  static const char *test[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* The table holds 14 entries; all of them take part in the test. */
  genericRulesStarter(rule, test, sizeof(test)/sizeof(test[0]));
}
2023
2024/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2025static void TestPrefix(void) {
2026  uint32_t i;
2027
2028  static const struct {
2029    const char *rules;
2030    const char *data[50];
2031    const uint32_t len;
2032  } tests[] = {
2033    { "&z <<< z|a",
2034      {"zz", "za"}, 2 },
2035
2036    { "&z <<< z|   a",
2037      {"zz", "za"}, 2 },
2038    { "[strength I]"
2039      "&a=\\ud900\\udc25"
2040      "&z<<<\\ud900\\udc25|a",
2041      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2042  };
2043
2044
2045  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2046    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2047  }
2048}
2049
2050/* This test uses data suplied by Masashiko Maedera to test the implementation */
2051/* JIS X 4061 collation order implementation                                   */
2052static void TestNewJapanese(void) {
2053
2054  static const char * const test1[] = {
2055      "\\u30b7\\u30e3\\u30fc\\u30ec",
2056      "\\u30b7\\u30e3\\u30a4",
2057      "\\u30b7\\u30e4\\u30a3",
2058      "\\u30b7\\u30e3\\u30ec",
2059      "\\u3061\\u3087\\u3053",
2060      "\\u3061\\u3088\\u3053",
2061      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2062      "\\u3066\\u30fc\\u305f",
2063      "\\u30c6\\u30fc\\u30bf",
2064      "\\u30c6\\u30a7\\u30bf",
2065      "\\u3066\\u3048\\u305f",
2066      "\\u3067\\u30fc\\u305f",
2067      "\\u30c7\\u30fc\\u30bf",
2068      "\\u30c7\\u30a7\\u30bf",
2069      "\\u3067\\u3048\\u305f",
2070      "\\u3066\\u30fc\\u305f\\u30fc",
2071      "\\u30c6\\u30fc\\u30bf\\u30a1",
2072      "\\u30c6\\u30a7\\u30bf\\u30fc",
2073      "\\u3066\\u3047\\u305f\\u3041",
2074      "\\u3066\\u3048\\u305f\\u30fc",
2075      "\\u3067\\u30fc\\u305f\\u30fc",
2076      "\\u30c7\\u30fc\\u30bf\\u30a1",
2077      "\\u3067\\u30a7\\u305f\\u30a1",
2078      "\\u30c7\\u3047\\u30bf\\u3041",
2079      "\\u30c7\\u30a8\\u30bf\\u30a2",
2080      "\\u3072\\u3086",
2081      "\\u3073\\u3085\\u3042",
2082      "\\u3074\\u3085\\u3042",
2083      "\\u3073\\u3085\\u3042\\u30fc",
2084      "\\u30d3\\u30e5\\u30a2\\u30fc",
2085      "\\u3074\\u3085\\u3042\\u30fc",
2086      "\\u30d4\\u30e5\\u30a2\\u30fc",
2087      "\\u30d2\\u30e5\\u30a6",
2088      "\\u30d2\\u30e6\\u30a6",
2089      "\\u30d4\\u30e5\\u30a6\\u30a2",
2090      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2091      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2092      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2093      "\\u3072\\u3085\\u3093",
2094      "\\u3074\\u3085\\u3093",
2095      "\\u3075\\u30fc\\u308a",
2096      "\\u30d5\\u30fc\\u30ea",
2097      "\\u3075\\u3045\\u308a",
2098      "\\u3075\\u30a5\\u308a",
2099      "\\u3075\\u30a5\\u30ea",
2100      "\\u30d5\\u30a6\\u30ea",
2101      "\\u3076\\u30fc\\u308a",
2102      "\\u30d6\\u30fc\\u30ea",
2103      "\\u3076\\u3045\\u308a",
2104      "\\u30d6\\u30a5\\u308a",
2105      "\\u3077\\u3046\\u308a",
2106      "\\u30d7\\u30a6\\u30ea",
2107      "\\u3075\\u30fc\\u308a\\u30fc",
2108      "\\u30d5\\u30a5\\u30ea\\u30fc",
2109      "\\u3075\\u30a5\\u308a\\u30a3",
2110      "\\u30d5\\u3045\\u308a\\u3043",
2111      "\\u30d5\\u30a6\\u30ea\\u30fc",
2112      "\\u3075\\u3046\\u308a\\u3043",
2113      "\\u30d6\\u30a6\\u30ea\\u30a4",
2114      "\\u3077\\u30fc\\u308a\\u30fc",
2115      "\\u3077\\u30a5\\u308a\\u30a4",
2116      "\\u3077\\u3046\\u308a\\u30fc",
2117      "\\u30d7\\u30a6\\u30ea\\u30a4",
2118      "\\u30d5\\u30fd",
2119      "\\u3075\\u309e",
2120      "\\u3076\\u309d",
2121      "\\u3076\\u3075",
2122      "\\u3076\\u30d5",
2123      "\\u30d6\\u3075",
2124      "\\u30d6\\u30d5",
2125      "\\u3076\\u309e",
2126      "\\u3076\\u3077",
2127      "\\u30d6\\u3077",
2128      "\\u3077\\u309d",
2129      "\\u30d7\\u30fd",
2130      "\\u3077\\u3075",
2131};
2132
2133  static const char *test2[] = {
2134    "\\u306f\\u309d", /* H\\u309d */
2135    "\\u30cf\\u30fd", /* K\\u30fd */
2136    "\\u306f\\u306f", /* HH */
2137    "\\u306f\\u30cf", /* HK */
2138    "\\u30cf\\u30cf", /* KK */
2139    "\\u306f\\u309e", /* H\\u309e */
2140    "\\u30cf\\u30fe", /* K\\u30fe */
2141    "\\u306f\\u3070", /* HH\\u309b */
2142    "\\u30cf\\u30d0", /* KK\\u309b */
2143    "\\u306f\\u3071", /* HH\\u309c */
2144    "\\u30cf\\u3071", /* KH\\u309c */
2145    "\\u30cf\\u30d1", /* KK\\u309c */
2146    "\\u3070\\u309d", /* H\\u309b\\u309d */
2147    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2148    "\\u3070\\u306f", /* H\\u309bH */
2149    "\\u30d0\\u30cf", /* K\\u309bK */
2150    "\\u3070\\u309e", /* H\\u309b\\u309e */
2151    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2152    "\\u3070\\u3070", /* H\\u309bH\\u309b */
2153    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2154    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2155    "\\u3070\\u3071", /* H\\u309bH\\u309c */
2156    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2157    "\\u3071\\u309d", /* H\\u309c\\u309d */
2158    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2159    "\\u3071\\u306f", /* H\\u309cH */
2160    "\\u30d1\\u30cf", /* K\\u309cK */
2161    "\\u3071\\u3070", /* H\\u309cH\\u309b */
2162    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2163    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2164    "\\u3071\\u3071", /* H\\u309cH\\u309c */
2165    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2166  };
2167  /*
2168  static const char *test3[] = {
2169    "\\u221er\\u221e",
2170    "\\u221eR#",
2171    "\\u221et\\u221e",
2172    "#r\\u221e",
2173    "#R#",
2174    "#t%",
2175    "#T%",
2176    "8t\\u221e",
2177    "8T\\u221e",
2178    "8t#",
2179    "8T#",
2180    "8t%",
2181    "8T%",
2182    "8t8",
2183    "8T8",
2184    "\\u03c9r\\u221e",
2185    "\\u03a9R%",
2186    "rr\\u221e",
2187    "rR\\u221e",
2188    "Rr\\u221e",
2189    "RR\\u221e",
2190    "RT%",
2191    "rt8",
2192    "tr\\u221e",
2193    "tr8",
2194    "TR8",
2195    "tt8",
2196    "\\u30b7\\u30e3\\u30fc\\u30ec",
2197  };
2198  */
2199  static const UColAttribute att[] = { UCOL_STRENGTH };
2200  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2201
2202  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2203  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2204
2205  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
2206  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
2207  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2208  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
2209  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
2210}
2211
2212static void TestStrCollIdenticalPrefix(void) {
2213  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2214  const char* test[] = {
2215    "ab\\ud9b0\\udc70",
2216    "ab\\ud9b0\\udc71"
2217  };
2218  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
2219}
2220/* Contractions should have all their canonically equivalent */
2221/* strings included */
2222static void TestContractionClosure(void) {
2223  static const struct {
2224    const char *rules;
2225    const char *data[10];
2226    const uint32_t len;
2227  } tests[] = {
2228    {   "&b=\\u00e4\\u00e4",
2229      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2230    {   "&b=\\u00C5",
2231      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2232  };
2233  uint32_t i;
2234
2235
2236  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2237    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2238  }
2239}
2240
2241/* This tests also fails*/
2242static void TestBeforePrefixFailure(void) {
2243  static const struct {
2244    const char *rules;
2245    const char *data[10];
2246    const uint32_t len;
2247  } tests[] = {
2248    { "&g <<< a"
2249      "&[before 3]\\uff41 <<< x",
2250      {"x", "\\uff41"}, 2 },
2251    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2252        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2253        "&[before 3]\\u30a7<<<\\u30a9",
2254      {"\\u30a9", "\\u30a7"}, 2 },
2255    {   "&[before 3]\\u30a7<<<\\u30a9"
2256        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2257        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2258      {"\\u30a9", "\\u30a7"}, 2 },
2259  };
2260  uint32_t i;
2261
2262
2263  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2264    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2265  }
2266
2267#if 0
2268  const char* rule1 =
2269        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2270        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2271        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2272  const char* rule2 =
2273        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2274        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2275        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2276  const char* test[] = {
2277      "\\u30c6\\u30fc\\u30bf",
2278      "\\u30c6\\u30a7\\u30bf",
2279  };
2280  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
2281  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
2282/* this piece of code should be in some sort of verbose mode     */
2283/* it gets the collation elements for elements and prints them   */
2284/* This is useful when trying to see whether the problem is      */
2285  {
2286    UErrorCode status = U_ZERO_ERROR;
2287    uint32_t i = 0;
2288    UCollationElements *it = NULL;
2289    uint32_t CE;
2290    UChar string[256];
2291    uint32_t uStringLen;
2292    UCollator *coll = NULL;
2293
2294    uStringLen = u_unescape(rule1, string, 256);
2295
2296    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2297
2298    /*coll = ucol_open("ja_JP_JIS", &status);*/
2299    it = ucol_openElements(coll, string, 0, &status);
2300
2301    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
2302      log_verbose("%s\n", test[i]);
2303      uStringLen = u_unescape(test[i], string, 256);
2304      ucol_setText(it, string, uStringLen, &status);
2305
2306      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2307        log_verbose("%08X\n", CE);
2308      }
2309      log_verbose("\n");
2310
2311    }
2312
2313    ucol_closeElements(it);
2314    ucol_close(coll);
2315  }
2316#endif
2317}
2318
2319static void TestPrefixCompose(void) {
2320  const char* rule1 =
2321        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2322  /*
2323  const char* test[] = {
2324      "\\u30c6\\u30fc\\u30bf",
2325      "\\u30c6\\u30a7\\u30bf",
2326  };
2327  */
2328  {
2329    UErrorCode status = U_ZERO_ERROR;
2330    /*uint32_t i = 0;*/
2331    /*UCollationElements *it = NULL;*/
2332/*    uint32_t CE;*/
2333    UChar string[256];
2334    uint32_t uStringLen;
2335    UCollator *coll = NULL;
2336
2337    uStringLen = u_unescape(rule1, string, 256);
2338
2339    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2340    ucol_close(coll);
2341  }
2342
2343
2344}
2345
2346/*
2347[last variable] last variable value
2348[last primary ignorable] largest CE for primary ignorable
2349[last secondary ignorable] largest CE for secondary ignorable
2350[last tertiary ignorable] largest CE for tertiary ignorable
2351[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2352*/
2353
2354static void TestRuleOptions(void) {
2355  /* values here are hardcoded and are correct for the current UCA
2356   * when the UCA changes, one might be forced to change these
2357   * values.
2358   */
2359
2360  /*
2361   * These strings contain the last character before [variable top]
2362   * and the first and second characters (by primary weights) after it.
2363   * See FractionalUCA.txt. For example:
2364      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2365      [variable top = 0C FE]
2366      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2367     and
2368      00B4; [0D 0C, 05, 05]
2369   *
2370   * Note: Starting with UCA 6.0, the [variable top] collation element
2371   * is not the weight of any character or string,
2372   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2373   */
2374#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2375#define FIRST_REGULAR_CHAR_STRING "\\u0060"
2376#define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2377
2378  /*
2379   * This string has to match the character that has the [last regular] weight
2380   * which changes with each UCA version.
2381   * See the bottom of FractionalUCA.txt which says something like
2382      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2383   *
2384   * Note: Starting with UCA 6.0, the [last regular] collation element
2385   * is not the weight of any character or string,
2386   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2387   */
2388#define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2389
2390  static const struct {
2391    const char *rules;
2392    const char *data[10];
2393    const uint32_t len;
2394  } tests[] = {
2395#if 0
2396    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2397    /* - all befores here amount to zero */
2398    { "&[before 3][first tertiary ignorable]<<<a",
2399        { "\\u0000", "a"}, 2
2400    }, /* you cannot go before first tertiary ignorable */
2401
2402    { "&[before 3][last tertiary ignorable]<<<a",
2403        { "\\u0000", "a"}, 2
2404    }, /* you cannot go before last tertiary ignorable */
2405#endif
2406    /*
2407     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2408     * and it *is* possible to "go before" that.
2409     */
2410    { "&[before 3][first secondary ignorable]<<<a",
2411        { "\\u0000", "a"}, 2
2412    },
2413
2414    { "&[before 3][last secondary ignorable]<<<a",
2415        { "\\u0000", "a"}, 2
2416    },
2417
2418    /* 'normal' befores */
2419
2420    /*
2421     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2422     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2423     * because there is no tailoring space before that boundary.
2424     * Made the tests work by tailoring to a space instead.
2425     */
2426    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
2427        {  "c", "b", "\\u0332", "a" }, 4
2428    },
2429
2430    /* we don't have a code point that corresponds to
2431     * the last primary ignorable
2432     */
2433    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
2434        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2435    },
2436
2437    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2438        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
2439    },
2440
2441    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2442        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2443    },
2444
2445    { "&[first regular]<a"
2446      "&[before 1][first regular]<b",
2447      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2448    },
2449
2450    { "&[before 1][last regular]<b"
2451      "&[last regular]<a",
2452        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2453    },
2454
2455    { "&[before 1][first implicit]<b"
2456      "&[first implicit]<a",
2457        { "b", "\\u4e00", "a", "\\u4e01"}, 4
2458    },
2459#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2460    { "&[before 1][last implicit]<b"
2461      "&[last implicit]<a",
2462        { "b", "\\U0010FFFD", "a" }, 3
2463    },
2464#endif
2465    { "&[last variable]<z"
2466      "&' '<x"  /* was &[last primary ignorable]<x, see above */
2467      "&[last secondary ignorable]<<y"
2468      "&[last tertiary ignorable]<<<w"
2469      "&[top]<u",
2470      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2471    }
2472
2473  };
2474  uint32_t i;
2475
2476  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2477    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2478  }
2479}
2480
2481
2482static void TestOptimize(void) {
2483  /* this is not really a test - just trying out
2484   * whether copying of UCA contents will fail
2485   * Cannot really test, since the functionality
2486   * remains the same.
2487   */
2488  static const struct {
2489    const char *rules;
2490    const char *data[10];
2491    const uint32_t len;
2492  } tests[] = {
2493    /* - all befores here amount to zero */
2494    { "[optimize [\\uAC00-\\uD7FF]]",
2495    { "a", "b"}, 2}
2496  };
2497  uint32_t i;
2498
2499  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2500    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2501  }
2502}
2503
2504/*
2505cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2506weiv    ucol_strcollIter?
2507cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2508weiv    these are the input strings?
2509cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2510weiv    will check - could be a problem with utf-8 iterator
2511cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2512weiv    hmmm
2513cycheng@ca.ibm.c... note that we have a standalone high surrogate
2514weiv    that doesn't sound right
2515cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2516weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2517cycheng@ca.ibm.c... yes
2518weiv    and then do the comparison
2519cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2520weiv    utf-16 strings look like a little endian ones in the example you sent me
2521weiv    It could be a bug - let me try to test it out
2522cycheng@ca.ibm.c... ok
2523cycheng@ca.ibm.c... we can wait till the conf. call
2524cycheng@ca.ibm.c... next weke
2525weiv    that would be great
2526weiv    hmmm
2527weiv    I might be wrong
2528weiv    let me play with it some more
2529cycheng@ca.ibm.c... ok
2530cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2531cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2532cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2533weiv    ok
2534cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2535weiv    thanks
2536cycheng@ca.ibm.c... the 4 strings we sent are just samples
2537*/
/*
 * Disabled test (excluded from the build by #if 0).
 * It compares the two byte strings d8 00 00 21 and ff fc 00 62 (UTF-16BE) and
 * their UTF-8 counterparts through ucol_strcollIter() with normalization on,
 * and logs an error when the UTF-16BE and the UTF-8 iterators yield different
 * results.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* Raw big-endian UTF-16 bytes of the two strings. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* UTF-8 bytes used for the second comparison. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2578
2579#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2580static void Alexis2(void) {
2581  UErrorCode status = U_ZERO_ERROR;
2582  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2583  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2584  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2586
2587  UConverter *conv = NULL;
2588
2589  UCharIterator U16BEItS, U16BEItT;
2590  UCharIterator U8ItS, U8ItT;
2591
2592  UCollationResult resU16, resU16BE, resU8;
2593
2594  static const char* const pairs[][2] = {
2595    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2596    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2597    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2598    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2599    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2600    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2601    { "\\u0020", "\\u0020\\u0000"}
2602/*
26035F20 (my result here)
26045F204E008E3F
26055F20 (your result here)
2606*/
2607  };
2608
2609  int32_t i = 0;
2610
2611  UCollator *coll = ucol_open("", &status);
2612  if(status == U_FILE_ACCESS_ERROR) {
2613    log_data_err("Is your data around?\n");
2614    return;
2615  } else if(U_FAILURE(status)) {
2616    log_err("Error opening collator\n");
2617    return;
2618  }
2619  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2620  conv = ucnv_open("UTF16BE", &status);
2621  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
2622    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2623    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624
2625    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2626
2627    log_verbose("Result of strcoll is %i\n", resU16);
2628
2629    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2630    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2631    (void)U16BELenS;    /* Suppress set but not used warnings. */
2632    (void)U16BELenT;
2633
2634    /* use the original sizes, as the result from converter is in bytes */
2635    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2636    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2637
2638    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2639
2640    log_verbose("Result of U16BE is %i\n", resU16BE);
2641
2642    if(resU16 != resU16BE) {
2643      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2644    }
2645
2646    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2647    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2648
2649    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2650    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2651
2652    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2653
2654    if(resU16 != resU8) {
2655      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2656    }
2657
2658  }
2659
2660  ucol_close(coll);
2661  ucnv_close(conv);
2662}
2663
2664static void TestHebrewUCA(void) {
2665  UErrorCode status = U_ZERO_ERROR;
2666  static const char *first[] = {
2667    "d790d6b8d79cd795d6bcd7a9",
2668    "d790d79cd79ed7a7d799d799d7a1",
2669    "d790d6b4d79ed795d6bcd7a9",
2670  };
2671
2672  char utf8String[3][256];
2673  UChar utf16String[3][256];
2674
2675  int32_t i = 0, j = 0;
2676  int32_t sizeUTF8[3];
2677  int32_t sizeUTF16[3];
2678
2679  UCollator *coll = ucol_open("", &status);
2680  if (U_FAILURE(status)) {
2681      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2682      return;
2683  }
2684  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2685
2686  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
2687    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2688    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2689    log_verbose("%i: ");
2690    for(j = 0; j < sizeUTF16[i]; j++) {
2691      /*log_verbose("\\u%04X", utf16String[i][j]);*/
2692      log_verbose("%04X", utf16String[i][j]);
2693    }
2694    log_verbose("\n");
2695  }
2696  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
2697    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
2698      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2699    }
2700  }
2701
2702  ucol_close(coll);
2703
2704}
2705
2706static void TestPartialSortKeyTermination(void) {
2707  static const char* cases[] = {
2708    "\\u1234\\u1234\\udc00",
2709    "\\udc00\\ud800\\ud800"
2710  };
2711
2712  int32_t i;
2713
2714  UErrorCode status = U_ZERO_ERROR;
2715
2716  UCollator *coll = ucol_open("", &status);
2717
2718  UCharIterator iter;
2719
2720  UChar currCase[256];
2721  int32_t length = 0;
2722  int32_t pKeyLen = 0;
2723
2724  uint8_t key[256];
2725
2726  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
2727    uint32_t state[2] = {0, 0};
2728    length = u_unescape(cases[i], currCase, 256);
2729    uiter_setString(&iter, currCase, length);
2730    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2731    (void)pKeyLen;   /* Suppress set but not used warning. */
2732
2733    log_verbose("Done\n");
2734
2735  }
2736  ucol_close(coll);
2737}
2738
2739static void TestSettings(void) {
2740  static const char* cases[] = {
2741    "apple",
2742      "Apple"
2743  };
2744
2745  static const char* locales[] = {
2746    "",
2747      "en"
2748  };
2749
2750  UErrorCode status = U_ZERO_ERROR;
2751
2752  int32_t i = 0, j = 0;
2753
2754  UChar source[256], target[256];
2755  int32_t sLen = 0, tLen = 0;
2756
2757  UCollator *collateObject = NULL;
2758  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
2759    collateObject = ucol_open(locales[i], &status);
2760    ucol_setStrength(collateObject, UCOL_PRIMARY);
2761    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2762    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
2763      sLen = u_unescape(cases[j-1], source, 256);
2764      source[sLen] = 0;
2765      tLen = u_unescape(cases[j], target, 256);
2766      source[tLen] = 0;
2767      doTest(collateObject, source, target, UCOL_EQUAL);
2768    }
2769    ucol_close(collateObject);
2770  }
2771}
2772
2773static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2774    UErrorCode status = U_ZERO_ERROR;
2775    int32_t errorNo = 0;
2776    const UChar *sourceRules = NULL;
2777    int32_t sourceRulesLen = 0;
2778    UParseError parseError;
2779    UColAttributeValue french = UCOL_OFF;
2780
2781    if(!ucol_equals(source, target)) {
2782        log_err("Same collators, different address not equal\n");
2783        errorNo++;
2784    }
2785    ucol_close(target);
2786    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2787        target = ucol_safeClone(source, NULL, NULL, &status);
2788        if(U_FAILURE(status)) {
2789            log_err("Error creating clone\n");
2790            errorNo++;
2791            return errorNo;
2792        }
2793        if(!ucol_equals(source, target)) {
2794            log_err("Collator different from it's clone\n");
2795            errorNo++;
2796        }
2797        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2798        if(french == UCOL_ON) {
2799            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2800        } else {
2801            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2802        }
2803        if(U_FAILURE(status)) {
2804            log_err("Error setting attributes\n");
2805            errorNo++;
2806            return errorNo;
2807        }
2808        if(ucol_equals(source, target)) {
2809            log_err("Collators same even when options changed\n");
2810            errorNo++;
2811        }
2812        ucol_close(target);
2813
2814        sourceRules = ucol_getRules(source, &sourceRulesLen);
2815        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2816        if(U_FAILURE(status)) {
2817            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2818            errorNo++;
2819            return errorNo;
2820        }
2821        if(!ucol_equals(source, target)) {
2822            log_err("Collator different from collator that was created from the same rules\n");
2823            errorNo++;
2824        }
2825        ucol_close(target);
2826    }
2827    return errorNo;
2828}
2829
2830
2831static void TestEquals(void) {
2832    /* ucol_equals is not currently a public API. There is a chance that it will become
2833    * something like this, but currently it is only used by RuleBasedCollator::operator==
2834    */
2835    /* test whether the two collators instantiated from the same locale are equal */
2836    UErrorCode status = U_ZERO_ERROR;
2837    UParseError parseError;
2838    int32_t noOfLoc = uloc_countAvailable();
2839    const char *locName = NULL;
2840    UCollator *source = NULL, *target = NULL;
2841    int32_t i = 0;
2842
2843    const char* rules[] = {
2844        "&l < lj <<< Lj <<< LJ",
2845        "&n < nj <<< Nj <<< NJ",
2846        "&ae <<< \\u00e4",
2847        "&AE <<< \\u00c4"
2848    };
2849    /*
2850    const char* badRules[] = {
2851    "&l <<< Lj",
2852    "&n < nj <<< nJ <<< NJ",
2853    "&a <<< \\u00e4",
2854    "&AE <<< \\u00c4 <<< x"
2855    };
2856    */
2857
2858    UChar sourceRules[1024], targetRules[1024];
2859    int32_t sourceRulesSize = 0, targetRulesSize = 0;
2860    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
2861
2862    for(i = 0; i < rulesSize; i++) {
2863        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2864        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2865    }
2866
2867    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2868    if(status == U_FILE_ACCESS_ERROR) {
2869        log_data_err("Is your data around?\n");
2870        return;
2871    } else if(U_FAILURE(status)) {
2872        log_err("Error opening collator\n");
2873        return;
2874    }
2875    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2876    if(!ucol_equals(source, target)) {
2877        log_err("Equivalent collators not equal!\n");
2878    }
2879    ucol_close(source);
2880    ucol_close(target);
2881
2882    source = ucol_open("root", &status);
2883    target = ucol_open("root", &status);
2884    log_verbose("Testing root\n");
2885    if(!ucol_equals(source, source)) {
2886        log_err("Same collator not equal\n");
2887    }
2888    if(TestEqualsForCollator(locName, source, target)) {
2889        log_err("Errors for root\n", locName);
2890    }
2891    ucol_close(source);
2892
2893    for(i = 0; i<noOfLoc; i++) {
2894        status = U_ZERO_ERROR;
2895        locName = uloc_getAvailable(i);
2896        /*if(hasCollationElements(locName)) {*/
2897        log_verbose("Testing equality for locale %s\n", locName);
2898        source = ucol_open(locName, &status);
2899        target = ucol_open(locName, &status);
2900        if (U_FAILURE(status)) {
2901            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2902            continue;
2903        }
2904        if(TestEqualsForCollator(locName, source, target)) {
2905            log_err("Errors for locale %s\n", locName);
2906        }
2907        ucol_close(source);
2908        /*}*/
2909    }
2910}
2911
2912static void TestJ2726(void) {
2913    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2914    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2915    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2916    UErrorCode status = U_ZERO_ERROR;
2917    UCollator *coll = ucol_open("en", &status);
2918    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2919    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2920    doTest(coll, a, aSpace, UCOL_EQUAL);
2921    doTest(coll, aSpace, a, UCOL_EQUAL);
2922    doTest(coll, a, spaceA, UCOL_EQUAL);
2923    doTest(coll, spaceA, a, UCOL_EQUAL);
2924    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2925    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2926    ucol_close(coll);
2927}
2928
2929static void NullRule(void) {
2930    UChar r[3] = {0};
2931    UErrorCode status = U_ZERO_ERROR;
2932    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2933    if(U_SUCCESS(status)) {
2934        log_err("This should have been an error!\n");
2935        ucol_close(coll);
2936    } else {
2937        status = U_ZERO_ERROR;
2938    }
2939    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2940    if(U_FAILURE(status)) {
2941        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2942    } else {
2943        ucol_close(coll);
2944    }
2945}
2946
2947/**
2948 * Test for CollationElementIterator previous and next for the whole set of
2949 * unicode characters with normalization on.
2950 */
2951static void TestNumericCollation(void)
2952{
2953    UErrorCode status = U_ZERO_ERROR;
2954
2955    const static char *basicTestStrings[]={
2956    "hello1",
2957    "hello2",
2958    "hello2002",
2959    "hello2003",
2960    "hello123456",
2961    "hello1234567",
2962    "hello10000000",
2963    "hello100000000",
2964    "hello1000000000",
2965    "hello10000000000",
2966    };
2967
2968    const static char *preZeroTestStrings[]={
2969    "avery10000",
2970    "avery010000",
2971    "avery0010000",
2972    "avery00010000",
2973    "avery000010000",
2974    "avery0000010000",
2975    "avery00000010000",
2976    "avery000000010000",
2977    };
2978
2979    const static char *thirtyTwoBitNumericStrings[]={
2980    "avery42949672960",
2981    "avery42949672961",
2982    "avery42949672962",
2983    "avery429496729610"
2984    };
2985
2986     const static char *longNumericStrings[]={
2987     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2988        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2989        are treated as multiple collation elements. */
2990    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2991    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2992    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2993    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2994    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2995    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2996    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2997    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
2998    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
2999    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3000    };
3001
3002    const static char *supplementaryDigits[] = {
3003      "\\uD835\\uDFCE", /* 0 */
3004      "\\uD835\\uDFCF", /* 1 */
3005      "\\uD835\\uDFD0", /* 2 */
3006      "\\uD835\\uDFD1", /* 3 */
3007      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3008      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3009      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3010      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3011      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3012      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3013    };
3014
3015    const static char *foreignDigits[] = {
3016      "\\u0661",
3017        "\\u0662",
3018        "\\u0663",
3019      "\\u0661\\u0660",
3020      "\\u0661\\u0662",
3021      "\\u0661\\u0663",
3022      "\\u0662\\u0660",
3023      "\\u0662\\u0662",
3024      "\\u0662\\u0663",
3025      "\\u0663\\u0660",
3026      "\\u0663\\u0662",
3027      "\\u0663\\u0663"
3028    };
3029
3030    const static char *evenZeroes[] = {
3031      "2000",
3032      "2001",
3033        "2002",
3034        "2003"
3035    };
3036
3037    UColAttribute att = UCOL_NUMERIC_COLLATION;
3038    UColAttributeValue val = UCOL_ON;
3039
3040    /* Open our collator. */
3041    UCollator* coll = ucol_open("root", &status);
3042    if (U_FAILURE(status)){
3043        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3044              myErrorName(status));
3045        return;
3046    }
3047    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
3048    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
3049    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
3050    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
3051    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
3052    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
3053
3054    /* Setting up our collator to do digits. */
3055    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3056    if (U_FAILURE(status)){
3057        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3058              myErrorName(status));
3059        return;
3060    }
3061
3062    /*
3063       Testing that prepended zeroes still yield the correct collation behavior.
3064       We expect that every element in our strings array will be equal.
3065    */
3066    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
3067
3068    ucol_close(coll);
3069}
3070
3071static void TestTibetanConformance(void)
3072{
3073    const char* test[] = {
3074        "\\u0FB2\\u0591\\u0F71\\u0061",
3075        "\\u0FB2\\u0F71\\u0061"
3076    };
3077
3078    UErrorCode status = U_ZERO_ERROR;
3079    UCollator *coll = ucol_open("", &status);
3080    UChar source[100];
3081    UChar target[100];
3082    int result;
3083    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3084    if (U_SUCCESS(status)) {
3085        u_unescape(test[0], source, 100);
3086        u_unescape(test[1], target, 100);
3087        doTest(coll, source, target, UCOL_EQUAL);
3088        result = ucol_strcoll(coll, source, -1,   target, -1);
3089        log_verbose("result %d\n", result);
3090        if (UCOL_EQUAL != result) {
3091            log_err("Tibetan comparison error\n");
3092        }
3093    }
3094    ucol_close(coll);
3095
3096    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3097}
3098
/** Runs the generic ordering test on two Han strings for locale "zh__PINYIN". */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
3103
3104/**
3105 * Iterate through the given iterator, checking to see that all the strings
3106 * in the expected array are present.
3107 * @param expected array of strings we expect to see, or NULL
3108 * @param expectedCount number of elements of expected, or 0
3109 */
3110static int32_t checkUEnumeration(const char* msg,
3111                                 UEnumeration* iter,
3112                                 const char** expected,
3113                                 int32_t expectedCount) {
3114    UErrorCode ec = U_ZERO_ERROR;
3115    int32_t i = 0, n, j, bit;
3116    int32_t seenMask = 0;
3117
3118    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3119    n = uenum_count(iter, &ec);
3120    if (!assertSuccess("count", &ec)) return -1;
3121    log_verbose("%s = [", msg);
3122    for (;; ++i) {
3123        const char* s = uenum_next(iter, NULL, &ec);
3124        if (!assertSuccess("snext", &ec) || s == NULL) break;
3125        if (i != 0) log_verbose(",");
3126        log_verbose("%s", s);
3127        /* check expected list */
3128        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3129            if ((seenMask&bit) == 0 &&
3130                uprv_strcmp(s, expected[j]) == 0) {
3131                seenMask |= bit;
3132                break;
3133            }
3134        }
3135    }
3136    log_verbose("] (%d)\n", i);
3137    assertTrue("count verified", i==n);
3138    /* did we see all expected strings? */
3139    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3140        if ((seenMask&bit)!=0) {
3141            log_verbose("Ok: \"%s\" seen\n", expected[j]);
3142        } else {
3143            log_err("FAIL: \"%s\" not seen\n", expected[j]);
3144        }
3145    }
3146    return n;
3147}
3148
3149/**
3150 * Test new API added for separate collation tree.
3151 */
3152static void TestSeparateTrees(void) {
3153    UErrorCode ec = U_ZERO_ERROR;
3154    UEnumeration *e = NULL;
3155    int32_t n = -1;
3156    UBool isAvailable;
3157    char loc[256];
3158
3159    static const char* AVAIL[] = { "en", "de" };
3160
3161    static const char* KW[] = { "collation" };
3162
3163    static const char* KWVAL[] = { "phonebook", "stroke" };
3164
3165#if !UCONFIG_NO_SERVICE
3166    e = ucol_openAvailableLocales(&ec);
3167    if (e != NULL) {
3168        assertSuccess("ucol_openAvailableLocales", &ec);
3169        assertTrue("ucol_openAvailableLocales!=0", e!=0);
3170        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
3171        (void)n;    /* Suppress set but not used warnings. */
3172        /* Don't need to check n because we check list */
3173        uenum_close(e);
3174    } else {
3175        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3176    }
3177#endif
3178
3179    e = ucol_getKeywords(&ec);
3180    if (e != NULL) {
3181        assertSuccess("ucol_getKeywords", &ec);
3182        assertTrue("ucol_getKeywords!=0", e!=0);
3183        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
3184        /* Don't need to check n because we check list */
3185        uenum_close(e);
3186    } else {
3187        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3188    }
3189
3190    e = ucol_getKeywordValues(KW[0], &ec);
3191    if (e != NULL) {
3192        assertSuccess("ucol_getKeywordValues", &ec);
3193        assertTrue("ucol_getKeywordValues!=0", e!=0);
3194        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
3195        /* Don't need to check n because we check list */
3196        uenum_close(e);
3197    } else {
3198        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3199    }
3200
3201    /* Try setting a warning before calling ucol_getKeywordValues */
3202    ec = U_USING_FALLBACK_WARNING;
3203    e = ucol_getKeywordValues(KW[0], &ec);
3204    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3205        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3206        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
3207        /* Don't need to check n because we check list */
3208        uenum_close(e);
3209    }
3210
3211    /*
3212U_DRAFT int32_t U_EXPORT2
3213ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3214                             const char* locale, UBool* isAvailable,
3215                             UErrorCode* status);
3216}
3217*/
3218    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3219                                     &isAvailable, &ec);
3220    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3221        assertEquals("getFunctionalEquivalent(de)", "root", loc);
3222        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3223                   isAvailable == TRUE);
3224    }
3225
3226    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3227                                     &isAvailable, &ec);
3228    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3229        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3230        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3231                   isAvailable == FALSE);
3232    }
3233}
3234
/*
 * Supersedes TestJ784.
 *
 * Two lists of strings containing tone-marked vowels are run through the
 * generic ordering test twice each: once with a hand-written "[before 2]"
 * tailoring and once with the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* short syllables, with and without a macron on the vowel */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* case and tone-mark variants of "xa" and "xax" */
    static const char *variants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(rules, variants, LEN(variants));
    genericLocaleStarter("zh", variants, LEN(variants));
}
3295
3296static void TestBeforeTightening(void) {
3297    static const struct {
3298        const char *rules;
3299        UErrorCode expectedStatus;
3300    } tests[] = {
3301        { "&[before 1]a<x", U_ZERO_ERROR },
3302        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3303        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3304        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3305        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3306        { "&[before 2]a<<x",U_ZERO_ERROR },
3307        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3308        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3309        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3310        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3311        { "&[before 3]a<<<x",U_ZERO_ERROR },
3312        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3313        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3314    };
3315
3316    int32_t i = 0;
3317
3318    UErrorCode status = U_ZERO_ERROR;
3319    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3320    uint32_t rlen = 0;
3321
3322    UCollator *coll = NULL;
3323
3324
3325    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
3326        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3327        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3328        if(status != tests[i].expectedStatus) {
3329            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3330                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3331        }
3332        ucol_close(coll);
3333        status = U_ZERO_ERROR;
3334    }
3335
3336}
3337
3338/*
3339&m < a
3340&[before 1] a < x <<< X << q <<< Q < z
3341assert: m <<< M < x <<< X << q <<< Q < z < a < n
3342
3343&m < a
3344&[before 2] a << x <<< X << q <<< Q < z
3345assert: m <<< M < x <<< X << q <<< Q << a < z < n
3346
3347&m < a
3348&[before 3] a <<< x <<< X << q <<< Q < z
3349assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3350
3351
3352&m << a
3353&[before 1] a < x <<< X << q <<< Q < z
3354assert: x <<< X << q <<< Q < z < m <<< M << a < n
3355
3356&m << a
3357&[before 2] a << x <<< X << q <<< Q < z
3358assert: m <<< M << x <<< X << q <<< Q << a < z < n
3359
3360&m << a
3361&[before 3] a <<< x <<< X << q <<< Q < z
3362assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3363
3364
3365&m <<< a
3366&[before 1] a < x <<< X << q <<< Q < z
3367assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3368
3369&m <<< a
3370&[before 2] a << x <<< X << q <<< Q < z
3371assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3372
3373&m <<< a
3374&[before 3] a <<< x <<< X << q <<< Q < z
3375assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3376
3377
3378&[before 1] s < x <<< X << q <<< Q < z
3379assert: r <<< R < x <<< X << q <<< Q < z < s < n
3380
3381&[before 2] s << x <<< X << q <<< Q < z
3382assert: r <<< R < x <<< X << q <<< Q << s < z < n
3383
3384&[before 3] s <<< x <<< X << q <<< Q < z
3385assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3386
3387
3388&[before 1] \u24DC < x <<< X << q <<< Q < z
3389assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3390
3391&[before 2] \u24DC << x <<< X << q <<< Q < z
3392assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3393
3394&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3395assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3396*/
3397
3398
#if 0
/*
 * Disabled: requires features not yet supported.
 *
 * Each table entry pairs a "[before N]" tailoring with the order in which
 * the listed strings are expected to sort; the entry is handed to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t idx = 0;

    while(idx < sizeof(tests)/sizeof(tests[0])) {
        genericRulesStarter(tests[idx].rules, tests[idx].order, tests[idx].size);
        idx++;
    }
}
#endif
3446
3447static void TestTailorNULL( void ) {
3448    const static char* rule = "&a <<< '\\u0000'";
3449    UErrorCode status = U_ZERO_ERROR;
3450    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3451    uint32_t rlen = 0;
3452    UChar a = 1, null = 0;
3453    UCollationResult res = UCOL_EQUAL;
3454
3455    UCollator *coll = NULL;
3456
3457
3458    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3459    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3460
3461    if(U_FAILURE(status)) {
3462        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3463    } else {
3464        res = ucol_strcoll(coll, &a, 1, &null, 1);
3465
3466        if(res != UCOL_LESS) {
3467            log_err("NULL was not tailored properly!\n");
3468        }
3469    }
3470
3471    ucol_close(coll);
3472}
3473
3474static void
3475TestUpperFirstQuaternary(void)
3476{
3477  const char* tests[] = { "B", "b", "Bb", "bB" };
3478  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3479  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3480  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3481}
3482
3483static void
3484TestJ4960(void)
3485{
3486  const char* tests[] = { "\\u00e2T", "aT" };
3487  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3488  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3489  const char* tests2[] = { "a", "A" };
3490  const char* rule = "&[first tertiary ignorable]=A=a";
3491  UColAttribute att2[] = { UCOL_CASE_LEVEL };
3492  UColAttributeValue attVals2[] = { UCOL_ON };
3493  /* Test whether we correctly ignore primary ignorables on case level when */
3494  /* we have only primary & case level */
3495  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
3496  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3497  /* and case level */
3498  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3499  /* Test whether completely ignorable letters have case level info (they shouldn't) */
3500  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
3501}
3502
3503static void
3504TestJ5223(void)
3505{
3506  static const char *test = "this is a test string";
3507  UChar ustr[256];
3508  int32_t ustr_length = u_unescape(test, ustr, 256);
3509  unsigned char sortkey[256];
3510  int32_t sortkey_length;
3511  UErrorCode status = U_ZERO_ERROR;
3512  static UCollator *coll = NULL;
3513  coll = ucol_open("root", &status);
3514  if(U_FAILURE(status)) {
3515    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3516    return;
3517  }
3518  ucol_setStrength(coll, UCOL_PRIMARY);
3519  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3520  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3521  if (U_FAILURE(status)) {
3522    log_err("Failed setting atributes\n");
3523    return;
3524  }
3525  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3526  if (sortkey_length > 256) return;
3527
3528  /* we mark the position where the null byte should be written in advance */
3529  sortkey[sortkey_length-1] = 0xAA;
3530
3531  /* we set the buffer size one byte higher than needed */
3532  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3533    sortkey_length+1);
3534
3535  /* no error occurs (for me) */
3536  if (sortkey[sortkey_length-1] == 0xAA) {
3537    log_err("Hit bug at first try\n");
3538  }
3539
3540  /* we mark the position where the null byte should be written again */
3541  sortkey[sortkey_length-1] = 0xAA;
3542
3543  /* this time we set the buffer size to the exact amount needed */
3544  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3545    sortkey_length);
3546
3547  /* now the trailing null byte is not written */
3548  if (sortkey[sortkey_length-1] == 0xAA) {
3549    log_err("Hit bug at second try\n");
3550  }
3551
3552  ucol_close(coll);
3553}
3554
/*
 * Regression test for Thai partial sort key problem.
 * Passes two Thai strings that differ only in their second-to-last code
 * unit (U+0E47 vs U+0E48) to genericLocaleStarter() with the "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *thaiStrings[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };
    const uint32_t thaiCount = (uint32_t)(sizeof(thaiStrings)/sizeof(thaiStrings[0]));

    genericLocaleStarter("th", thaiStrings, thaiCount);
}
3566
/*
 * Passes the strings "a" and "y" to genericRulesStarter() together with a
 * rule that resets on the contraction-like anchor "Ny" and also tailors "a"
 * relative to [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *strings[] = { "a", "y" };
    const uint32_t stringCount = (uint32_t)(sizeof(strings)/sizeof(strings[0]));
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, strings, stringCount);
}
3574
3575static void
3576TestVI5913(void)
3577{
3578    UErrorCode status = U_ZERO_ERROR;
3579    int32_t i, j;
3580    UCollator *coll =NULL;
3581    uint8_t  resColl[100], expColl[100];
3582    int32_t  rLen, tLen, ruleLen, sLen, kLen;
3583    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3584    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3585    /*
3586     * Note: Just tailoring &z<ae^ does not work as expected:
3587     * The UCA spec requires for discontiguous contractions that they
3588     * extend an *existing match* by one combining mark at a time.
3589     * Therefore, ae must be a contraction so that the builder finds
3590     * discontiguous contractions for ae^, for example with an intervening underdot.
3591     * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3592     */
3593    UChar rule3[256]={
3594        0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3595        0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3596        0};
3597    static const UChar tData[][20]={
3598        {0x1EAC, 0},
3599        {0x0041, 0x0323, 0x0302, 0},
3600        {0x1EA0, 0x0302, 0},
3601        {0x00C2, 0x0323, 0},
3602        {0x1ED8, 0},  /* O with dot and circumflex */
3603        {0x1ECC, 0x0302, 0},
3604        {0x1EB7, 0},
3605        {0x1EA1, 0x0306, 0},
3606    };
3607    static const UChar tailorData[][20]={
3608        {0x1FA2, 0},  /* Omega with 3 combining marks */
3609        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3610        {0x1FF3, 0x0313, 0x0300, 0},
3611        {0x1F60, 0x0300, 0x0345, 0},
3612        {0x1F62, 0x0345, 0},
3613        {0x1FA0, 0x0300, 0},
3614    };
3615    static const UChar tailorData2[][20]={
3616        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3617        {0x0073, 0x0323, 0x030C, 0},
3618        {0x0073, 0x030C, 0x0323, 0},
3619    };
3620    static const UChar tailorData3[][20]={
3621        {0x007a, 0},  /*  z */
3622        {0x0061, 0x0065, 0},  /*  a + e */
3623        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3624        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3625        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3626        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3627        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3628        {0x00EA, 0},  /* e with circumflex  */
3629    };
3630
3631    /* Test Vietnamese sort. */
3632    coll = ucol_open("vi", &status);
3633    if(U_FAILURE(status)) {
3634        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3635        return;
3636    }
3637    log_verbose("\n\nVI collation:");
3638    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3639        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3640    }
3641    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3642        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3643    }
3644    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3645        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3646    }
3647    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3648        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3649    }
3650
3651    for (j=0; j<8; j++) {
3652        tLen = u_strlen(tData[j]);
3653        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3654        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3655        for(i = 0; i<rLen; i++) {
3656            log_verbose(" %02X", resColl[i]);
3657        }
3658    }
3659
3660    ucol_close(coll);
3661
3662    /* Test Romanian sort. */
3663    coll = ucol_open("ro", &status);
3664    log_verbose("\n\nRO collation:");
3665    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3666        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3667    }
3668    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3669        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3670    }
3671    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3672        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3673    }
3674
3675    for (j=4; j<8; j++) {
3676        tLen = u_strlen(tData[j]);
3677        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3678        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3679        for(i = 0; i<rLen; i++) {
3680            log_verbose(" %02X", resColl[i]);
3681        }
3682    }
3683    ucol_close(coll);
3684
3685    /* Test the precomposed Greek character with 3 combining marks. */
3686    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3687    ruleLen = u_strlen(rule);
3688    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3689    if (U_FAILURE(status)) {
3690        log_err("ucol_openRules failed with %s\n", u_errorName(status));
3691        return;
3692    }
3693    sLen = u_strlen(tailorData[0]);
3694    for (j=1; j<6; j++) {
3695        tLen = u_strlen(tailorData[j]);
3696        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3697            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3698        }
3699    }
3700    /* Test getSortKey. */
3701    tLen = u_strlen(tailorData[0]);
3702    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3703    for (j=0; j<6; j++) {
3704        tLen = u_strlen(tailorData[j]);
3705        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3706        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3707            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3708            for(i = 0; i<rLen; i++) {
3709                log_err(" %02X", resColl[i]);
3710            }
3711        }
3712    }
3713    ucol_close(coll);
3714
3715    log_verbose("\n\nTailoring test for s with caron:");
3716    ruleLen = u_strlen(rule2);
3717    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3718    tLen = u_strlen(tailorData2[0]);
3719    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3720    for (j=1; j<3; j++) {
3721        tLen = u_strlen(tailorData2[j]);
3722        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3723        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3724            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3725            for(i = 0; i<rLen; i++) {
3726                log_err(" %02X", resColl[i]);
3727            }
3728        }
3729    }
3730    ucol_close(coll);
3731
3732    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3733    ruleLen = u_strlen(rule3);
3734    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3735    tLen = u_strlen(tailorData3[3]);
3736    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3737    log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3738    for(i = 0; i<kLen; i++) {
3739        log_verbose(" %02X", expColl[i]);
3740    }
3741    for (j=4; j<6; j++) {
3742        tLen = u_strlen(tailorData3[j]);
3743        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3744
3745        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3746            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3747            for(i = 0; i<rLen; i++) {
3748                log_err(" %02X", resColl[i]);
3749            }
3750        }
3751
3752        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3753         for(i = 0; i<rLen; i++) {
3754             log_verbose(" %02X", resColl[i]);
3755         }
3756    }
3757    ucol_close(coll);
3758}
3759
3760static void
3761TestTailor6179(void)
3762{
3763    UErrorCode status = U_ZERO_ERROR;
3764    int32_t i;
3765    UCollator *coll =NULL;
3766    uint8_t  resColl[100];
3767    int32_t  rLen, tLen, ruleLen;
3768    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3769    static const UChar rule1[]={
3770            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3771            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3772            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3773            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3774    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3775    static const UChar rule2[]={
3776            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3777            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3778            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3779            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3780            0x3C,0x3C,0x20,0x62,0};
3781
3782    static const UChar tData1[][4]={
3783        {0x61, 0},
3784        {0x62, 0},
3785        { 0xFDD0,0x009E, 0}
3786    };
3787    static const UChar tData2[][4]={
3788        {0x61, 0},
3789        {0x62, 0},
3790        { 0xFDD0,0x009E, 0}
3791     };
3792
3793    /*
3794     * These values from FractionalUCA.txt will change,
3795     * and need to be updated here.
3796     * TODO: Make this not check for particular sort keys.
3797     * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3798     */
3799    static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3800    static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3801    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3802    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3803
3804    UParseError parseError;
3805
3806    /* Test [Last Primary ignorable] */
3807
3808    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3809    ruleLen = u_strlen(rule1);
3810    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3811    if (U_FAILURE(status)) {
3812        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3813        return;
3814    }
3815    tLen = u_strlen(tData1[0]);
3816    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3817    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3818        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3819        for(i = 0; i<rLen; i++) {
3820            log_err(" %02X", resColl[i]);
3821        }
3822        log_err("\n");
3823    }
3824    tLen = u_strlen(tData1[1]);
3825    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3826    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3827        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3828        for(i = 0; i<rLen; i++) {
3829            log_err(" %02X", resColl[i]);
3830        }
3831        log_err("\n");
3832    }
3833    ucol_close(coll);
3834
3835
3836    /* Test [Last Secondary ignorable] */
3837    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3838    ruleLen = u_strlen(rule2);
3839    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3840    if (U_FAILURE(status)) {
3841        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3842        log_info("  offset=%d  \"%s\" | \"%s\"\n",
3843                 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3844        return;
3845    }
3846    tLen = u_strlen(tData2[0]);
3847    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3848    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3849        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3850        for(i = 0; i<rLen; i++) {
3851            log_err(" %02X", resColl[i]);
3852        }
3853        log_err("\n");
3854    }
3855    tLen = u_strlen(tData2[1]);
3856    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3857    if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3858      log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3859      for(i = 0; i<rLen; i++) {
3860        log_err(" %02X", resColl[i]);
3861      }
3862      log_err("\n");
3863    }
3864    ucol_close(coll);
3865}
3866
3867static void
3868TestUCAPrecontext(void)
3869{
3870    UErrorCode status = U_ZERO_ERROR;
3871    int32_t i, j;
3872    UCollator *coll =NULL;
3873    uint8_t  resColl[100], prevColl[100];
3874    int32_t  rLen, tLen, ruleLen;
3875    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3876    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3877    /* & l middle-dot << a  a is an expansion. */
3878
3879    UChar tData1[][20]={
3880            { 0xb7, 0},  /* standalone middle dot(0xb7) */
3881            { 0x387, 0}, /* standalone middle dot(0x387) */
3882            { 0x61, 0},  /* a */
3883            { 0x6C, 0},  /* l */
3884            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3885            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3886            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3887            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3888            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3889            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3890            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3891     };
3892
3893    log_verbose("\n\nEN collation:");
3894    coll = ucol_open("en", &status);
3895    if (U_FAILURE(status)) {
3896        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3897        return;
3898    }
3899    for (j=0; j<11; j++) {
3900        tLen = u_strlen(tData1[j]);
3901        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3902        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3903            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3904                    j, tData1[j]);
3905        }
3906        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3907        for(i = 0; i<rLen; i++) {
3908            log_verbose(" %02X", resColl[i]);
3909        }
3910        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3911     }
3912     ucol_close(coll);
3913
3914
3915     log_verbose("\n\nJA collation:");
3916     coll = ucol_open("ja", &status);
3917     if (U_FAILURE(status)) {
3918         log_err("Tailoring test: &z <<a|- failed!");
3919         return;
3920     }
3921     for (j=0; j<11; j++) {
3922         tLen = u_strlen(tData1[j]);
3923         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3924         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3925             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3926                     j, tData1[j]);
3927         }
3928         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3929         for(i = 0; i<rLen; i++) {
3930             log_verbose(" %02X", resColl[i]);
3931         }
3932         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3933      }
3934      ucol_close(coll);
3935
3936
3937      log_verbose("\n\nTailoring test: & middle dot < a ");
3938      ruleLen = u_strlen(rule1);
3939      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3940      if (U_FAILURE(status)) {
3941          log_err("Tailoring test: & middle dot < a failed!");
3942          return;
3943      }
3944      for (j=0; j<11; j++) {
3945          tLen = u_strlen(tData1[j]);
3946          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3947          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3948              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3949                      j, tData1[j]);
3950          }
3951          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3952          for(i = 0; i<rLen; i++) {
3953              log_verbose(" %02X", resColl[i]);
3954          }
3955          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3956       }
3957       ucol_close(coll);
3958
3959
3960       log_verbose("\n\nTailoring test: & l middle-dot << a ");
3961       ruleLen = u_strlen(rule2);
3962       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3963       if (U_FAILURE(status)) {
3964           log_err("Tailoring test: & l middle-dot << a failed!");
3965           return;
3966       }
3967       for (j=0; j<11; j++) {
3968           tLen = u_strlen(tData1[j]);
3969           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3970           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3971               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3972                       j, tData1[j]);
3973           }
3974           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3975               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3976                       j, tData1[j]);
3977           }
3978           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3979           for(i = 0; i<rLen; i++) {
3980               log_verbose(" %02X", resColl[i]);
3981           }
3982           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3983        }
3984        ucol_close(coll);
3985}
3986
3987static void
3988TestOutOfBuffer5468(void)
3989{
3990    static const char *test = "\\u4e00";
3991    UChar ustr[256];
3992    int32_t ustr_length = u_unescape(test, ustr, 256);
3993    unsigned char shortKeyBuf[1];
3994    int32_t sortkey_length;
3995    UErrorCode status = U_ZERO_ERROR;
3996    static UCollator *coll = NULL;
3997
3998    coll = ucol_open("root", &status);
3999    if(U_FAILURE(status)) {
4000      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4001      return;
4002    }
4003    ucol_setStrength(coll, UCOL_PRIMARY);
4004    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4005    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4006    if (U_FAILURE(status)) {
4007      log_err("Failed setting atributes\n");
4008      return;
4009    }
4010
4011    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4012    if (sortkey_length != 4) {
4013        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4014    }
4015    log_verbose("length of sortKey is %d", sortkey_length);
4016    ucol_close(coll);
4017}
4018
4019#define TSKC_DATA_SIZE 5
4020#define TSKC_BUF_SIZE  50
4021static void
4022TestSortKeyConsistency(void)
4023{
4024    UErrorCode icuRC = U_ZERO_ERROR;
4025    UCollator* ucol;
4026    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4027
4028    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4029    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4030    int32_t i, j, i2;
4031
4032    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4033    if (U_FAILURE(icuRC))
4034    {
4035        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4036        return;
4037    }
4038
4039    for (i = 0; i < TSKC_DATA_SIZE; i++)
4040    {
4041        UCharIterator uiter;
4042        uint32_t state[2] = { 0, 0 };
4043        int32_t dataLen = i+1;
4044        for (j=0; j<TSKC_BUF_SIZE; j++)
4045            bufFull[i][j] = bufPart[i][j] = 0;
4046
4047        /* Full sort key */
4048        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4049
4050        /* Partial sort key */
4051        uiter_setString(&uiter, data, dataLen);
4052        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4053        if (U_FAILURE(icuRC))
4054        {
4055            log_err("ucol_nextSortKeyPart failed\n");
4056            ucol_close(ucol);
4057            return;
4058        }
4059
4060        for (i2=0; i2<i; i2++)
4061        {
4062            UBool fullMatch = TRUE;
4063            UBool partMatch = TRUE;
4064            for (j=0; j<TSKC_BUF_SIZE; j++)
4065            {
4066                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4067                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4068            }
4069            if (fullMatch != partMatch) {
4070                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4071                                  : "partial key was consistent, but full key changed\n");
4072                ucol_close(ucol);
4073                return;
4074            }
4075        }
4076    }
4077
4078    /*=============================================*/
4079   ucol_close(ucol);
4080}
4081
4082/* ticket: 6101 */
4083static void TestCroatianSortKey(void) {
4084    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4085    UErrorCode status = U_ZERO_ERROR;
4086    UCollator *ucol;
4087    UCharIterator iter;
4088
4089    static const UChar text[] = { 0x0044, 0xD81A };
4090
4091    size_t length = sizeof(text)/sizeof(*text);
4092
4093    uint8_t textSortKey[32];
4094    size_t lenSortKey = 32;
4095    size_t actualSortKeyLen;
4096    uint32_t uStateInfo[2] = { 0, 0 };
4097
4098    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4099    if (U_FAILURE(status)) {
4100        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4101        return;
4102    }
4103
4104    uiter_setString(&iter, text, length);
4105
4106    actualSortKeyLen = ucol_nextSortKeyPart(
4107        ucol, &iter, (uint32_t*)uStateInfo,
4108        textSortKey, lenSortKey, &status
4109        );
4110
4111    if (actualSortKeyLen == lenSortKey) {
4112        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4113    }
4114
4115    ucol_close(ucol);
4116}
4117
4118/* ticket: 6140 */
4119/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4120 * they are both Hiragana and Katakana
4121 */
4122#define SORTKEYLEN 50
4123static void TestHiragana(void) {
4124    UErrorCode status = U_ZERO_ERROR;
4125    UCollator* ucol;
4126    UCollationResult strcollresult;
4127    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4128    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4129    int32_t data1Len = sizeof(data1)/sizeof(*data1);
4130    int32_t data2Len = sizeof(data2)/sizeof(*data2);
4131    int32_t i, j;
4132    uint8_t sortKey1[SORTKEYLEN];
4133    uint8_t sortKey2[SORTKEYLEN];
4134
4135    UCharIterator uiter1;
4136    UCharIterator uiter2;
4137    uint32_t state1[2] = { 0, 0 };
4138    uint32_t state2[2] = { 0, 0 };
4139    int32_t keySize1;
4140    int32_t keySize2;
4141
4142    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4143            &status);
4144    if (U_FAILURE(status)) {
4145        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4146        return;
4147    }
4148
4149    /* Start of full sort keys */
4150    /* Full sort key1 */
4151    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4152    /* Full sort key2 */
4153    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4154    if (keySize1 == keySize2) {
4155        for (i = 0; i < keySize1; i++) {
4156            if (sortKey1[i] != sortKey2[i]) {
4157                log_err("Full sort keys are different. Should be equal.");
4158            }
4159        }
4160    } else {
4161        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4162    }
4163    /* End of full sort keys */
4164
4165    /* Start of partial sort keys */
4166    /* Partial sort key1 */
4167    uiter_setString(&uiter1, data1, data1Len);
4168    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4169    /* Partial sort key2 */
4170    uiter_setString(&uiter2, data2, data2Len);
4171    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4172    if (U_SUCCESS(status) && keySize1 == keySize2) {
4173        for (j = 0; j < keySize1; j++) {
4174            if (sortKey1[j] != sortKey2[j]) {
4175                log_err("Partial sort keys are different. Should be equal");
4176            }
4177        }
4178    } else {
4179        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4180    }
4181    /* End of partial sort keys */
4182
4183    /* Start of strcoll */
4184    /* Use ucol_strcoll() to determine ordering */
4185    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4186    if (strcollresult != UCOL_EQUAL) {
4187        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4188    }
4189
4190    ucol_close(ucol);
4191}
4192
/*
 * Convenient struct for running collation tests.
 * Each entry pairs two UTF-16 strings with the comparison result expected for them.
 * The tables in this file initialize fewer than MAX_TOKEN_LEN code units per
 * string, so the remaining elements are zero.
 * NOTE(review): doTest() is handed the arrays without a length, so the strings
 * presumably must stay NUL-terminated (at most MAX_TOKEN_LEN - 1 units) - confirm.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
4199
4200/*
4201 * Utility function to test one collation test case.
4202 * @param testcases Array of test cases.
4203 * @param n_testcases Size of the array testcases.
4204 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4205 * @param n_rules Size of the array str_rules.
4206 */
4207static void doTestOneTestCase(const OneTestCase testcases[],
4208                              int n_testcases,
4209                              const char* str_rules[],
4210                              int n_rules)
4211{
4212  int rule_no, testcase_no;
4213  UChar rule[500];
4214  int32_t length = 0;
4215  UErrorCode status = U_ZERO_ERROR;
4216  UParseError parse_error;
4217  UCollator  *myCollation;
4218
4219  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4220
4221    length = u_unescape(str_rules[rule_no], rule, 500);
4222    if (length == 0) {
4223        log_err("ERROR: The rule cannot be unescaped: %s\n");
4224        return;
4225    }
4226    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4227    if(U_FAILURE(status)){
4228        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4229        log_info("  offset=%d  \"%s\" | \"%s\"\n",
4230                 parse_error.offset,
4231                 aescstrdup(parse_error.preContext, -1),
4232                 aescstrdup(parse_error.postContext, -1));
4233        return;
4234    }
4235    log_verbose("Testing the <<* syntax\n");
4236    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4237    ucol_setStrength(myCollation, UCOL_TERTIARY);
4238    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4239      doTest(myCollation,
4240             testcases[testcase_no].source,
4241             testcases[testcase_no].target,
4242             testcases[testcase_no].result
4243             );
4244    }
4245    ucol_close(myCollation);
4246  }
4247}
4248
/*
 * Test cases shared by TestSameStrengthList, TestSameStrengthListQuoted and
 * TestSameStrengthListRanges, whose rules all spell the tailoring
 *   &a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3
 * in different notations.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" << "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = LEN(rangeTestcases);
4276
/*
 * Test cases for tailorings that place U+FFFB and the supplementary code points
 * U+10000..U+10002 (written here as surrogate pairs) after U+4E00.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same as the previous entry) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
4288
/*
 * Test cases for the tailoring used by TestSameStrengthListQwerty and
 * TestSameStrengthListQuotedQwerty:
 *   &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d
 * In the per-entry comments "<" marks a primary, "<<" a secondary and "<<<" a
 * tertiary difference under that tailoring; every entry expects UCOL_LESS.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" << "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
4309
4310static void TestSameStrengthList(void)
4311{
4312  const char* strRules[] = {
4313    /* Normal */
4314    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4315
4316    /* Lists */
4317    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4318  };
4319  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4320}
4321
4322static void TestSameStrengthListQuoted(void)
4323{
4324  const char* strRules[] = {
4325    /* Lists with quoted characters */
4326    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4327    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4328
4329    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4330    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4331
4332    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4333    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4334  };
4335  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4336}
4337
4338static void TestSameStrengthListSupplemental(void)
4339{
4340  const char* strRules[] = {
4341    "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4342    "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4343    "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4344    "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4345  };
4346  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4347}
4348
4349static void TestSameStrengthListQwerty(void)
4350{
4351  const char* strRules[] = {
4352    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4353    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4354    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4355    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4356    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4357
4358    /* Quoted characters also will work if two quoted characters are not consecutive.  */
4359    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4360
4361    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4362    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4363
4364 };
4365  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4366}
4367
4368static void TestSameStrengthListQuotedQwerty(void)
4369{
4370  const char* strRules[] = {
4371    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4372    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4373    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4374
4375    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4376    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4377   };
4378  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4379}
4380
4381static void TestSameStrengthListRanges(void)
4382{
4383  const char* strRules[] = {
4384    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4385  };
4386  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4387}
4388
4389static void TestSameStrengthListSupplementalRanges(void)
4390{
4391  const char* strRules[] = {
4392    /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4393    "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4394  };
4395  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4396}
4397
4398static void TestSpecialCharacters(void)
4399{
4400  const char* strRules[] = {
4401    /* Normal */
4402    "&';'<'+'<','<'-'<'&'<'*'",
4403
4404    /* List */
4405    "&';'<*'+,-&*'",
4406
4407    /* Range */
4408    "&';'<*'+'-'-&*'",
4409  };
4410
4411  const static OneTestCase specialCharacterStrings[] = {
4412    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4413    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4414    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4415    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4416  };
4417  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
4418}
4419
4420static void TestPrivateUseCharacters(void)
4421{
4422  const char* strRules[] = {
4423    /* Normal */
4424    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4425    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4426  };
4427
4428  const static OneTestCase privateUseCharacterStrings[] = {
4429    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4430    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4431    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4432    { {0xe2da}, {0xe2db}, UCOL_LESS },
4433    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4434    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4435  };
4436  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4437}
4438
4439static void TestPrivateUseCharactersInList(void)
4440{
4441  const char* strRules[] = {
4442    /* List */
4443    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4444    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4445    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4446  };
4447
4448  const static OneTestCase privateUseCharacterStrings[] = {
4449    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4450    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4451    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4452    { {0xe2da}, {0xe2db}, UCOL_LESS },
4453    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4454    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4455  };
4456  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4457}
4458
4459static void TestPrivateUseCharactersInRange(void)
4460{
4461  const char* strRules[] = {
4462    /* Range */
4463    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4464    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4465    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4466  };
4467
4468  const static OneTestCase privateUseCharacterStrings[] = {
4469    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4470    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4471    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4472    { {0xe2da}, {0xe2db}, UCOL_LESS },
4473    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4474    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4475  };
4476  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4477}
4478
4479static void TestInvalidListsAndRanges(void)
4480{
4481  const char* invalidRules[] = {
4482    /* Range not in starred expression */
4483    "&\\ufffe<\\uffff-\\U00010002",
4484
4485    /* Range without start */
4486    "&a<*-c",
4487
4488    /* Range without end */
4489    "&a<*b-",
4490
4491    /* More than one hyphen */
4492    "&a<*b-g-l",
4493
4494    /* Range in the wrong order */
4495    "&a<*k-b",
4496
4497  };
4498
4499  UChar rule[500];
4500  UErrorCode status = U_ZERO_ERROR;
4501  UParseError parse_error;
4502  int n_rules = LEN(invalidRules);
4503  int rule_no;
4504  int length;
4505  UCollator  *myCollation;
4506
4507  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4508
4509    length = u_unescape(invalidRules[rule_no], rule, 500);
4510    if (length == 0) {
4511        log_err("ERROR: The rule cannot be unescaped: %s\n");
4512        return;
4513    }
4514    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4515    (void)myCollation;      /* Suppress set but not used warning. */
4516    if(!U_FAILURE(status)){
4517      log_err("ERROR: Could not cause a failure as expected: \n");
4518    }
4519    status = U_ZERO_ERROR;
4520  }
4521}
4522
4523/*
4524 * This test ensures that characters placed before a character in a different script have the same lead byte
4525 * in their collation key before and after script reordering.
4526 */
4527static void TestBeforeRuleWithScriptReordering(void)
4528{
4529    UParseError error;
4530    UErrorCode status = U_ZERO_ERROR;
4531    UCollator  *myCollation;
4532    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4533    UChar rules[500];
4534    uint32_t rulesLength = 0;
4535    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4536    UCollationResult collResult;
4537
4538    uint8_t baseKey[256];
4539    uint32_t baseKeyLength;
4540    uint8_t beforeKey[256];
4541    uint32_t beforeKeyLength;
4542
4543    UChar base[] = { 0x03b1 }; /* base */
4544    int32_t baseLen = sizeof(base)/sizeof(*base);
4545
4546    UChar before[] = { 0x0e01 }; /* ko kai */
4547    int32_t beforeLen = sizeof(before)/sizeof(*before);
4548
4549    /*UChar *data[] = { before, base };
4550    genericRulesStarter(srules, data, 2);*/
4551
4552    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4553
4554    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4555    (void)baseKeyLength;
4556
4557    /* build collator */
4558    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4559
4560    rulesLength = u_unescape(srules, rules, LEN(rules));
4561    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4562    if(U_FAILURE(status)) {
4563        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4564        return;
4565    }
4566
4567    /* check collation results - before rule applied but not script reordering */
4568    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4569    if (collResult != UCOL_GREATER) {
4570        log_err("Collation result not correct before script reordering = %d\n", collResult);
4571    }
4572
4573    /* check the lead byte of the collation keys before script reordering */
4574    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4575    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4576    if (baseKey[0] != beforeKey[0]) {
4577      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4578   }
4579
4580    /* reorder the scripts */
4581    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4582    if(U_FAILURE(status)) {
4583        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4584        return;
4585    }
4586
4587    /* check collation results - before rule applied and after script reordering */
4588    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4589    if (collResult != UCOL_GREATER) {
4590        log_err("Collation result not correct after script reordering = %d\n", collResult);
4591    }
4592
4593    /* check the lead byte of the collation keys after script reordering */
4594    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4595    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4596    if (baseKey[0] != beforeKey[0]) {
4597        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4598    }
4599
4600    ucol_close(myCollation);
4601}
4602
4603/*
4604 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4605 */
4606static void TestNonLeadBytesDuringCollationReordering(void)
4607{
4608    UErrorCode status = U_ZERO_ERROR;
4609    UCollator  *myCollation;
4610    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4611
4612    uint8_t baseKey[256];
4613    uint32_t baseKeyLength;
4614    uint8_t reorderKey[256];
4615    uint32_t reorderKeyLength;
4616
4617    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4618
4619    uint32_t i;
4620
4621
4622    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4623
4624    /* build collator tertiary */
4625    myCollation = ucol_open("", &status);
4626    ucol_setStrength(myCollation, UCOL_TERTIARY);
4627    if(U_FAILURE(status)) {
4628        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4629        return;
4630    }
4631    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4632
4633    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4634    if(U_FAILURE(status)) {
4635        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4636        return;
4637    }
4638    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4639
4640    if (baseKeyLength != reorderKeyLength) {
4641        log_err("Key lengths not the same during reordering.\n");
4642        return;
4643    }
4644
4645    for (i = 1; i < baseKeyLength; i++) {
4646        if (baseKey[i] != reorderKey[i]) {
4647            log_err("Collation key bytes not the same at position %d.\n", i);
4648            return;
4649        }
4650    }
4651    ucol_close(myCollation);
4652
4653    /* build collator quaternary */
4654    myCollation = ucol_open("", &status);
4655    ucol_setStrength(myCollation, UCOL_QUATERNARY);
4656    if(U_FAILURE(status)) {
4657        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4658        return;
4659    }
4660    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4661
4662    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4663    if(U_FAILURE(status)) {
4664        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4665        return;
4666    }
4667    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4668
4669    if (baseKeyLength != reorderKeyLength) {
4670        log_err("Key lengths not the same during reordering.\n");
4671        return;
4672    }
4673
4674    for (i = 1; i < baseKeyLength; i++) {
4675        if (baseKey[i] != reorderKey[i]) {
4676            log_err("Collation key bytes not the same at position %d.\n", i);
4677            return;
4678        }
4679    }
4680    ucol_close(myCollation);
4681}
4682
4683/*
4684 * Test reordering API.
4685 */
4686static void TestReorderingAPI(void)
4687{
4688    UErrorCode status = U_ZERO_ERROR;
4689    UCollator  *myCollation;
4690    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4691    int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
4692    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4693    UCollationResult collResult;
4694    int32_t retrievedReorderCodesLength;
4695    int32_t retrievedReorderCodes[10];
4696    UChar greekString[] = { 0x03b1 };
4697    UChar punctuationString[] = { 0x203e };
4698    int loopIndex;
4699
4700    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4701
4702    /* build collator tertiary */
4703    myCollation = ucol_open("", &status);
4704    ucol_setStrength(myCollation, UCOL_TERTIARY);
4705    if(U_FAILURE(status)) {
4706        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4707        return;
4708    }
4709
4710    /* set the reorderding */
4711    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4712    if (U_FAILURE(status)) {
4713        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4714        return;
4715    }
4716
4717    /* get the reordering */
4718    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4719    if (status != U_BUFFER_OVERFLOW_ERROR) {
4720        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4721        return;
4722    }
4723    status = U_ZERO_ERROR;
4724    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4725        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4726        return;
4727    }
4728    /* now let's really get it */
4729    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4730    if (U_FAILURE(status)) {
4731        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4732        return;
4733    }
4734    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4735        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4736        return;
4737    }
4738    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4739        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4740            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4741            return;
4742        }
4743    }
4744    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4745    if (collResult != UCOL_LESS) {
4746        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4747        return;
4748    }
4749
4750    /* clear the reordering */
4751    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4752    if (U_FAILURE(status)) {
4753        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4754        return;
4755    }
4756
4757    /* get the reordering again */
4758    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4759    if (retrievedReorderCodesLength != 0) {
4760        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4761        return;
4762    }
4763
4764    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4765    if (collResult != UCOL_GREATER) {
4766        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4767        return;
4768    }
4769
4770    /* test for error condition on duplicate reorder codes */
4771    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
4772    if (!U_FAILURE(status)) {
4773        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4774        return;
4775    }
4776
4777    status = U_ZERO_ERROR;
4778    /* test for reorder codes after a reset code */
4779    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
4780    if (!U_FAILURE(status)) {
4781        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4782        return;
4783    }
4784
4785    ucol_close(myCollation);
4786}
4787
4788/*
4789 * Test reordering API.
4790 */
4791static void TestReorderingAPIWithRuleCreatedCollator(void)
4792{
4793    UErrorCode status = U_ZERO_ERROR;
4794    UCollator  *myCollation;
4795    UChar rules[90];
4796    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4797    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4798    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4799    UCollationResult collResult;
4800    int32_t retrievedReorderCodesLength;
4801    int32_t retrievedReorderCodes[10];
4802    static const UChar greekString[] = { 0x03b1 };
4803    static const UChar punctuationString[] = { 0x203e };
4804    static const UChar hanString[] = { 0x65E5, 0x672C };
4805    int loopIndex;
4806
4807    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4808
4809    /* build collator from rules */
4810    u_uastrcpy(rules, "[reorder Hani Grek]");
4811    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4812    if(U_FAILURE(status)) {
4813        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4814        return;
4815    }
4816
4817    /* get the reordering */
4818    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4819    if (U_FAILURE(status)) {
4820        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4821        return;
4822    }
4823    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4824        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4825        return;
4826    }
4827    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4828        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4829            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4830            return;
4831        }
4832    }
4833    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
4834    if (collResult != UCOL_GREATER) {
4835        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4836        return;
4837    }
4838
4839    /* set the reordering */
4840    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4841    if (U_FAILURE(status)) {
4842        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4843        return;
4844    }
4845
4846    /* get the reordering */
4847    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4848    if (status != U_BUFFER_OVERFLOW_ERROR) {
4849        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4850        return;
4851    }
4852    status = U_ZERO_ERROR;
4853    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4854        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4855        return;
4856    }
4857    /* now let's really get it */
4858    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4859    if (U_FAILURE(status)) {
4860        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4861        return;
4862    }
4863    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4864        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4865        return;
4866    }
4867    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4868        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4869            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4870            return;
4871        }
4872    }
4873    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4874    if (collResult != UCOL_LESS) {
4875        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4876        return;
4877    }
4878
4879    /* clear the reordering */
4880    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4881    if (U_FAILURE(status)) {
4882        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4883        return;
4884    }
4885
4886    /* get the reordering again */
4887    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4888    if (retrievedReorderCodesLength != 0) {
4889        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4890        return;
4891    }
4892
4893    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4894    if (collResult != UCOL_GREATER) {
4895        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4896        return;
4897    }
4898
4899    /* reset the reordering */
4900    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4901    if (U_FAILURE(status)) {
4902        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4903        return;
4904    }
4905    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4906    if (U_FAILURE(status)) {
4907        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4908        return;
4909    }
4910    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4911        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4912        return;
4913    }
4914    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4915        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4916            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4917            return;
4918        }
4919    }
4920
4921    ucol_close(myCollation);
4922}
4923
/*
 * qsort()-style comparator for arrays of int32_t script/reorder codes.
 *
 * @param a Pointer to the first int32_t value.
 * @param b Pointer to the second int32_t value.
 * @return A negative value, zero, or a positive value when *a is less than,
 *         equal to, or greater than *b, respectively.
 *
 * The result is derived from comparisons rather than from (*a - *b): the
 * subtraction overflows (undefined behavior) when the operands are far apart.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
  const int32_t lhs = *(const int32_t *)a;
  const int32_t rhs = *(const int32_t *)b;

  return (lhs > rhs) - (lhs < rhs);
}
4928
4929static void TestEquivalentReorderingScripts(void) {
4930    UErrorCode status = U_ZERO_ERROR;
4931    int32_t equivalentScripts[50];
4932    int32_t equivalentScriptsLength;
4933    int loopIndex;
4934    int32_t equivalentScriptsResult[] = {
4935        USCRIPT_BOPOMOFO,
4936        USCRIPT_LISU,
4937        USCRIPT_LYCIAN,
4938        USCRIPT_CARIAN,
4939        USCRIPT_LYDIAN,
4940        USCRIPT_YI,
4941        USCRIPT_OLD_ITALIC,
4942        USCRIPT_GOTHIC,
4943        USCRIPT_DESERET,
4944        USCRIPT_SHAVIAN,
4945        USCRIPT_OSMANYA,
4946        USCRIPT_LINEAR_B,
4947        USCRIPT_CYPRIOT,
4948        USCRIPT_OLD_SOUTH_ARABIAN,
4949        USCRIPT_AVESTAN,
4950        USCRIPT_IMPERIAL_ARAMAIC,
4951        USCRIPT_INSCRIPTIONAL_PARTHIAN,
4952        USCRIPT_INSCRIPTIONAL_PAHLAVI,
4953        USCRIPT_UGARITIC,
4954        USCRIPT_OLD_PERSIAN,
4955        USCRIPT_CUNEIFORM,
4956        USCRIPT_EGYPTIAN_HIEROGLYPHS,
4957        USCRIPT_PHONETIC_POLLARD,
4958        USCRIPT_SORA_SOMPENG,
4959        USCRIPT_MEROITIC_CURSIVE,
4960        USCRIPT_MEROITIC_HIEROGLYPHS
4961    };
4962
4963    qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
4964
4965    /* UScript.GOTHIC */
4966    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
4967    if (U_FAILURE(status)) {
4968        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4969        return;
4970    }
4971    /*
4972    fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
4973    fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
4974    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
4975        fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
4976    }
4977    */
4978    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
4979        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
4980        return;
4981    }
4982    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
4983        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
4984            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
4985            return;
4986        }
4987    }
4988
4989    /* UScript.SHAVIAN */
4990    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
4991    if (U_FAILURE(status)) {
4992        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4993        return;
4994    }
4995    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
4996        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
4997        return;
4998    }
4999    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
5000        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
5001            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
5002            return;
5003        }
5004    }
5005}
5006
5007static void TestReorderingAcrossCloning(void)
5008{
5009    UErrorCode status = U_ZERO_ERROR;
5010    UCollator  *myCollation;
5011    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5012    UCollator *clonedCollation;
5013    int32_t retrievedReorderCodesLength;
5014    int32_t retrievedReorderCodes[10];
5015    int loopIndex;
5016
5017    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5018
5019    /* build collator tertiary */
5020    myCollation = ucol_open("", &status);
5021    ucol_setStrength(myCollation, UCOL_TERTIARY);
5022    if(U_FAILURE(status)) {
5023        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5024        return;
5025    }
5026
5027    /* set the reorderding */
5028    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5029    if (U_FAILURE(status)) {
5030        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5031        return;
5032    }
5033
5034    /* clone the collator */
5035    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5036    if (U_FAILURE(status)) {
5037        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5038        return;
5039    }
5040
5041    /* get the reordering */
5042    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
5043    if (U_FAILURE(status)) {
5044        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5045        return;
5046    }
5047    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
5048        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
5049        return;
5050    }
5051    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5052        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5053            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5054            return;
5055        }
5056    }
5057
5058    /*uprv_free(buffer);*/
5059    ucol_close(myCollation);
5060    ucol_close(clonedCollation);
5061}
5062
5063/*
5064 * Utility function to test one collation reordering test case set.
5065 * @param testcases Array of test cases.
5066 * @param n_testcases Size of the array testcases.
5067 * @param reorderTokens Array of reordering codes.
5068 * @param reorderTokensLen Size of the array reorderTokens.
5069 */
5070static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5071{
5072    uint32_t testCaseNum;
5073    UErrorCode status = U_ZERO_ERROR;
5074    UCollator  *myCollation;
5075
5076    myCollation = ucol_open("", &status);
5077    if (U_FAILURE(status)) {
5078        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5079        return;
5080    }
5081    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5082    if(U_FAILURE(status)) {
5083        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5084        return;
5085    }
5086
5087    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5088        doTest(myCollation,
5089            testCases[testCaseNum].source,
5090            testCases[testCaseNum].target,
5091            testCases[testCaseNum].result
5092        );
5093    }
5094    ucol_close(myCollation);
5095}
5096
5097static void TestGreekFirstReorder(void)
5098{
5099    const char* strRules[] = {
5100        "[reorder Grek]"
5101    };
5102
5103    const int32_t apiRules[] = {
5104        USCRIPT_GREEK
5105    };
5106
5107    const static OneTestCase privateUseCharacterStrings[] = {
5108        { {0x0391}, {0x0391}, UCOL_EQUAL },
5109        { {0x0041}, {0x0391}, UCOL_GREATER },
5110        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5111        { {0x0060}, {0x0391}, UCOL_LESS },
5112        { {0x0391}, {0xe2dc}, UCOL_LESS },
5113        { {0x0391}, {0x0060}, UCOL_GREATER },
5114    };
5115
5116    /* Test rules creation */
5117    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5118
5119    /* Test collation reordering API */
5120    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5121}
5122
5123static void TestGreekLastReorder(void)
5124{
5125    const char* strRules[] = {
5126        "[reorder Zzzz Grek]"
5127    };
5128
5129    const int32_t apiRules[] = {
5130        USCRIPT_UNKNOWN, USCRIPT_GREEK
5131    };
5132
5133    const static OneTestCase privateUseCharacterStrings[] = {
5134        { {0x0391}, {0x0391}, UCOL_EQUAL },
5135        { {0x0041}, {0x0391}, UCOL_LESS },
5136        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5137        { {0x0060}, {0x0391}, UCOL_LESS },
5138        { {0x0391}, {0xe2dc}, UCOL_GREATER },
5139    };
5140
5141    /* Test rules creation */
5142    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5143
5144    /* Test collation reordering API */
5145    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5146}
5147
5148static void TestNonScriptReorder(void)
5149{
5150    const char* strRules[] = {
5151        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5152    };
5153
5154    const int32_t apiRules[] = {
5155        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5156        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5157        UCOL_REORDER_CODE_CURRENCY
5158    };
5159
5160    const static OneTestCase privateUseCharacterStrings[] = {
5161        { {0x0391}, {0x0041}, UCOL_LESS },
5162        { {0x0041}, {0x0391}, UCOL_GREATER },
5163        { {0x0060}, {0x0041}, UCOL_LESS },
5164        { {0x0060}, {0x0391}, UCOL_GREATER },
5165        { {0x0024}, {0x0041}, UCOL_GREATER },
5166    };
5167
5168    /* Test rules creation */
5169    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5170
5171    /* Test collation reordering API */
5172    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5173}
5174
5175static void TestHaniReorder(void)
5176{
5177    const char* strRules[] = {
5178        "[reorder Hani]"
5179    };
5180    const int32_t apiRules[] = {
5181        USCRIPT_HAN
5182    };
5183
5184    const static OneTestCase privateUseCharacterStrings[] = {
5185        { {0x4e00}, {0x0041}, UCOL_LESS },
5186        { {0x4e00}, {0x0060}, UCOL_GREATER },
5187        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5188        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5189        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5190        { {0xfa27}, {0x0041}, UCOL_LESS },
5191        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5192    };
5193
5194    /* Test rules creation */
5195    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5196
5197    /* Test collation reordering API */
5198    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5199}
5200
5201static void TestHaniReorderWithOtherRules(void)
5202{
5203    const char* strRules[] = {
5204        "[reorder Hani] &b<a"
5205    };
5206    /*const int32_t apiRules[] = {
5207        USCRIPT_HAN
5208    };*/
5209
5210    const static OneTestCase privateUseCharacterStrings[] = {
5211        { {0x4e00}, {0x0041}, UCOL_LESS },
5212        { {0x4e00}, {0x0060}, UCOL_GREATER },
5213        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5214        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5215        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5216        { {0xfa27}, {0x0041}, UCOL_LESS },
5217        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5218        { {0x0062}, {0x0061}, UCOL_LESS },
5219    };
5220
5221    /* Test rules creation */
5222    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5223}
5224
5225static void TestMultipleReorder(void)
5226{
5227    const char* strRules[] = {
5228        "[reorder Grek Zzzz DIGIT Latn Hani]"
5229    };
5230
5231    const int32_t apiRules[] = {
5232        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5233    };
5234
5235    const static OneTestCase collationTestCases[] = {
5236        { {0x0391}, {0x0041}, UCOL_LESS},
5237        { {0x0031}, {0x0041}, UCOL_LESS},
5238        { {0x0041}, {0x4e00}, UCOL_LESS},
5239    };
5240
5241    /* Test rules creation */
5242    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
5243
5244    /* Test collation reordering API */
5245    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
5246}
5247
5248/*
5249 * Test that covers issue reported in ticket 8814
5250 */
5251static void TestReorderWithNumericCollation(void)
5252{
5253    UErrorCode status = U_ZERO_ERROR;
5254    UCollator  *myCollation;
5255    UCollator  *myReorderCollation;
5256    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5257    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5258    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5259    UChar fortyS[] = { 0x0053 };
5260    UChar fortyThreeP[] = { 0x0050 };
5261    uint8_t fortyS_sortKey[128];
5262    int32_t fortyS_sortKey_Length;
5263    uint8_t fortyThreeP_sortKey[128];
5264    int32_t fortyThreeP_sortKey_Length;
5265    uint8_t fortyS_sortKey_reorder[128];
5266    int32_t fortyS_sortKey_reorder_Length;
5267    uint8_t fortyThreeP_sortKey_reorder[128];
5268    int32_t fortyThreeP_sortKey_reorder_Length;
5269    UCollationResult collResult;
5270    UCollationResult collResultReorder;
5271
5272    log_verbose("Testing reordering with and without numeric collation\n");
5273
5274    /* build collator tertiary with numeric */
5275    myCollation = ucol_open("", &status);
5276    /*
5277    ucol_setStrength(myCollation, UCOL_TERTIARY);
5278    */
5279    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5280    if(U_FAILURE(status)) {
5281        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5282        return;
5283    }
5284
5285    /* build collator tertiary with numeric and reordering */
5286    myReorderCollation = ucol_open("", &status);
5287    /*
5288    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5289    */
5290    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5291    ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
5292    if(U_FAILURE(status)) {
5293        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5294        return;
5295    }
5296
5297    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
5298    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
5299    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
5300    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5301
5302    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5303        log_err_status(status, "ERROR: couldn't generate sort keys\n");
5304        return;
5305    }
5306    collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5307    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5308    /*
5309    fprintf(stderr, "\tcollResult = %x\n", collResult);
5310    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5311    fprintf(stderr, "\nfortyS\n");
5312    for (i = 0; i < fortyS_sortKey_Length; i++) {
5313        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5314    }
5315    fprintf(stderr, "\nfortyThreeP\n");
5316    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5317        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5318    }
5319    */
5320    if (collResult != collResultReorder) {
5321        log_err_status(status, "ERROR: collation results should have been the same.\n");
5322        return;
5323    }
5324
5325    ucol_close(myCollation);
5326    ucol_close(myReorderCollation);
5327}
5328
/*
 * Compares two zero-terminated byte sequences (such as sort keys), byte by
 * byte as unsigned values.
 *
 * @param a First zero-terminated byte sequence.
 * @param b Second zero-terminated byte sequence.
 * @return 0 if both sequences are identical up to and including the
 *         terminating zero, -1 if the first differing byte of a is smaller,
 *         1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  while (*a == *b) {
    if (*a == 0) {
      /* Reached the terminator of both sequences without a difference. */
      return 0;
    }
    ++a;
    ++b;
  }
  return (*a > *b) ? 1 : -1;
}
5338
5339static void TestImportRulesDeWithPhonebook(void)
5340{
5341  const char* normalRules[] = {
5342    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5343    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5344    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5345  };
5346  const OneTestCase normalTests[] = {
5347    { {0x00e6}, {0x00c6}, UCOL_LESS},
5348    { {0x00fc}, {0x00dc}, UCOL_GREATER},
5349  };
5350
5351  const char* importRules[] = {
5352    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5353    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5354    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5355  };
5356  const OneTestCase importTests[] = {
5357    { {0x00e6}, {0x00c6}, UCOL_LESS},
5358    { {0x00fc}, {0x00dc}, UCOL_LESS},
5359  };
5360
5361  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
5362  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
5363}
5364
#if 0
/*
 * Disabled test: compares the default ordering, the European Ordering Rules
 * import, the Finnish standard import, and the combination of both imports.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* europeanRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase europeanCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard rules. */
    const char* finnishStdRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase finnishStdCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* europeanPlusFinnishRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    const char* finnishEorRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expectations shared by the combined import and the fi-u-co-eor import. */
    const OneTestCase finnishEorCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases), ducetRules, LEN(ducetRules));
    doTestOneTestCase(europeanCases, LEN(europeanCases), europeanRules, LEN(europeanRules));
    doTestOneTestCase(finnishStdCases, LEN(finnishStdCases), finnishStdRules, LEN(finnishStdRules));
    doTestOneTestCase(finnishEorCases, LEN(finnishEorCases), europeanPlusFinnishRules, LEN(europeanPlusFinnishRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
          eor{
              Sequence{
                  "[import root-u-co-eor][import fi-u-co-standard]"
              }
              Version{"21.0"}
          }
    */
    /* doTestOneTestCase(finnishEorCases, LEN(finnishEorCases), finnishEorRules, LEN(finnishEorRules)); */

}
#endif
5434
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Korean unihan import: the same pair is expected in the opposite order. */
    const char* unihanImportRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases), ducetRules, LEN(ducetRules));
    doTestOneTestCase(unihanCases, LEN(unihanCases), unihanImportRules, LEN(unihanImportRules));

}
#endif
5466
5467static void TestImport(void)
5468{
5469    UCollator* vicoll;
5470    UCollator* escoll;
5471    UCollator* viescoll;
5472    UCollator* importviescoll;
5473    UParseError error;
5474    UErrorCode status = U_ZERO_ERROR;
5475    UChar* virules;
5476    int32_t viruleslength;
5477    UChar* esrules;
5478    int32_t esruleslength;
5479    UChar* viesrules;
5480    int32_t viesruleslength;
5481    char srules[500] = "[import vi][import es]";
5482    UChar rules[500];
5483    uint32_t length = 0;
5484    int32_t itemCount;
5485    int32_t i, k;
5486    UChar32 start;
5487    UChar32 end;
5488    UChar str[500];
5489    int32_t strLength;
5490
5491    uint8_t sk1[500];
5492    uint8_t sk2[500];
5493
5494    UBool b;
5495    USet* tailoredSet;
5496    USet* importTailoredSet;
5497
5498
5499    vicoll = ucol_open("vi", &status);
5500    if(U_FAILURE(status)){
5501        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5502        return;
5503    }
5504
5505    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5506    escoll = ucol_open("es", &status);
5507    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5508    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5509    viesrules[0] = 0;
5510    u_strcat(viesrules, virules);
5511    u_strcat(viesrules, esrules);
5512    viesruleslength = viruleslength + esruleslength;
5513    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5514
5515    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5516    length = u_unescape(srules, rules, 500);
5517    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5518    if(U_FAILURE(status)){
5519        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5520        return;
5521    }
5522
5523    tailoredSet = ucol_getTailoredSet(viescoll, &status);
5524    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5525
5526    if(!uset_equals(tailoredSet, importTailoredSet)){
5527        log_err("Tailored sets not equal");
5528    }
5529
5530    uset_close(importTailoredSet);
5531
5532    itemCount = uset_getItemCount(tailoredSet);
5533
5534    for( i = 0; i < itemCount; i++){
5535        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5536        if(strLength < 2){
5537            for (; start <= end; start++){
5538                k = 0;
5539                U16_APPEND(str, k, 500, start, b);
5540                (void)b;    /* Suppress set but not used warning. */
5541                ucol_getSortKey(viescoll, str, 1, sk1, 500);
5542                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5543                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5544                    log_err("Sort key for %s not equal\n", str);
5545                    break;
5546                }
5547            }
5548        }else{
5549            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5550            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5551            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5552                log_err("ZZSort key for %s not equal\n", str);
5553                break;
5554            }
5555
5556        }
5557    }
5558
5559    uset_close(tailoredSet);
5560
5561    uprv_free(viesrules);
5562
5563    ucol_close(vicoll);
5564    ucol_close(escoll);
5565    ucol_close(viescoll);
5566    ucol_close(importviescoll);
5567}
5568
5569static void TestImportWithType(void)
5570{
5571    UCollator* vicoll;
5572    UCollator* decoll;
5573    UCollator* videcoll;
5574    UCollator* importvidecoll;
5575    UParseError error;
5576    UErrorCode status = U_ZERO_ERROR;
5577    const UChar* virules;
5578    int32_t viruleslength;
5579    const UChar* derules;
5580    int32_t deruleslength;
5581    UChar* viderules;
5582    int32_t videruleslength;
5583    const char srules[500] = "[import vi][import de-u-co-phonebk]";
5584    UChar rules[500];
5585    uint32_t length = 0;
5586    int32_t itemCount;
5587    int32_t i, k;
5588    UChar32 start;
5589    UChar32 end;
5590    UChar str[500];
5591    int32_t strLength;
5592
5593    uint8_t sk1[500];
5594    uint8_t sk2[500];
5595
5596    USet* tailoredSet;
5597    USet* importTailoredSet;
5598
5599    vicoll = ucol_open("vi", &status);
5600    if(U_FAILURE(status)){
5601        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5602        return;
5603    }
5604    virules = ucol_getRules(vicoll, &viruleslength);
5605    /* decoll = ucol_open("de@collation=phonebook", &status); */
5606    decoll = ucol_open("de-u-co-phonebk", &status);
5607    if(U_FAILURE(status)){
5608        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5609        return;
5610    }
5611
5612
5613    derules = ucol_getRules(decoll, &deruleslength);
5614    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5615    viderules[0] = 0;
5616    u_strcat(viderules, virules);
5617    u_strcat(viderules, derules);
5618    videruleslength = viruleslength + deruleslength;
5619    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5620
5621    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5622    length = u_unescape(srules, rules, 500);
5623    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5624    if(U_FAILURE(status)){
5625        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5626        return;
5627    }
5628
5629    tailoredSet = ucol_getTailoredSet(videcoll, &status);
5630    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5631
5632    if(!uset_equals(tailoredSet, importTailoredSet)){
5633        log_err("Tailored sets not equal");
5634    }
5635
5636    uset_close(importTailoredSet);
5637
5638    itemCount = uset_getItemCount(tailoredSet);
5639
5640    for( i = 0; i < itemCount; i++){
5641        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5642        if(strLength < 2){
5643            for (; start <= end; start++){
5644                k = 0;
5645                U16_APPEND_UNSAFE(str, k, start);
5646                ucol_getSortKey(videcoll, str, 1, sk1, 500);
5647                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5648                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5649                    log_err("Sort key for %s not equal\n", str);
5650                    break;
5651                }
5652            }
5653        }else{
5654            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5655            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5656            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5657                log_err("Sort key for %s not equal\n", str);
5658                break;
5659            }
5660
5661        }
5662    }
5663
5664    uset_close(tailoredSet);
5665
5666    uprv_free(viderules);
5667
5668    ucol_close(videcoll);
5669    ucol_close(importvidecoll);
5670    ucol_close(vicoll);
5671    ucol_close(decoll);
5672}
5673
5674/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5675static const UChar longUpperStr1[]= { /* 155 chars */
5676    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5677    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5678    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5679    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5680    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5681    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5682    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5683    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5684    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5685    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5686};
5687
/*
 * Second long sample string in the longUpperStrItems table.
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * Each row below is one 25-UChar repetition: ASCII consonants alternate with
 * accented capital vowels in the range 0xC0..0xDA, followed by a space.
 * The array has no terminating NUL; its length is obtained with MY_ARRAY_LEN.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5696
/*
 * Third long sample string in the longUpperStrItems table.
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times.
 * Each row below is one 27-UChar repetition (A-Z followed by a space).
 * The array has no terminating NUL; its length is obtained with MY_ARRAY_LEN.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5712
/*
 * Number of elements in an array object; the result has type size_t.
 * Only valid for true arrays: a pointer (including an array function
 * parameter) would yield sizeof(pointer)/sizeof(element) instead.
 * The argument is parenthesized before it is indexed so that the expansion
 * does not depend on the operator precedence of the expression passed in.
 */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
5714
5715typedef struct {
5716    const UChar * longUpperStrPtr;
5717    int32_t       longUpperStrLen;
5718} LongUpperStrItem;
5719
5720/* String pointers must be in reverse collation order of the corresponding strings */
5721static const LongUpperStrItem longUpperStrItems[] = {
5722    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
5723    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
5724    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
5725    { NULL,          0                           }
5726};
5727
5728enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5729
5730/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5731static void TestCaseLevelBufferOverflow(void)
5732{
5733    UErrorCode status = U_ZERO_ERROR;
5734    UCollator * ucol = ucol_open("root", &status);
5735    if ( U_SUCCESS(status) ) {
5736        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5737        if ( U_SUCCESS(status) ) {
5738            const LongUpperStrItem * itemPtr;
5739            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5740            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5741                int32_t sortKeyLen;
5742                if (itemPtr > longUpperStrItems) {
5743                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5744                }
5745                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5746                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5747                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5748                    break;
5749                }
5750                if ( itemPtr > longUpperStrItems ) {
5751                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5752                    if (compareResult >= 0) {
5753                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5754                    }
5755                }
5756            }
5757        } else {
5758            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5759        }
5760        ucol_close(ucol);
5761    } else {
5762        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5763    }
5764}
5765
5766/* Test for #10595 */
5767static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5768#define KEY_PART_SIZE 16
5769
5770static void TestNextSortKeyPartJaIdentical(void)
5771{
5772    UErrorCode status = U_ZERO_ERROR;
5773    UCollator *coll;
5774    uint8_t keyPart[KEY_PART_SIZE];
5775    UCharIterator iter;
5776    uint32_t state[2] = {0, 0};
5777    int32_t keyPartLen;
5778
5779    coll = ucol_open("ja", &status);
5780    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5781    if (U_FAILURE(status)) {
5782        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5783        return;
5784    }
5785
5786    uiter_setString(&iter, testJapaneseName, 5);
5787    keyPartLen = KEY_PART_SIZE;
5788    while (keyPartLen == KEY_PART_SIZE) {
5789        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5790        if (U_FAILURE(status)) {
5791            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5792            break;
5793        }
5794    }
5795
5796    ucol_close(coll);
5797}
5798
/*
 * Registers test function x with addTest under the path "tscoll/cmsccoll/<x>".
 * The path is built by stringizing the function name, and the macro expects a
 * variable named root to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the tests of this file to the test tree passed in through root, one
 * TEST() entry per test function, in the order listed.
 * Entries that are commented out are disabled; where a reason was recorded it
 * is given in the remark next to the entry.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5905
5906#endif /* #if !UCONFIG_NO_COLLATION */
5907