1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "ucol_tok.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "unicode/utf16.h"
41#include "uparse.h"
42#include "putilimp.h"
43
44
/* Number of elements in a true array (never pass a pointer). The argument
 * is fully parenthesized wherever it is used with an operator, following
 * normal macro hygiene. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-size UChar test tables in this file. */
#define MAX_TOKEN_LEN 16
48
49typedef UCollationResult tst_strcoll(void *collator, const int object,
50                        const UChar *source, const int sLen,
51                        const UChar *target, const int tLen);
52
53
54
/*
 * Word list for the first rule set used by IncompleteCntTest. That test
 * expects every entry to compare less than every entry listed after it.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/*
 * Word list for the second rule set used by IncompleteCntTest, in the
 * same expected ascending order.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
81
82static void IncompleteCntTest(void)
83{
84  UErrorCode status = U_ZERO_ERROR;
85  UChar temp[90];
86  UChar t1[90];
87  UChar t2[90];
88
89  UCollator *coll =  NULL;
90  uint32_t i = 0, j = 0;
91  uint32_t size = 0;
92
93  u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97  if(U_SUCCESS(status)) {
98    size = sizeof(cnt1)/sizeof(cnt1[0]);
99    for(i = 0; i < size-1; i++) {
100      for(j = i+1; j < size; j++) {
101        UCollationElements *iter;
102        u_uastrcpy(t1, cnt1[i]);
103        u_uastrcpy(t2, cnt1[j]);
104        doTest(coll, t1, t2, UCOL_LESS);
105        /* synwee : added collation element iterator test */
106        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107        if (U_FAILURE(status)) {
108          log_err("Creation of iterator failed\n");
109          break;
110        }
111        backAndForth(iter);
112        ucol_closeElements(iter);
113      }
114    }
115  }
116
117  ucol_close(coll);
118
119
120  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123  if(U_SUCCESS(status)) {
124    size = sizeof(cnt2)/sizeof(cnt2[0]);
125    for(i = 0; i < size-1; i++) {
126      for(j = i+1; j < size; j++) {
127        UCollationElements *iter;
128        u_uastrcpy(t1, cnt2[i]);
129        u_uastrcpy(t2, cnt2[j]);
130        doTest(coll, t1, t2, UCOL_LESS);
131
132        /* synwee : added collation element iterator test */
133        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134        if (U_FAILURE(status)) {
135          log_err("Creation of iterator failed\n");
136          break;
137        }
138        backAndForth(iter);
139        ucol_closeElements(iter);
140      }
141    }
142  }
143
144  ucol_close(coll);
145
146
147}
148
/*
 * Phrases used by BlackBirdTest with shifted alternate handling. At
 * quaternary strength the test expects each entry to compare less than
 * every later entry.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
160
161const static UCollationResult shiftedTert[] = {
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_EQUAL,
165  UCOL_LESS,
166  UCOL_EQUAL,
167  UCOL_EQUAL,
168  UCOL_LESS,
169  UCOL_EQUAL,
170  UCOL_EQUAL
171};
172
/*
 * Phrases used by BlackBirdTest with non-ignorable alternate handling; the
 * test expects each entry to compare less than every later entry.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
184
185static void BlackBirdTest(void) {
186  UErrorCode status = U_ZERO_ERROR;
187  UChar t1[90];
188  UChar t2[90];
189
190  uint32_t i = 0, j = 0;
191  uint32_t size = 0;
192  UCollator *coll = ucol_open("en_US", &status);
193
194  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197  if(U_SUCCESS(status)) {
198    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199    for(i = 0; i < size-1; i++) {
200      for(j = i+1; j < size; j++) {
201        u_uastrcpy(t1, nonignorable[i]);
202        u_uastrcpy(t2, nonignorable[j]);
203        doTest(coll, t1, t2, UCOL_LESS);
204      }
205    }
206  }
207
208  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211  if(U_SUCCESS(status)) {
212    size = sizeof(shifted)/sizeof(shifted[0]);
213    for(i = 0; i < size-1; i++) {
214      for(j = i+1; j < size; j++) {
215        u_uastrcpy(t1, shifted[i]);
216        u_uastrcpy(t2, shifted[j]);
217        doTest(coll, t1, t2, UCOL_LESS);
218      }
219    }
220  }
221
222  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223  if(U_SUCCESS(status)) {
224    size = sizeof(shifted)/sizeof(shifted[0]);
225    for(i = 1; i < size; i++) {
226      u_uastrcpy(t1, shifted[i-1]);
227      u_uastrcpy(t2, shifted[i]);
228      doTest(coll, t1, t2, shiftedTert[i]);
229    }
230  }
231
232  ucol_close(coll);
233}
234
235const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238    {0x0041/*'A'*/, 0x0300, 0x0000},
239    {0x00C0, 0x0301, 0x0000},
240    /* this would work with forced normalization */
241    {0x00C0, 0x0316, 0x0000}
242};
243
244const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247    {0x00C0, 0},
248    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249    /* this would work with forced normalization */
250    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251};
252
253const static UCollationResult results[] = {
254    UCOL_GREATER,
255    UCOL_EQUAL,
256    UCOL_EQUAL,
257    UCOL_GREATER,
258    UCOL_EQUAL
259};
260
261static void FunkyATest(void)
262{
263
264    int32_t i;
265    UErrorCode status = U_ZERO_ERROR;
266    UCollator  *myCollation;
267    myCollation = ucol_open("en_US", &status);
268    if(U_FAILURE(status)){
269        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270        return;
271    }
272    log_verbose("Testing some A letters, for some reason\n");
273    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274    ucol_setStrength(myCollation, UCOL_TERTIARY);
275    for (i = 0; i < 4 ; i++)
276    {
277        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278    }
279    ucol_close(myCollation);
280}
281
282UColAttributeValue caseFirst[] = {
283    UCOL_OFF,
284    UCOL_LOWER_FIRST,
285    UCOL_UPPER_FIRST
286};
287
288
289UColAttributeValue alternateHandling[] = {
290    UCOL_NON_IGNORABLE,
291    UCOL_SHIFTED
292};
293
294UColAttributeValue caseLevel[] = {
295    UCOL_OFF,
296    UCOL_ON
297};
298
299UColAttributeValue strengths[] = {
300    UCOL_PRIMARY,
301    UCOL_SECONDARY,
302    UCOL_TERTIARY,
303    UCOL_QUATERNARY,
304    UCOL_IDENTICAL
305};
306
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00E4rk Davis" for every combination of
 * case-first, alternate-handling, case-level and strength settings.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    /* Nothing to print without a collator. */
    fprintf(stderr, "PrintMarkDavis: could not open collator\n");
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* Index with h, the loop variable of this level. The previous code used
       i, which at this point holds a stale value left over from another loop
       and can be past the end of caseFirst[]. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          /* len is passed by pointer, so the callee may update it; reset it
             to the buffer capacity before every call to be safe. */
          len = (uint32_t)sizeof(buffer);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* The collator was previously leaked. */
  ucol_close(coll);
}
#endif
384
385static void BillFairmanTest(void) {
386/*
387** check for actual locale via ICU resource bundles
388**
389** lp points to the original locale ("fr_FR_....")
390*/
391
392    UResourceBundle *lr,*cr;
393    UErrorCode              lec = U_ZERO_ERROR;
394    const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396    log_verbose("BillFairmanTest\n");
397
398    lr = ures_open(NULL,lp,&lec);
399    if (lr) {
400        cr = ures_getByKey(lr,"collations",0,&lec);
401        if (cr) {
402            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403            if (lp) {
404                if (U_SUCCESS(lec)) {
405                    if(strcmp(lp, "fr") != 0) {
406                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407                    }
408                }
409            }
410            ures_close(cr);
411        }
412        ures_close(lr);
413    }
414}
415
416static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417    UChar source[256] = { '\0'};
418    UChar target[256] = { '\0'};
419    UChar preP = 0x31a3;
420    UChar preQ = 0x310d;
421/*
422    UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423    UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424*/
425    /*log_verbose("Testing primary\n");*/
426
427    doTest(col, p, q, UCOL_LESS);
428/*
429    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431    if(result!=UCOL_LESS){
432       aescstrdup(p,utfSource,256);
433       aescstrdup(q,utfTarget,256);
434       fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
435    }
436*/
437    source[0] = preP;
438    u_strcpy(source+1,p);
439    target[0] = preQ;
440    u_strcpy(target+1,q);
441    doTest(col, source, target, UCOL_LESS);
442/*
443    fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
444*/
445}
446
447static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448    UChar source[256] = { '\0'};
449    UChar target[256] = { '\0'};
450
451    /*log_verbose("Testing secondary\n");*/
452
453    doTest(col, p, q, UCOL_LESS);
454/*
455    fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
456*/
457    source[0] = 0x0053;
458    u_strcpy(source+1,p);
459    target[0]= 0x0073;
460    u_strcpy(target+1,q);
461
462    doTest(col, source, target, UCOL_LESS);
463/*
464    fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
465*/
466
467
468    u_strcpy(source,p);
469    source[u_strlen(p)] = 0x62;
470    source[u_strlen(p)+1] = 0;
471
472
473    u_strcpy(target,q);
474    target[u_strlen(q)] = 0x61;
475    target[u_strlen(q)+1] = 0;
476
477    doTest(col, source, target, UCOL_GREATER);
478
479/*
480    fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
481*/
482}
483
484static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485    UChar source[256] = { '\0'};
486    UChar target[256] = { '\0'};
487
488    /*log_verbose("Testing tertiary\n");*/
489
490    doTest(col, p, q, UCOL_LESS);
491/*
492    fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
493*/
494    source[0] = 0x0020;
495    u_strcpy(source+1,p);
496    target[0]= 0x002D;
497    u_strcpy(target+1,q);
498
499    doTest(col, source, target, UCOL_LESS);
500/*
501    fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
502*/
503
504    u_strcpy(source,p);
505    source[u_strlen(p)] = 0xE0;
506    source[u_strlen(p)+1] = 0;
507
508    u_strcpy(target,q);
509    target[u_strlen(q)] = 0x61;
510    target[u_strlen(q)+1] = 0;
511
512    doTest(col, source, target, UCOL_GREATER);
513
514/*
515    fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
516*/
517}
518
519static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520/*
521    UChar source[256] = { '\0'};
522    UChar target[256] = { '\0'};
523*/
524
525    doTest(col, p, q, UCOL_EQUAL);
526/*
527    fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
528*/
529}
530
531static void testCollator(UCollator *coll, UErrorCode *status) {
532  const UChar *rules = NULL, *current = NULL;
533  int32_t ruleLen = 0;
534  uint32_t strength = 0;
535  uint32_t chOffset = 0; uint32_t chLen = 0;
536  uint32_t exOffset = 0; uint32_t exLen = 0;
537  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538  uint32_t firstEx = 0;
539/*  uint32_t rExpsLen = 0; */
540  uint32_t firstLen = 0;
541  UBool varT = FALSE; UBool top_ = TRUE;
542  uint16_t specs = 0;
543  UBool startOfRules = TRUE;
544  UBool lastReset = FALSE;
545  UBool before = FALSE;
546  uint32_t beforeStrength = 0;
547  UColTokenParser src;
548  UColOptionSet opts;
549
550  UChar first[256];
551  UChar second[256];
552  UChar tempB[256];
553  uint32_t tempLen;
554  UChar *rulesCopy = NULL;
555  UParseError parseError;
556
557  uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559  src.opts = &opts;
560
561  rules = ucol_getRules(coll, &ruleLen);
562  if(U_SUCCESS(*status) && ruleLen > 0) {
563    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
564    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565    src.current = src.source = rulesCopy;
566    src.end = rulesCopy+ruleLen;
567    src.extraCurrent = src.end;
568    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569    *first = *second = 0;
570
571	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573    while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
574      strength = src.parsedToken.strength;
575      chOffset = src.parsedToken.charsOffset;
576      chLen = src.parsedToken.charsLen;
577      exOffset = src.parsedToken.extensionOffset;
578      exLen = src.parsedToken.extensionLen;
579      prefixOffset = src.parsedToken.prefixOffset;
580      prefixLen = src.parsedToken.prefixLen;
581      specs = src.parsedToken.flags;
582
583      startOfRules = FALSE;
584      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
586      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
587        second[0] = 0;
588      } else {
589        u_strncpy(second,src.source+chOffset, chLen);
590        second[chLen] = 0;
591
592        if(exLen > 0 && firstEx == 0) {
593          u_strncat(first, src.source+exOffset, exLen);
594          first[firstLen+exLen] = 0;
595        }
596
597        if(lastReset == TRUE && prefixLen != 0) {
598          u_strncpy(first+prefixLen, first, firstLen);
599          u_strncpy(first, src.source+prefixOffset, prefixLen);
600          first[firstLen+prefixLen] = 0;
601          firstLen = firstLen+prefixLen;
602        }
603
604        if(before == TRUE) { /* swap first and second */
605          u_strcpy(tempB, first);
606          u_strcpy(first, second);
607          u_strcpy(second, tempB);
608
609          tempLen = firstLen;
610          firstLen = chLen;
611          chLen = tempLen;
612
613          tempLen = firstEx;
614          firstEx = exLen;
615          exLen = tempLen;
616          if(beforeStrength < strength) {
617            strength = beforeStrength;
618          }
619        }
620      }
621      lastReset = FALSE;
622
623      switch(strength){
624      case UCOL_IDENTICAL:
625          testEquality(coll,first,second);
626          break;
627      case UCOL_PRIMARY:
628          testPrimary(coll,first,second);
629          break;
630      case UCOL_SECONDARY:
631          testSecondary(coll,first,second);
632          break;
633      case UCOL_TERTIARY:
634          testTertiary(coll,first,second);
635          break;
636      case UCOL_TOK_RESET:
637        lastReset = TRUE;
638        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
639        if(before) {
640          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
641        }
642        break;
643      default:
644          break;
645      }
646
647      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
648        before = FALSE;
649      } else {
650        firstLen = chLen;
651        firstEx = exLen;
652        u_strcpy(first, second);
653      }
654    }
655    uprv_free(src.source);
656    uprv_free(src.reorderCodes);
657  }
658}
659
660static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
661  UCollator *UCA = (UCollator *)collator;
662  return ucol_strcoll(UCA, source, sLen, target, tLen);
663}
664
665/*
666static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
667#if U_PLATFORM_HAS_WIN32_API
668  LCID lcid = (LCID)collator;
669  return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
670#else
671  return 0;
672#endif
673}
674*/
675
676static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
677                                     UChar s1, UChar s2,
678                                     const UChar *s, const uint32_t sLen,
679                                     const UChar *t, const uint32_t tLen) {
680  UChar source[256] = {0};
681  UChar target[256] = {0};
682
683  source[0] = s1;
684  u_strcpy(source+1, s);
685  target[0] = s2;
686  u_strcpy(target+1, t);
687
688  return func(collator, opts, source, sLen+1, target, tLen+1);
689}
690
691static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
692                                   UChar s1, UChar s2,
693                                   const UChar *s, const uint32_t sLen,
694                                   const UChar *t, const uint32_t tLen) {
695  UChar source[256] = {0};
696  UChar target[256] = {0};
697
698  u_strcpy(source, s);
699  source[sLen] = s1;
700  u_strcpy(target, t);
701  target[tLen] = s2;
702
703  return func(collator, opts, source, sLen+1, target, tLen+1);
704}
705
706static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
707                              const UChar *s, const uint32_t sLen,
708                              const UChar *t, const uint32_t tLen,
709                              UCollationResult result) {
710  /*UChar fPrimary = 0x6d;*/
711  /*UChar sPrimary = 0x6e;*/
712  UChar fSecondary = 0x310d;
713  UChar sSecondary = 0x31a3;
714  UChar fTertiary = 0x310f;
715  UChar sTertiary = 0x31b7;
716
717  UCollationResult oposite;
718  if(result == UCOL_EQUAL) {
719    return UCOL_IDENTICAL;
720  } else if(result == UCOL_GREATER) {
721    oposite = UCOL_LESS;
722  } else {
723    oposite = UCOL_GREATER;
724  }
725
726  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
727    return UCOL_PRIMARY;
728  } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
729    (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
730    return UCOL_SECONDARY;
731  } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
732    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
733    return UCOL_TERTIARY;
734  } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
735    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
736    return UCOL_QUATERNARY;
737  } else {
738    return UCOL_IDENTICAL;
739  }
740}
741
742static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
743  uint32_t i = 0;
744
745  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
746    buffer[0] = '=';
747    buffer[1] = '=';
748    buffer[2] = '\0';
749  } else if(res == UCOL_GREATER) {
750    for(i = 0; i<strength+1; i++) {
751      buffer[i] = '>';
752    }
753    buffer[strength+1] = '\0';
754  } else {
755    for(i = 0; i<strength+1; i++) {
756      buffer[i] = '<';
757    }
758    buffer[strength+1] = '\0';
759  }
760
761  return buffer;
762}
763
764
765
766static void logFailure (const char *platform, const char *test,
767                        const UChar *source, const uint32_t sLen,
768                        const UChar *target, const uint32_t tLen,
769                        UCollationResult realRes, uint32_t realStrength,
770                        UCollationResult expRes, uint32_t expStrength, UBool error) {
771
772  uint32_t i = 0;
773
774  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
775  static int32_t maxOutputLength = 0;
776  int32_t outputLength;
777
778  *sEsc = *tEsc = *s = *t = 0;
779  if(error == TRUE) {
780    log_err("Difference between expected and generated order. Run test with -v for more info\n");
781  } else if(getTestOption(VERBOSITY_OPTION) == 0) {
782    return;
783  }
784  for(i = 0; i<sLen; i++) {
785    sprintf(b, "%04X", source[i]);
786    strcat(sEsc, "\\u");
787    strcat(sEsc, b);
788    strcat(s, b);
789    strcat(s, " ");
790    if(source[i] < 0x80) {
791      sprintf(b, "(%c)", source[i]);
792      strcat(sEsc, b);
793    }
794  }
795  for(i = 0; i<tLen; i++) {
796    sprintf(b, "%04X", target[i]);
797    strcat(tEsc, "\\u");
798    strcat(tEsc, b);
799    strcat(t, b);
800    strcat(t, " ");
801    if(target[i] < 0x80) {
802      sprintf(b, "(%c)", target[i]);
803      strcat(tEsc, b);
804    }
805  }
806/*
807  strcpy(output, "[[ ");
808  strcat(output, sEsc);
809  strcat(output, getRelationSymbol(expRes, expStrength, relation));
810  strcat(output, tEsc);
811
812  strcat(output, " : ");
813
814  strcat(output, sEsc);
815  strcat(output, getRelationSymbol(realRes, realStrength, relation));
816  strcat(output, tEsc);
817  strcat(output, " ]] ");
818
819  log_verbose("%s", output);
820*/
821
822
823  strcpy(output, "DIFF: ");
824
825  strcat(output, s);
826  strcat(output, " : ");
827  strcat(output, t);
828
829  strcat(output, test);
830  strcat(output, ": ");
831
832  strcat(output, sEsc);
833  strcat(output, getRelationSymbol(expRes, expStrength, relation));
834  strcat(output, tEsc);
835
836  strcat(output, " ");
837
838  strcat(output, platform);
839  strcat(output, ": ");
840
841  strcat(output, sEsc);
842  strcat(output, getRelationSymbol(realRes, realStrength, relation));
843  strcat(output, tEsc);
844
845  outputLength = (int32_t)strlen(output);
846  if(outputLength > maxOutputLength) {
847    maxOutputLength = outputLength;
848    U_ASSERT(outputLength < sizeof(output));
849  }
850
851  log_verbose("%s\n", output);
852
853}
854
855/*
856static void printOutRules(const UChar *rules) {
857  uint32_t len = u_strlen(rules);
858  uint32_t i = 0;
859  char toPrint;
860  uint32_t line = 0;
861
862  fprintf(stdout, "Rules:");
863
864  for(i = 0; i<len; i++) {
865    if(rules[i]<0x7f && rules[i]>=0x20) {
866      toPrint = (char)rules[i];
867      if(toPrint == '&') {
868        line = 1;
869        fprintf(stdout, "\n&");
870      } else if(toPrint == ';') {
871        fprintf(stdout, "<<");
872        line+=2;
873      } else if(toPrint == ',') {
874        fprintf(stdout, "<<<");
875        line+=3;
876      } else {
877        fprintf(stdout, "%c", toPrint);
878        line++;
879      }
880    } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
881      fprintf(stdout, "\\u%04X", rules[i]);
882      line+=6;
883    }
884    if(line>72) {
885      fprintf(stdout, "\n");
886      line = 0;
887    }
888  }
889
890  log_verbose("\n");
891
892}
893*/
894
895static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
896  uint32_t diffs = 0;
897  UCollationResult realResult;
898  uint32_t realStrength;
899
900  uint32_t sLen = u_strlen(first);
901  uint32_t tLen = u_strlen(second);
902
903  realResult = func(collator, opts, first, sLen, second, tLen);
904  realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
905
906  if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
907    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
908    diffs++;
909  } else if(realResult != UCOL_LESS || realStrength != strength) {
910    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
911    diffs++;
912  }
913  return diffs;
914}
915
916
917static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
918  const UChar *rules = NULL, *current = NULL;
919  int32_t ruleLen = 0;
920  uint32_t strength = 0;
921  uint32_t chOffset = 0; uint32_t chLen = 0;
922  uint32_t exOffset = 0; uint32_t exLen = 0;
923  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
924/*  uint32_t rExpsLen = 0; */
925  uint32_t firstLen = 0, secondLen = 0;
926  UBool varT = FALSE; UBool top_ = TRUE;
927  uint16_t specs = 0;
928  UBool startOfRules = TRUE;
929  UColTokenParser src;
930  UColOptionSet opts;
931
932  UChar first[256];
933  UChar second[256];
934  UChar *rulesCopy = NULL;
935
936  uint32_t UCAdiff = 0;
937  uint32_t Windiff = 1;
938  UParseError parseError;
939
940  uprv_memset(&src, 0, sizeof(UColTokenParser));
941  src.opts = &opts;
942
943  rules = ucol_getRules(coll, &ruleLen);
944
945  /*printOutRules(rules);*/
946
947  if(U_SUCCESS(*status) && ruleLen > 0) {
948    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
949    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
950    src.current = src.source = rulesCopy;
951    src.end = rulesCopy+ruleLen;
952    src.extraCurrent = src.end;
953    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
954    *first = *second = 0;
955
956    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
957       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
958    while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
959      strength = src.parsedToken.strength;
960      chOffset = src.parsedToken.charsOffset;
961      chLen = src.parsedToken.charsLen;
962      exOffset = src.parsedToken.extensionOffset;
963      exLen = src.parsedToken.extensionLen;
964      prefixOffset = src.parsedToken.prefixOffset;
965      prefixLen = src.parsedToken.prefixLen;
966      specs = src.parsedToken.flags;
967
968      startOfRules = FALSE;
969      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
970      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
971
972      u_strncpy(second,src.source+chOffset, chLen);
973      second[chLen] = 0;
974      secondLen = chLen;
975
976      if(exLen > 0) {
977        u_strncat(first, src.source+exOffset, exLen);
978        first[firstLen+exLen] = 0;
979        firstLen += exLen;
980      }
981
982      if(strength != UCOL_TOK_RESET) {
983        if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
984          UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
985          /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
986        }
987      }
988
989
990      firstLen = chLen;
991      u_strcpy(first, second);
992
993    }
994    if(UCAdiff != 0 && Windiff != 0) {
995      log_verbose("\n");
996    }
997    if(UCAdiff == 0) {
998      log_verbose("No immediate difference with %s!\n", refName);
999    }
1000    if(Windiff == 0) {
1001      log_verbose("No immediate difference with Win32!\n");
1002    }
1003    uprv_free(src.source);
1004    uprv_free(src.reorderCodes);
1005  }
1006}
1007
1008/*
1009 * Takes two CEs (lead and continuation) and
1010 * compares them as CEs should be compared:
1011 * primary vs. primary, secondary vs. secondary
1012 * tertiary vs. tertiary
1013 */
/*
 * Orders the CE pair (s1, s2) against (t1, t2) level by level. For each
 * pair a per-level key is formed from the first value and the second
 * (continuation) value:
 *   primary   - upper 16 bits of each, first value in the high half;
 *   secondary - bits 8..15 of each, first value in the higher byte;
 *   tertiary  - low 8 bits of each, first value in the higher byte.
 * Returns 0 only when both pairs are bit-for-bit identical, otherwise -1 or
 * 1 according to the first level whose keys differ.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t srcKey[3];
  uint32_t tgtKey[3];
  int32_t level = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  srcKey[0] = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
  tgtKey[0] = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
  srcKey[1] = (s1 & 0x0000FF00)|((s2 & 0x0000FF00)>>8);
  tgtKey[1] = (t1 & 0x0000FF00)|((t2 & 0x0000FF00)>>8);
  srcKey[2] = ((s1 & 0x000000FF)<<8)|(s2 & 0x000000FF);
  tgtKey[2] = ((t1 & 0x000000FF)<<8)|(t2 & 0x000000FF);

  /* Primary and secondary: decide on the first level that differs. */
  for(level = 0; level < 2; level++) {
    if(srcKey[level] != tgtKey[level]) {
      return (srcKey[level] < tgtKey[level]) ? -1 : 1;
    }
  }

  /* Tertiary is the last level; anything not smaller counts as greater. */
  return (srcKey[2] < tgtKey[2]) ? -1 : 1;
}
1044
1045typedef struct {
1046  uint32_t startCE;
1047  uint32_t startContCE;
1048  uint32_t limitCE;
1049  uint32_t limitContCE;
1050} indirectBoundaries;
1051
/*
 * These values are used for finding CE values for indirect positioning.
 * Indirect positioning is a mechanism for allowing resets on symbolic
 * values. It only works for resets, and you cannot tailor indirect names.
 * An indirect name can define either an anchor point or a range. An
 * anchor point behaves in exactly the same way as a code point in a reset
 * would, except that it cannot be tailored. A range (we currently only
 * know of the [top] range) will explicitly set the upper bound for
 * generated CEs, thus allowing for better control over how many CEs can
 * be squeezed into the range without performance penalty.
 * In that respect, we use [top] for tailoring of locales that use CJK
 * characters. Other indirect values are currently a pure convenience;
 * they can be used to ensure that the CEs will always be positioned in
 * the same place relative to a point with known properties (e.g. first
 * primary ignorable).
 */
1066static indirectBoundaries ucolIndirectBoundaries[15];
1067static UBool indirectBoundariesSet = FALSE;
1068static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1069    /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1070    /* to initalize here. */
1071    ucolIndirectBoundaries[indexR].startCE = start[0];
1072    ucolIndirectBoundaries[indexR].startContCE = start[1];
1073    if(end) {
1074        ucolIndirectBoundaries[indexR].limitCE = end[0];
1075        ucolIndirectBoundaries[indexR].limitContCE = end[1];
1076    } else {
1077        ucolIndirectBoundaries[indexR].limitCE = 0;
1078        ucolIndirectBoundaries[indexR].limitContCE = 0;
1079    }
1080}
1081
1082static void testCEs(UCollator *coll, UErrorCode *status) {
1083    const UChar *rules = NULL, *current = NULL;
1084    int32_t ruleLen = 0;
1085
1086    uint32_t strength = 0;
1087    uint32_t maxStrength = UCOL_IDENTICAL;
1088    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1089    uint32_t lastCE;
1090    uint32_t lastContCE;
1091
1092    int32_t result = 0;
1093    uint32_t chOffset = 0; uint32_t chLen = 0;
1094    uint32_t exOffset = 0; uint32_t exLen = 0;
1095    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1096    uint32_t oldOffset = 0;
1097
1098    /* uint32_t rExpsLen = 0; */
1099    /* uint32_t firstLen = 0; */
1100    uint16_t specs = 0;
1101    UBool varT = FALSE; UBool top_ = TRUE;
1102    UBool startOfRules = TRUE;
1103    UBool before = FALSE;
1104    UColTokenParser src;
1105    UColOptionSet opts;
1106    UParseError parseError;
1107    UChar *rulesCopy = NULL;
1108    collIterate *c = uprv_new_collIterate(status);
1109    UCAConstants *consts = NULL;
1110    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1111        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1112    const char *colLoc;
1113    UCollator *UCA = ucol_open("root", status);
1114
1115    if (U_FAILURE(*status)) {
1116        log_err("Could not open root collator %s\n", u_errorName(*status));
1117        uprv_delete_collIterate(c);
1118        return;
1119    }
1120
1121    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1122    if (U_FAILURE(*status)) {
1123        log_err("Could not get collator name: %s\n", u_errorName(*status));
1124        ucol_close(UCA);
1125        uprv_delete_collIterate(c);
1126        return;
1127    }
1128
1129    uprv_memset(&src, 0, sizeof(UColTokenParser));
1130
1131    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1132    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1133    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1134    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1135    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1136
1137    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1138
1139    src.opts = &opts;
1140
1141    rules = ucol_getRules(coll, &ruleLen);
1142
1143    src.invUCA = ucol_initInverseUCA(status);
1144
1145    if(indirectBoundariesSet == FALSE) {
1146        /* UCOL_RESET_TOP_VALUE */
1147        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1148        /* UCOL_FIRST_PRIMARY_IGNORABLE */
1149        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1150        /* UCOL_LAST_PRIMARY_IGNORABLE */
1151        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1152        /* UCOL_FIRST_SECONDARY_IGNORABLE */
1153        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1154        /* UCOL_LAST_SECONDARY_IGNORABLE */
1155        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1156        /* UCOL_FIRST_TERTIARY_IGNORABLE */
1157        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1158        /* UCOL_LAST_TERTIARY_IGNORABLE */
1159        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1160        /* UCOL_FIRST_VARIABLE */
1161        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1162        /* UCOL_LAST_VARIABLE */
1163        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1164        /* UCOL_FIRST_NON_VARIABLE */
1165        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1166        /* UCOL_LAST_NON_VARIABLE */
1167        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1168        /* UCOL_FIRST_IMPLICIT */
1169        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1170        /* UCOL_LAST_IMPLICIT */
1171        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1172        /* UCOL_FIRST_TRAILING */
1173        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1174        /* UCOL_LAST_TRAILING */
1175        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1176        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1177        indirectBoundariesSet = TRUE;
1178    }
1179
1180
1181    if(U_SUCCESS(*status) && ruleLen > 0) {
1182        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1183        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1184        src.current = src.source = rulesCopy;
1185        src.end = rulesCopy+ruleLen;
1186        src.extraCurrent = src.end;
1187        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1188
1189	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1190	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1191        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1192            strength = src.parsedToken.strength;
1193            chOffset = src.parsedToken.charsOffset;
1194            chLen = src.parsedToken.charsLen;
1195            exOffset = src.parsedToken.extensionOffset;
1196            exLen = src.parsedToken.extensionLen;
1197            prefixOffset = src.parsedToken.prefixOffset;
1198            prefixLen = src.parsedToken.prefixLen;
1199            specs = src.parsedToken.flags;
1200
1201            startOfRules = FALSE;
1202            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1203            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1204
1205            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1206
1207            currCE = ucol_getNextCE(coll, c, status);
1208            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1209                log_verbose("Thai prevowel detected. Will pick next CE\n");
1210                currCE = ucol_getNextCE(coll, c, status);
1211            }
1212
1213            currContCE = ucol_getNextCE(coll, c, status);
1214            if(!isContinuation(currContCE)) {
1215                currContCE = 0;
1216            }
1217
1218            /* we need to repack CEs here */
1219
1220            if(strength == UCOL_TOK_RESET) {
1221                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1222                if(top_ == TRUE) {
1223                    int32_t tokenIndex = src.parsedToken.indirectIndex;
1224
1225                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1226                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1227                } else {
1228                    nextCE = baseCE = currCE;
1229                    nextContCE = baseContCE = currContCE;
1230                }
1231                maxStrength = UCOL_IDENTICAL;
1232            } else {
1233                if(strength < maxStrength) {
1234                    maxStrength = strength;
1235                    if(baseCE == UCOL_RESET_TOP_VALUE) {
1236                        log_verbose("Resetting to [top]\n");
1237                        nextCE = UCOL_NEXT_TOP_VALUE;
1238                        nextContCE = UCOL_NEXT_TOP_CONT;
1239                    } else {
1240                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1241                    }
1242                    if(result < 0) {
1243                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1244                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1245                            return;
1246                        } else {
1247                            log_err("%s: couldn't find the CE\n", colLoc);
1248                            return;
1249                        }
1250                    }
1251                }
1252
1253                currCE &= 0xFFFFFF3F;
1254                currContCE &= 0xFFFFFFBF;
1255
1256                if(maxStrength == UCOL_IDENTICAL) {
1257                    if(baseCE != currCE || baseContCE != currContCE) {
1258                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1259                    }
1260                } else {
1261                    if(strength == UCOL_IDENTICAL) {
1262                        if(lastCE != currCE || lastContCE != currContCE) {
1263                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1264                        }
1265                    } else {
1266                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1267                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1268                            log_err("%s: current CE is not less than base CE\n", colLoc);
1269                        }
1270                        if(!before) {
1271                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1272                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1273                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1274                            }
1275                        } else {
1276                            before = FALSE;
1277                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1278                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1279                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1280                            }
1281                        }
1282                    }
1283                }
1284
1285            }
1286
1287            oldOffset = chOffset;
1288            lastCE = currCE & 0xFFFFFF3F;
1289            lastContCE = currContCE & 0xFFFFFFBF;
1290        }
1291        uprv_free(src.source);
1292        uprv_free(src.reorderCodes);
1293    }
1294    ucol_close(UCA);
1295    uprv_delete_collIterate(c);
1296}
1297
#if 0
/* Disabled: these locales are now picked from index RB. */
static const char* localesToTest[] = {
    "ar", "bg", "ca", "cs", "da", "el", "en_BE", "en_US_POSIX",
    "es", "et", "fi", "fr", "hi", "hr", "hu", "is",
    "iw", "ja", "ko", "lt", "lv", "mk", "mt", "nb",
    "nn", "nn_NO", "pl", "ro", "ru", "sh", "sk", "sl",
    "sq", "sr", "sv", "th", "tr", "uk", "vi", "zh",
    "zh_TW"
};
#endif
1312
/*
 * Hand-written tailorings that RamsRulesTest runs in addition to the locale
 * collators. Where a rule has an older formulation it is kept in the
 * comment that follows the rule.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /* disabled: "& Z < p, P" */

    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
        /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
        /* was: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
        /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
        /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
        /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
};
1326
1327
1328static void TestCollations(void) {
1329    int32_t noOfLoc = uloc_countAvailable();
1330    int32_t i = 0, j = 0;
1331
1332    UErrorCode status = U_ZERO_ERROR;
1333    char cName[256];
1334    UChar name[256];
1335    int32_t nameSize;
1336
1337
1338    const char *locName = NULL;
1339    UCollator *coll = NULL;
1340    UCollator *UCA = ucol_open("", &status);
1341    UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1342    if (U_FAILURE(status)) {
1343        log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1344        return;
1345    }
1346    ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1347
1348    for(i = 0; i<noOfLoc; i++) {
1349        status = U_ZERO_ERROR;
1350        locName = uloc_getAvailable(i);
1351        if(uprv_strcmp("ja", locName) == 0) {
1352            log_verbose("Don't know how to test prefixes\n");
1353            continue;
1354        }
1355        if(hasCollationElements(locName)) {
1356            nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1357            for(j = 0; j<nameSize; j++) {
1358                cName[j] = (char)name[j];
1359            }
1360            cName[nameSize] = 0;
1361            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1362            coll = ucol_open(locName, &status);
1363            if(U_SUCCESS(status)) {
1364                testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1365                ucol_close(coll);
1366            } else {
1367                log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1368                status = U_ZERO_ERROR;
1369            }
1370        }
1371    }
1372    ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1373    ucol_close(UCA);
1374}
1375
1376static void RamsRulesTest(void) {
1377    UErrorCode status = U_ZERO_ERROR;
1378    int32_t i = 0;
1379    UCollator *coll = NULL;
1380    UChar rule[2048];
1381    uint32_t ruleLen;
1382    int32_t noOfLoc = uloc_countAvailable();
1383    const char *locName = NULL;
1384
1385    log_verbose("RamsRulesTest\n");
1386
1387    if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1388        /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1389        return;
1390    }
1391
1392    for(i = 0; i<noOfLoc; i++) {
1393        locName = uloc_getAvailable(i);
1394        if(hasCollationElements(locName)) {
1395            if (uprv_strcmp("ja", locName)==0) {
1396                log_verbose("Don't know how to test Japanese because of prefixes\n");
1397                continue;
1398            }
1399            if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1400                log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1401                continue;
1402            }
1403            if (uprv_strcmp("bn", locName)==0 ||
1404                uprv_strcmp("bs", locName)==0 ||            /* Add due to import per cldrbug 5647 */
1405                uprv_strcmp("bs_Cyrl", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1406                uprv_strcmp("en_US_POSIX", locName)==0 ||
1407                uprv_strcmp("fa_AF", locName)==0 ||         /* Add due to import per cldrbug 5647 */
1408                uprv_strcmp("he", locName)==0 ||            /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1409                uprv_strcmp("he_IL", locName)==0 ||         /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1410                uprv_strcmp("km", locName)==0 ||
1411                uprv_strcmp("km_KH", locName)==0 ||
1412                uprv_strcmp("my", locName)==0 ||
1413                uprv_strcmp("si", locName)==0 ||
1414                uprv_strcmp("si_LK", locName)==0 ||
1415                uprv_strcmp("sr_Latn", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1416                uprv_strcmp("th", locName)==0 ||
1417                uprv_strcmp("th_TH", locName)==0 ||
1418                uprv_strcmp("zh", locName)==0 ||
1419                uprv_strcmp("zh_Hant", locName)==0
1420            ) {
1421                log_verbose("Don't know how to test %s. "
1422                            "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1423                continue;
1424            }
1425            log_verbose("Testing locale %s\n", locName);
1426            status = U_ZERO_ERROR;
1427            coll = ucol_open(locName, &status);
1428            if(U_SUCCESS(status)) {
1429              if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1430                if(coll->image->jamoSpecial == TRUE) {
1431                  log_err("%s has special JAMOs\n", locName);
1432                }
1433                ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1434                testCollator(coll, &status);
1435                testCEs(coll, &status);
1436              } else {
1437                log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1438              }
1439              ucol_close(coll);
1440            } else {
1441              log_err("Could not open %s: %s\n", locName, u_errorName(status));
1442            }
1443        }
1444    }
1445
1446    for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1447        log_verbose("Testing rule: %s\n", rulesToTest[i]);
1448        ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1449        status = U_ZERO_ERROR;
1450        coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1451        if(U_SUCCESS(status)) {
1452            testCollator(coll, &status);
1453            testCEs(coll, &status);
1454            ucol_close(coll);
1455        } else {
1456          log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1457        }
1458    }
1459
1460}
1461
1462static void IsTailoredTest(void) {
1463    UErrorCode status = U_ZERO_ERROR;
1464    uint32_t i = 0;
1465    UCollator *coll = NULL;
1466    UChar rule[2048];
1467    UChar tailored[2048];
1468    UChar notTailored[2048];
1469    uint32_t ruleLen, tailoredLen, notTailoredLen;
1470
1471    log_verbose("IsTailoredTest\n");
1472
1473    u_uastrcpy(rule, "&Z < A, B, C;c < d");
1474    ruleLen = u_strlen(rule);
1475
1476    u_uastrcpy(tailored, "ABCcd");
1477    tailoredLen = u_strlen(tailored);
1478
1479    u_uastrcpy(notTailored, "ZabD");
1480    notTailoredLen = u_strlen(notTailored);
1481
1482    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1483    if(U_SUCCESS(status)) {
1484        for(i = 0; i<tailoredLen; i++) {
1485            if(!ucol_isTailored(coll, tailored[i], &status)) {
1486                log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1487            }
1488        }
1489        for(i = 0; i<notTailoredLen; i++) {
1490            if(ucol_isTailored(coll, notTailored[i], &status)) {
1491                log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1492            }
1493        }
1494        ucol_close(coll);
1495    }
1496    else {
1497        log_err_status(status, "Can't tailor rules\n");
1498    }
1499    /* Code coverage */
1500    status = U_ZERO_ERROR;
1501    coll = ucol_open("ja", &status);
1502    if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1503        log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1504    }
1505    ucol_close(coll);
1506}
1507
1508
/*
 * Escaped test strings for TestChMove, listed in the order in which the
 * collator under test is expected to sort them.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly",
  "hb", "HB",
  "hx", "HX",
  "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1528
1529static void TestChMove(void) {
1530    UChar t1[256] = {0};
1531    UChar t2[256] = {0};
1532
1533    uint32_t i = 0, j = 0;
1534    uint32_t size = 0;
1535    UErrorCode status = U_ZERO_ERROR;
1536
1537    UCollator *coll = ucol_open("cs", &status);
1538
1539    if(U_SUCCESS(status)) {
1540        size = sizeof(chTest)/sizeof(chTest[0]);
1541        for(i = 0; i < size-1; i++) {
1542            for(j = i+1; j < size; j++) {
1543                u_unescape(chTest[i], t1, 256);
1544                u_unescape(chTest[j], t2, 256);
1545                doTest(coll, t1, t2, UCOL_LESS);
1546            }
1547        }
1548    }
1549    else {
1550        log_data_err("Can't open collator");
1551    }
1552    ucol_close(coll);
1553}
1554
1555
1556
1557
/* Escaped strings in ascending order for the rule "&\u4e00 < a <<< A < b <<< B". */
static const char impTest[][20] = {
    "\\u4e00",
    "a", "A",
    "b", "B",
    "\\u4e01"
};
1566
1567
/*
 * Feeds a few tailorings that reset on CJK ideographs to
 * genericRulesStarter, each together with the strings (and their count)
 * that belong to it.
 *
 * An earlier hand-rolled variant of this test compared the impTest strings
 * pairwise under "&\u4e00 < a <<< A < b <<< B"; that rule is the second
 * entry of the table below.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;      /* escaped tailoring rules */
    const char *data[10];   /* escaped test strings */
    const uint32_t len;     /* number of used entries in data */
  } tests[] = {
      {
        "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
        { "d", "e", "b", "c", "\\u4e00"},
        5
      },
      {
        "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
        6
      },
      {
        "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"},
        3
      },
      {
        "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"},
        3
      }
  };
  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t idx = 0;

  for(idx = 0; idx < testCount; idx++) {
      genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
  }
}
1617
1618static void TestFCDProblem(void) {
1619  UChar t1[256] = {0};
1620  UChar t2[256] = {0};
1621
1622  const char *s1 = "\\u0430\\u0306\\u0325";
1623  const char *s2 = "\\u04D1\\u0325";
1624
1625  UErrorCode status = U_ZERO_ERROR;
1626  UCollator *coll = ucol_open("", &status);
1627  u_unescape(s1, t1, 256);
1628  u_unescape(s2, t2, 256);
1629
1630  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1631  doTest(coll, t1, t2, UCOL_EQUAL);
1632
1633  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1634  doTest(coll, t1, t2, UCOL_EQUAL);
1635
1636  ucol_close(coll);
1637}
1638
1639/*
1640The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1641We're only using NFC/NFD in this test.
1642*/
1643#define NORM_BUFFER_TEST_LEN 18
1644typedef struct {
1645  UChar32 u;
1646  UChar NFC[NORM_BUFFER_TEST_LEN];
1647  UChar NFD[NORM_BUFFER_TEST_LEN];
1648} tester;
1649
1650static void TestComposeDecompose(void) {
1651    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1652    static const UChar UNICODESET_STR[] = {
1653        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1654        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1655        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1656    };
1657    int32_t noOfLoc;
1658    int32_t i = 0, j = 0;
1659
1660    UErrorCode status = U_ZERO_ERROR;
1661    const char *locName = NULL;
1662    uint32_t nfcSize;
1663    uint32_t nfdSize;
1664    tester **t;
1665    uint32_t noCases = 0;
1666    UCollator *coll = NULL;
1667    UChar32 u = 0;
1668    UChar comp[NORM_BUFFER_TEST_LEN];
1669    uint32_t len = 0;
1670    UCollationElements *iter;
1671    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1672    int32_t charsToTestSize;
1673
1674    noOfLoc = uloc_countAvailable();
1675
1676    coll = ucol_open("", &status);
1677    if (U_FAILURE(status)) {
1678        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1679        return;
1680    }
1681    charsToTestSize = uset_size(charsToTest);
1682    if (charsToTestSize <= 0) {
1683        log_err("Set was zero. Missing data?\n");
1684        return;
1685    }
1686    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1687    t[0] = (tester *)malloc(sizeof(tester));
1688    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1689
1690    for(u = 0; u < charsToTestSize; u++) {
1691        UChar32 ch = uset_charAt(charsToTest, u);
1692        len = 0;
1693        U16_APPEND_UNSAFE(comp, len, ch);
1694        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1695        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1696
1697        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1698          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1699            t[noCases]->u = ch;
1700            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1701                u_strncpy(t[noCases]->NFC, comp, len);
1702                t[noCases]->NFC[len] = 0;
1703            }
1704            noCases++;
1705            t[noCases] = (tester *)malloc(sizeof(tester));
1706            uprv_memset(t[noCases], 0, sizeof(tester));
1707        }
1708    }
1709    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1710    uset_close(charsToTest);
1711    charsToTest = NULL;
1712
1713    for(u=0; u<(UChar32)noCases; u++) {
1714        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1715            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1716            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1717        }
1718    }
1719    /*
1720    for(u = 0; u < charsToTestSize; u++) {
1721      if(!(u&0xFFFF)) {
1722        log_verbose("%08X ", u);
1723      }
1724      uprv_memset(t[noCases], 0, sizeof(tester));
1725      t[noCases]->u = u;
1726      len = 0;
1727      U16_APPEND_UNSAFE(comp, len, u);
1728      comp[len] = 0;
1729      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1730      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1731      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1732      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1733    }
1734    */
1735
1736    ucol_close(coll);
1737
1738    log_verbose("Testing locales, number of cases = %i\n", noCases);
1739    for(i = 0; i<noOfLoc; i++) {
1740        status = U_ZERO_ERROR;
1741        locName = uloc_getAvailable(i);
1742        if(hasCollationElements(locName)) {
1743            char cName[256];
1744            UChar name[256];
1745            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1746
1747            for(j = 0; j<nameSize; j++) {
1748                cName[j] = (char)name[j];
1749            }
1750            cName[nameSize] = 0;
1751            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1752
1753            coll = ucol_open(locName, &status);
1754            ucol_setStrength(coll, UCOL_IDENTICAL);
1755            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1756
1757            for(u=0; u<(UChar32)noCases; u++) {
1758                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1759                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1760                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1761                    log_verbose("Testing NFC\n");
1762                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1763                    backAndForth(iter);
1764                    log_verbose("Testing NFD\n");
1765                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1766                    backAndForth(iter);
1767                }
1768            }
1769            ucol_closeElements(iter);
1770            ucol_close(coll);
1771        }
1772    }
1773    for(u = 0; u <= (UChar32)noCases; u++) {
1774        free(t[u]);
1775    }
1776    free(t);
1777}
1778
1779static void TestEmptyRule(void) {
1780  UErrorCode status = U_ZERO_ERROR;
1781  UChar rulez[] = { 0 };
1782  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1783
1784  ucol_close(coll);
1785}
1786
1787static void TestUCARules(void) {
1788  UErrorCode status = U_ZERO_ERROR;
1789  UChar b[256];
1790  UChar *rules = b;
1791  uint32_t ruleLen = 0;
1792  UCollator *UCAfromRules = NULL;
1793  UCollator *coll = ucol_open("", &status);
1794  if(status == U_FILE_ACCESS_ERROR) {
1795    log_data_err("Is your data around?\n");
1796    return;
1797  } else if(U_FAILURE(status)) {
1798    log_err("Error opening collator\n");
1799    return;
1800  }
1801  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1802
1803  log_verbose("TestUCARules\n");
1804  if(ruleLen > 256) {
1805    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1806    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1807  }
1808  log_verbose("Rules length is %d\n", ruleLen);
1809  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1810  if(U_SUCCESS(status)) {
1811    ucol_close(UCAfromRules);
1812  } else {
1813    log_verbose("Unable to create a collator from UCARules!\n");
1814  }
1815/*
1816  u_unescape(blah, b, 256);
1817  ucol_getSortKey(coll, b, 1, res, 256);
1818*/
1819  ucol_close(coll);
1820  if(rules != b) {
1821    free(rules);
1822  }
1823}
1824
1825
1826/* Pinyin tonal order */
1827/*
1828    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1829          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1830    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1831    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1832    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1833    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1834      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1835.. (\u00fc)
1836
1837However, in testing we got the following order:
1838    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1839          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1840    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1841.. (\u0113)
1842    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1843    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1844    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1845.. (\u01d8)
1846      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1847*/
1848
/*
 * Pinyin tonal order expressed with [before 1] tailorings: the tone-marked
 * vowels are placed in front of each base vowel, and the u-diaeresis forms
 * follow plain u.  The rule string and the expected ordering are handed to
 * genericRulesStarter().
 */
static void TestBefore(void) {
  /* Expected ordering, smallest first. */
  static const char *ordered[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  /* One reset per base vowel, plus the u-diaeresis chain after u. */
  const char *tailoring =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tailoring, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
1867
1868#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled test: passes the pinyin vowel ordering to genericLocaleStarter()
 * for the "zh" locale.
 */
static void TestJ784(void) {
  static const char *ordered[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", ordered, sizeof(ordered)/sizeof(ordered[0]));
}
1882#endif
1883
1884#if 0
/* superseded by the changes to the lv locale */
/*
 * Disabled test: passes the I, i, Y, y ordering to genericLocaleStarter()
 * for the "lv" locale.
 */
static void TestJ831(void) {
  static const char *ordered[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("lv", ordered, sizeof(ordered)/sizeof(ordered[0]));
}
1895#endif
1896
1897static void TestJ815(void) {
1898  const static char *data[] = {
1899    "aa",
1900      "Aa",
1901      "ab",
1902      "Ab",
1903      "ad",
1904      "Ad",
1905      "ae",
1906      "Ae",
1907      "\\u00e6",
1908      "\\u00c6",
1909      "af",
1910      "Af",
1911      "b",
1912      "B"
1913  };
1914  genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
1915  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
1916}
1917
1918
1919/*
1920"& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1921"& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1922"& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1923"& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1924"& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1925"& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1926"& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1927"& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1928"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1929*/
1930static void TestRedundantRules(void) {
1931  int32_t i;
1932
1933  static const struct {
1934      const char *rules;
1935      const char *expectedRules;
1936      const char *testdata[8];
1937      uint32_t testdatalen;
1938  } tests[] = {
1939    /* this test conflicts with positioning of CODAN placeholder */
1940       /*{
1941        "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1942        "&\\u2089<<<x",
1943        {"\\u2089", "x"}, 2
1944       }, */
1945    /* this test conflicts with the [before x] syntax tightening */
1946      /*{
1947        "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1948        "&\\u0252<<<x",
1949        {"\\u0252", "x"}, 2
1950      }, */
1951    /* this test conflicts with the [before x] syntax tightening */
1952      /*{
1953         "& a < b <<< c << d <<< e& [before 1] e <<< x",
1954         "& a <<< x < b <<< c << d <<< e",
1955        {"a", "x", "b", "c", "d", "e"}, 6
1956      }, */
1957      {
1958        "& a < b < c < d& [before 1] c < m",
1959        "& a < b < m < c < d",
1960        {"a", "b", "m", "c", "d"}, 5
1961      },
1962      {
1963        "& a < b <<< c << d <<< e& [before 3] e <<< x",
1964        "& a < b <<< c << d <<< x <<< e",
1965        {"a", "b", "c", "d", "x", "e"}, 6
1966      },
1967    /* this test conflicts with the [before x] syntax tightening */
1968      /* {
1969        "& a < b <<< c << d <<< e& [before 2] e <<< x",
1970        "& a < b <<< c <<< x << d <<< e",
1971        {"a", "b", "c", "x", "d", "e"},, 6
1972      }, */
1973      {
1974        "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1975        "& a < b <<< c << d <<< e <<< f < x < g",
1976        {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1977      },
1978      {
1979        "& a <<< b << c < d& a < m",
1980        "& a <<< b << c < m < d",
1981        {"a", "b", "c", "m", "d"}, 5
1982      },
1983      {
1984        "&a<b<<b\\u0301 &z<b",
1985        "&a<b\\u0301 &z<b",
1986        {"a", "b\\u0301", "z", "b"}, 4
1987      },
1988      {
1989        "&z<m<<<q<<<m",
1990        "&z<q<<<m",
1991        {"z", "q", "m"},3
1992      },
1993      {
1994        "&z<<<m<q<<<m",
1995        "&z<q<<<m",
1996        {"z", "q", "m"}, 3
1997      },
1998      {
1999        "& a < b < c < d& r < c",
2000        "& a < b < d& r < c",
2001        {"a", "b", "d"}, 3
2002      },
2003      {
2004        "& a < b < c < d& r < c",
2005        "& a < b < d& r < c",
2006        {"r", "c"}, 2
2007      },
2008      {
2009        "& a < b < c < d& c < m",
2010        "& a < b < c < m < d",
2011        {"a", "b", "c", "m", "d"}, 5
2012      },
2013      {
2014        "& a < b < c < d& a < m",
2015        "& a < m < b < c < d",
2016        {"a", "m", "b", "c", "d"}, 5
2017      }
2018  };
2019
2020
2021  UCollator *credundant = NULL;
2022  UCollator *cresulting = NULL;
2023  UErrorCode status = U_ZERO_ERROR;
2024  UChar rlz[2048] = { 0 };
2025  uint32_t rlen = 0;
2026
2027  for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2028    log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2029    rlen = u_unescape(tests[i].rules, rlz, 2048);
2030
2031    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2032    if(status == U_FILE_ACCESS_ERROR) {
2033      log_data_err("Is your data around?\n");
2034      return;
2035    } else if(U_FAILURE(status)) {
2036      log_err("Error opening collator\n");
2037      return;
2038    }
2039
2040    rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2041    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2042
2043    testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2044
2045    ucol_close(credundant);
2046    ucol_close(cresulting);
2047
2048    log_verbose("testing using data\n");
2049
2050    genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2051  }
2052
2053}
2054
2055static void TestExpansionSyntax(void) {
2056  int32_t i;
2057
2058  const static char *rules[] = {
2059    "&AE <<< a << b <<< c &d <<< f",
2060    "&AE <<< a <<< b << c << d < e < f <<< g",
2061    "&AE <<< B <<< C / D <<< F"
2062  };
2063
2064  const static char *expectedRules[] = {
2065    "&A <<< a / E << b / E <<< c /E  &d <<< f",
2066    "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2067    "&A <<< B / E <<< C / ED <<< F / E"
2068  };
2069
2070  const static char *testdata[][8] = {
2071    {"AE", "a", "b", "c"},
2072    {"AE", "a", "b", "c", "d", "e", "f", "g"},
2073    {"AE", "B", "C"} /* / ED <<< F / E"},*/
2074  };
2075
2076  const static uint32_t testdatalen[] = {
2077      4,
2078      8,
2079      3
2080  };
2081
2082
2083
2084  UCollator *credundant = NULL;
2085  UCollator *cresulting = NULL;
2086  UErrorCode status = U_ZERO_ERROR;
2087  UChar rlz[2048] = { 0 };
2088  uint32_t rlen = 0;
2089
2090  for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2091    log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2092    rlen = u_unescape(rules[i], rlz, 2048);
2093
2094    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2095    if(status == U_FILE_ACCESS_ERROR) {
2096      log_data_err("Is your data around?\n");
2097      return;
2098    } else if(U_FAILURE(status)) {
2099      log_err("Error opening collator\n");
2100      return;
2101    }
2102    rlen = u_unescape(expectedRules[i], rlz, 2048);
2103    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2104
2105    /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2106    /* as a hard error test, but only in information mode */
2107    testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2108
2109    ucol_close(credundant);
2110    ucol_close(cresulting);
2111
2112    log_verbose("testing using data\n");
2113
2114    genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2115  }
2116}
2117
2118static void TestCase(void)
2119{
2120    const static UChar gRules[MAX_TOKEN_LEN] =
2121    /*" & 0 < 1,\u2461<a,A"*/
2122    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2123
2124    const static UChar testCase[][MAX_TOKEN_LEN] =
2125    {
2126        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2127        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2128        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2129        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2130    };
2131
2132    const static UCollationResult caseTestResults[][9] =
2133    {
2134        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2135        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2136        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2137        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2138    };
2139
2140    const static UColAttributeValue caseTestAttributes[][2] =
2141    {
2142        { UCOL_LOWER_FIRST, UCOL_OFF},
2143        { UCOL_UPPER_FIRST, UCOL_OFF},
2144        { UCOL_LOWER_FIRST, UCOL_ON},
2145        { UCOL_UPPER_FIRST, UCOL_ON}
2146    };
2147    int32_t i,j,k;
2148    UErrorCode status = U_ZERO_ERROR;
2149    UCollationElements *iter;
2150    UCollator  *myCollation;
2151    myCollation = ucol_open("en_US", &status);
2152
2153    if(U_FAILURE(status)){
2154        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2155        return;
2156    }
2157    log_verbose("Testing different case settings\n");
2158    ucol_setStrength(myCollation, UCOL_TERTIARY);
2159
2160    for(k = 0; k<4; k++) {
2161      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2162      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2163      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2164      for (i = 0; i < 3 ; i++) {
2165        for(j = i+1; j<4; j++) {
2166          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2167        }
2168      }
2169    }
2170    ucol_close(myCollation);
2171
2172    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2173    if(U_FAILURE(status)){
2174        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2175        return;
2176    }
2177    log_verbose("Testing different case settings with custom rules\n");
2178    ucol_setStrength(myCollation, UCOL_TERTIARY);
2179
2180    for(k = 0; k<4; k++) {
2181      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2182      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2183      for (i = 0; i < 3 ; i++) {
2184        for(j = i+1; j<4; j++) {
2185          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2186          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2187          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2188          backAndForth(iter);
2189          ucol_closeElements(iter);
2190          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2191          backAndForth(iter);
2192          ucol_closeElements(iter);
2193        }
2194      }
2195    }
2196    ucol_close(myCollation);
2197    {
2198      const static char *lowerFirst[] = {
2199        "h",
2200        "H",
2201        "ch",
2202        "Ch",
2203        "CH",
2204        "cha",
2205        "chA",
2206        "Cha",
2207        "ChA",
2208        "CHa",
2209        "CHA",
2210        "i",
2211        "I"
2212      };
2213
2214      const static char *upperFirst[] = {
2215        "H",
2216        "h",
2217        "CH",
2218        "Ch",
2219        "ch",
2220        "CHA",
2221        "CHa",
2222        "ChA",
2223        "Cha",
2224        "chA",
2225        "cha",
2226        "I",
2227        "i"
2228      };
2229      log_verbose("mixed case test\n");
2230      log_verbose("lower first, case level off\n");
2231      genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2232      log_verbose("upper first, case level off\n");
2233      genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2234      log_verbose("lower first, case level on\n");
2235      genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2236      log_verbose("upper first, case level on\n");
2237      genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2238    }
2239
2240}
2241
2242static void TestIncrementalNormalize(void) {
2243
2244    /*UChar baseA     =0x61;*/
2245    UChar baseA     =0x41;
2246/*    UChar baseB     = 0x42;*/
2247    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2248    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2249    /*
2250        0x316 is combining grave accent below, cc=220
2251        0x321 is combining palatalized hook below, cc=202
2252        0x300 is combining grave accent, cc=230
2253    */
2254
2255#define MAXSLEN 2000
2256    /*int          maxSLen   = 64000;*/
2257    int          sLen;
2258    int          i;
2259
2260    UCollator        *coll;
2261    UErrorCode       status = U_ZERO_ERROR;
2262    UCollationResult result;
2263
2264    int32_t myQ = getTestOption(QUICK_OPTION);
2265
2266    if(getTestOption(QUICK_OPTION) < 0) {
2267        setTestOption(QUICK_OPTION, 1);
2268    }
2269
2270    {
2271        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2272        /*          most buffers along the way.*/
2273        UChar            strA[MAXSLEN+1];
2274        UChar            strB[MAXSLEN+1];
2275
2276        coll = ucol_open("en_US", &status);
2277        if(status == U_FILE_ACCESS_ERROR) {
2278          log_data_err("Is your data around?\n");
2279          return;
2280        } else if(U_FAILURE(status)) {
2281          log_err("Error opening collator\n");
2282          return;
2283        }
2284        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2285
2286        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2287        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2288        /*for (sLen = 1000; sLen<1001; sLen++) {*/
2289        for (sLen = 500; sLen<501; sLen++) {
2290        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2291            strA[0] = baseA;
2292            strB[0] = baseA;
2293            for (i=1; i<=sLen-1; i++) {
2294                strA[i] = ccMix[i % 3];
2295                strB[sLen-i] = ccMix[i % 3];
2296            }
2297            strA[sLen]   = 0;
2298            strB[sLen]   = 0;
2299
2300            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2301            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2302            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2303            doTest(coll, strA, strB, UCOL_EQUAL);
2304        }
2305    }
2306
2307    setTestOption(QUICK_OPTION, myQ);
2308
2309
2310    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2311    /*         of the string.  Checks a couple of edge cases.*/
2312
2313    {
2314        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2315        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2316        ucol_setStrength(coll, UCOL_TERTIARY);
2317        doTest(coll, strA, strB, UCOL_EQUAL);
2318    }
2319
2320    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2321
2322    {
2323      /* New UCA  3.1.1.
2324       * test below used a code point from Desseret, which sorts differently
2325       * than d800 dc00
2326       */
2327        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2328        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2329        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2330        ucol_setStrength(coll, UCOL_TERTIARY);
2331        doTest(coll, strA, strB, UCOL_GREATER);
2332    }
2333
2334    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2335
2336    {
2337        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2338        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2339        char  sortKeyA[50];
2340        char  sortKeyAz[50];
2341        char  sortKeyB[50];
2342        char  sortKeyBz[50];
2343        int   r;
2344
2345        /* there used to be -3 here. Hmmmm.... */
2346        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2347        result = ucol_strcoll(coll, strA, 3, strB, 3);
2348        if (result != UCOL_GREATER) {
2349            log_err("ERROR 1 in test 4\n");
2350        }
2351        result = ucol_strcoll(coll, strA, -1, strB, -1);
2352        if (result != UCOL_EQUAL) {
2353            log_err("ERROR 2 in test 4\n");
2354        }
2355
2356        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2357        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2358        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2359        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2360
2361        r = strcmp(sortKeyA, sortKeyAz);
2362        if (r <= 0) {
2363            log_err("Error 3 in test 4\n");
2364        }
2365        r = strcmp(sortKeyA, sortKeyB);
2366        if (r <= 0) {
2367            log_err("Error 4 in test 4\n");
2368        }
2369        r = strcmp(sortKeyAz, sortKeyBz);
2370        if (r != 0) {
2371            log_err("Error 5 in test 4\n");
2372        }
2373
2374        ucol_setStrength(coll, UCOL_IDENTICAL);
2375        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2376        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2377        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2378        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2379
2380        r = strcmp(sortKeyA, sortKeyAz);
2381        if (r <= 0) {
2382            log_err("Error 6 in test 4\n");
2383        }
2384        r = strcmp(sortKeyA, sortKeyB);
2385        if (r <= 0) {
2386            log_err("Error 7 in test 4\n");
2387        }
2388        r = strcmp(sortKeyAz, sortKeyBz);
2389        if (r != 0) {
2390            log_err("Error 8 in test 4\n");
2391        }
2392        ucol_setStrength(coll, UCOL_TERTIARY);
2393    }
2394
2395
2396    /*  Test 5:  Null characters in non-normal source strings.*/
2397
2398    {
2399        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2400        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2401        char  sortKeyA[50];
2402        char  sortKeyAz[50];
2403        char  sortKeyB[50];
2404        char  sortKeyBz[50];
2405        int   r;
2406
2407        result = ucol_strcoll(coll, strA, 6, strB, 6);
2408        if (result != UCOL_GREATER) {
2409            log_err("ERROR 1 in test 5\n");
2410        }
2411        result = ucol_strcoll(coll, strA, -1, strB, -1);
2412        if (result != UCOL_EQUAL) {
2413            log_err("ERROR 2 in test 5\n");
2414        }
2415
2416        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2417        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2418        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2419        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2420
2421        r = strcmp(sortKeyA, sortKeyAz);
2422        if (r <= 0) {
2423            log_err("Error 3 in test 5\n");
2424        }
2425        r = strcmp(sortKeyA, sortKeyB);
2426        if (r <= 0) {
2427            log_err("Error 4 in test 5\n");
2428        }
2429        r = strcmp(sortKeyAz, sortKeyBz);
2430        if (r != 0) {
2431            log_err("Error 5 in test 5\n");
2432        }
2433
2434        ucol_setStrength(coll, UCOL_IDENTICAL);
2435        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2436        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2437        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2438        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2439
2440        r = strcmp(sortKeyA, sortKeyAz);
2441        if (r <= 0) {
2442            log_err("Error 6 in test 5\n");
2443        }
2444        r = strcmp(sortKeyA, sortKeyB);
2445        if (r <= 0) {
2446            log_err("Error 7 in test 5\n");
2447        }
2448        r = strcmp(sortKeyAz, sortKeyBz);
2449        if (r != 0) {
2450            log_err("Error 8 in test 5\n");
2451        }
2452        ucol_setStrength(coll, UCOL_TERTIARY);
2453    }
2454
2455
2456    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2457
2458    {
2459        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2460        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2461
2462        result = ucol_strcoll(coll, strA, 5, strB, 5);
2463        if (result != UCOL_LESS) {
2464            log_err("Error 1 in test 6\n");
2465        }
2466        result = ucol_strcoll(coll, strA, -1, strB, -1);
2467        if (result != UCOL_EQUAL) {
2468            log_err("Error 2 in test 6\n");
2469        }
2470    }
2471
2472    ucol_close(coll);
2473}
2474
2475
2476
2477#if 0
2478static void TestGetCaseBit(void) {
2479  static const char *caseBitData[] = {
2480    "a", "A", "ch", "Ch", "CH",
2481      "\\uFF9E", "\\u0009"
2482  };
2483
2484  static const uint8_t results[] = {
2485    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
2486      UCOL_UPPER_CASE, UCOL_LOWER_CASE
2487  };
2488
2489  uint32_t i, blen = 0;
2490  UChar b[256] = {0};
2491  UErrorCode status = U_ZERO_ERROR;
2492  UCollator *UCA = ucol_open("", &status);
2493  uint8_t res = 0;
2494
2495  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
2496    blen = u_unescape(caseBitData[i], b, 256);
2497    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
2498    if(results[i] != res) {
2499      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
2500    }
2501  }
2502}
2503#endif
2504
2505static void TestHangulTailoring(void) {
2506    static const char *koreanData[] = {
2507        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2508            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2509            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2510            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2511            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2512            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2513    };
2514
2515    const char *rules =
2516        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2517        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2518        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2519        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2520        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2521        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2522
2523
2524  UErrorCode status = U_ZERO_ERROR;
2525  UChar rlz[2048] = { 0 };
2526  uint32_t rlen = u_unescape(rules, rlz, 2048);
2527
2528  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2529  if(status == U_FILE_ACCESS_ERROR) {
2530    log_data_err("Is your data around?\n");
2531    return;
2532  } else if(U_FAILURE(status)) {
2533    log_err("Error opening collator\n");
2534    return;
2535  }
2536
2537  log_verbose("Using start of korean rules\n");
2538
2539  if(U_SUCCESS(status)) {
2540    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541  } else {
2542    log_err("Unable to open collator with rules %s\n", rules);
2543  }
2544
2545  log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2546  ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2547  genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2548
2549  ucol_close(coll);
2550
2551  log_verbose("Using ko__LOTUS locale\n");
2552  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2553}
2554
2555static void TestCompressOverlap(void) {
2556    UChar       secstr[150];
2557    UChar       tertstr[150];
2558    UErrorCode  status = U_ZERO_ERROR;
2559    UCollator  *coll;
2560    char        result[200];
2561    uint32_t    resultlen;
2562    int         count = 0;
2563    char       *tempptr;
2564
2565    coll = ucol_open("", &status);
2566
2567    if (U_FAILURE(status)) {
2568        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2569        return;
2570    }
2571    while (count < 149) {
2572        secstr[count] = 0x0020; /* [06, 05, 05] */
2573        tertstr[count] = 0x0020;
2574        count ++;
2575    }
2576
2577    /* top down compression ----------------------------------- */
2578    secstr[count] = 0x0332; /* [, 87, 05] */
2579    tertstr[count] = 0x3000; /* [06, 05, 07] */
2580
2581    /* no compression secstr should have 150 secondary bytes, tertstr should
2582    have 150 tertiary bytes.
2583    with correct overlapping compression, secstr should have 4 secondary
2584    bytes, tertstr should have > 2 tertiary bytes */
2585    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2586    tempptr = uprv_strchr(result, 1) + 1;
2587    while (*(tempptr + 1) != 1) {
2588        /* the last secondary collation element is not checked since it is not
2589        part of the compression */
2590        if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2591            log_err("Secondary compression overlapped\n");
2592        }
2593        tempptr ++;
2594    }
2595
2596    /* tertiary top/bottom/common for en_US is similar to the secondary
2597    top/bottom/common */
2598    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2599    tempptr = uprv_strrchr(result, 1) + 1;
2600    while (*(tempptr + 1) != 0) {
2601        /* the last secondary collation element is not checked since it is not
2602        part of the compression */
2603        if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2604            log_err("Tertiary compression overlapped\n");
2605        }
2606        tempptr ++;
2607    }
2608
2609    /* bottom up compression ------------------------------------- */
2610    secstr[count] = 0;
2611    tertstr[count] = 0;
2612    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2613    tempptr = uprv_strchr(result, 1) + 1;
2614    while (*(tempptr + 1) != 1) {
2615        /* the last secondary collation element is not checked since it is not
2616        part of the compression */
2617        if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2618            log_err("Secondary compression overlapped\n");
2619        }
2620        tempptr ++;
2621    }
2622
2623    /* tertiary top/bottom/common for en_US is similar to the secondary
2624    top/bottom/common */
2625    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2626    tempptr = uprv_strrchr(result, 1) + 1;
2627    while (*(tempptr + 1) != 0) {
2628        /* the last secondary collation element is not checked since it is not
2629        part of the compression */
2630        if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2631            log_err("Tertiary compression overlapped\n");
2632        }
2633        tempptr ++;
2634    }
2635
2636    ucol_close(coll);
2637}
2638
2639static void TestCyrillicTailoring(void) {
2640  static const char *test[] = {
2641    "\\u0410b",
2642      "\\u0410\\u0306a",
2643      "\\u04d0A"
2644  };
2645
2646    /* Russian overrides contractions, so this test is not valid anymore */
2647    /*genericLocaleStarter("ru", test, 3);*/
2648
2649    genericLocaleStarter("root", test, 3);
2650    genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2651    genericRulesStarter("&Z < \\u0410", test, 3);
2652    genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2653    genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2654    genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2655    genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2656}
2657
/*
 * Passes two expected orderings to genericRulesStarter() under a rule that
 * suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  static const char *cyrillicFirst[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };
  static const char *latinFirst[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

  genericRulesStarter(suppressRule, latinFirst, 3);
  genericRulesStarter(suppressRule, cyrillicFirst, 3);
}
2674
2675static void TestContraction(void) {
2676    const static char *testrules[] = {
2677        "&A = AB / B",
2678        "&A = A\\u0306/\\u0306",
2679        "&c = ch / h"
2680    };
2681    const static UChar testdata[][2] = {
2682        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2683        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2684        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2685    };
2686    const static UChar testdata2[][2] = {
2687        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2688        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2689        {0x0063 /* 'c' */, 0x006C /* 'l' */}
2690    };
2691    const static char *testrules3[] = {
2692        "&z < xyz &xyzw << B",
2693        "&z < xyz &xyz << B / w",
2694        "&z < ch &achm << B",
2695        "&z < ch &a << B / chm",
2696        "&\\ud800\\udc00w << B",
2697        "&\\ud800\\udc00 << B / w",
2698        "&a\\ud800\\udc00m << B",
2699        "&a << B / \\ud800\\udc00m",
2700    };
2701
2702    UErrorCode  status   = U_ZERO_ERROR;
2703    UCollator  *coll;
2704    UChar       rule[256] = {0};
2705    uint32_t    rlen     = 0;
2706    int         i;
2707
2708    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2709        UCollationElements *iter1;
2710        int j = 0;
2711        log_verbose("Rule %s for testing\n", testrules[i]);
2712        rlen = u_unescape(testrules[i], rule, 32);
2713        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2714        if (U_FAILURE(status)) {
2715            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2716            return;
2717        }
2718        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2719        if (U_FAILURE(status)) {
2720            log_err("Collation iterator creation failed\n");
2721            return;
2722        }
2723        while (j < 2) {
2724            UCollationElements *iter2 = ucol_openElements(coll,
2725                                                         &(testdata[i][j]),
2726                                                         1, &status);
2727            uint32_t ce;
2728            if (U_FAILURE(status)) {
2729                log_err("Collation iterator creation failed\n");
2730                return;
2731            }
2732            ce = ucol_next(iter2, &status);
2733            while (ce != UCOL_NULLORDER) {
2734                if ((uint32_t)ucol_next(iter1, &status) != ce) {
2735                    log_err("Collation elements in contraction split does not match\n");
2736                    return;
2737                }
2738                ce = ucol_next(iter2, &status);
2739            }
2740            j ++;
2741            ucol_closeElements(iter2);
2742        }
2743        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2744            log_err("Collation elements not exhausted\n");
2745            return;
2746        }
2747        ucol_closeElements(iter1);
2748        ucol_close(coll);
2749    }
2750
2751    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2752    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2753    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2754        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2755                testdata2[0][0], testdata2[0][1], testdata2[1][0],
2756                testdata2[1][1]);
2757        return;
2758    }
2759    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2760        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2761                testdata2[1][0], testdata2[1][1], testdata2[2][0],
2762                testdata2[2][1]);
2763        return;
2764    }
2765    ucol_close(coll);
2766
2767    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2768        UCollator          *coll1,
2769                           *coll2;
2770        UCollationElements *iter1,
2771                           *iter2;
2772        UChar               ch = 0x0042 /* 'B' */;
2773        uint32_t            ce;
2774        rlen = u_unescape(testrules3[i], rule, 32);
2775        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2776        rlen = u_unescape(testrules3[i + 1], rule, 32);
2777        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2778        if (U_FAILURE(status)) {
2779            log_err("Collator creation failed %s\n", testrules[i]);
2780            return;
2781        }
2782        iter1 = ucol_openElements(coll1, &ch, 1, &status);
2783        iter2 = ucol_openElements(coll2, &ch, 1, &status);
2784        if (U_FAILURE(status)) {
2785            log_err("Collation iterator creation failed\n");
2786            return;
2787        }
2788        ce = ucol_next(iter1, &status);
2789        if (U_FAILURE(status)) {
2790            log_err("Retrieving ces failed\n");
2791            return;
2792        }
2793        while (ce != UCOL_NULLORDER) {
2794            if (ce != (uint32_t)ucol_next(iter2, &status)) {
2795                log_err("CEs does not match\n");
2796                return;
2797            }
2798            ce = ucol_next(iter1, &status);
2799            if (U_FAILURE(status)) {
2800                log_err("Retrieving ces failed\n");
2801                return;
2802            }
2803        }
2804        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2805            log_err("CEs not exhausted\n");
2806            return;
2807        }
2808        ucol_closeElements(iter1);
2809        ucol_closeElements(iter2);
2810        ucol_close(coll1);
2811        ucol_close(coll2);
2812    }
2813}
2814
2815static void TestExpansion(void) {
2816    const static char *testrules[] = {
2817        "&J << K / B & K << M",
2818        "&J << K / B << M"
2819    };
2820    const static UChar testdata[][3] = {
2821        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2822        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2823        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2824        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2825        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2826        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2827    };
2828
2829    UErrorCode  status   = U_ZERO_ERROR;
2830    UCollator  *coll;
2831    UChar       rule[256] = {0};
2832    uint32_t    rlen     = 0;
2833    int         i;
2834
2835    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2836        int j = 0;
2837        log_verbose("Rule %s for testing\n", testrules[i]);
2838        rlen = u_unescape(testrules[i], rule, 32);
2839        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2840        if (U_FAILURE(status)) {
2841            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2842            return;
2843        }
2844
2845        for (j = 0; j < 5; j ++) {
2846            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2847        }
2848        ucol_close(coll);
2849    }
2850}
2851
#if 0
/*
 * This test exercises the current limitations of the collation engine.
 * It always fails, so it is compiled out by default.
 *
 * Each brace-enclosed section below defines one tailoring rule plus one or
 * more string lists and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both string orders are run with decomposition mode on and then off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): the rule and string lists in this section are identical to
   * the "nulls in contractions" section above - confirm whether different
   * data was intended. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    /* tlimit03 is only run with the shifted/quaternary settings. */
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
2943
2944static void TestBocsuCoverage(void) {
2945  UErrorCode status = U_ZERO_ERROR;
2946  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2947  UChar       test[256] = {0};
2948  uint32_t    tlen     = u_unescape(testString, test, 32);
2949  uint8_t key[256]     = {0};
2950  uint32_t klen         = 0;
2951
2952  UCollator *coll = ucol_open("", &status);
2953  if(U_SUCCESS(status)) {
2954  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2955
2956  klen = ucol_getSortKey(coll, test, tlen, key, 256);
2957
2958  ucol_close(coll);
2959  } else {
2960    log_data_err("Couldn't open UCA\n");
2961  }
2962}
2963
2964static void TestVariableTopSetting(void) {
2965  UErrorCode status = U_ZERO_ERROR;
2966  const UChar *current = NULL;
2967  uint32_t varTopOriginal = 0, varTop1, varTop2;
2968  UCollator *coll = ucol_open("", &status);
2969  if(U_SUCCESS(status)) {
2970
2971  uint32_t strength = 0;
2972  uint16_t specs = 0;
2973  uint32_t chOffset = 0;
2974  uint32_t chLen = 0;
2975  uint32_t exOffset = 0;
2976  uint32_t exLen = 0;
2977  uint32_t oldChOffset = 0;
2978  uint32_t oldChLen = 0;
2979  uint32_t oldExOffset = 0;
2980  uint32_t oldExLen = 0;
2981  uint32_t prefixOffset = 0;
2982  uint32_t prefixLen = 0;
2983
2984  UBool startOfRules = TRUE;
2985  UColTokenParser src;
2986  UColOptionSet opts;
2987
2988  UChar *rulesCopy = NULL;
2989  uint32_t rulesLen;
2990
2991  UCollationResult result;
2992
2993  UChar first[256] = { 0 };
2994  UChar second[256] = { 0 };
2995  UParseError parseError;
2996  int32_t myQ = getTestOption(QUICK_OPTION);
2997
2998  uprv_memset(&src, 0, sizeof(UColTokenParser));
2999
3000  src.opts = &opts;
3001
3002  if(getTestOption(QUICK_OPTION) <= 0) {
3003    setTestOption(QUICK_OPTION, 1);
3004  }
3005
3006  /* this test will fail when normalization is turned on */
3007  /* therefore we always turn off exhaustive mode for it */
3008  { /* QUICK > 0*/
3009    log_verbose("Slide variable top over UCARules\n");
3010    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3011    rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3012    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3013
3014    if(U_SUCCESS(status) && rulesLen > 0) {
3015      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3016      src.current = src.source = rulesCopy;
3017      src.end = rulesCopy+rulesLen;
3018      src.extraCurrent = src.end;
3019      src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3020
3021	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3022	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3023      while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3024        strength = src.parsedToken.strength;
3025        chOffset = src.parsedToken.charsOffset;
3026        chLen = src.parsedToken.charsLen;
3027        exOffset = src.parsedToken.extensionOffset;
3028        exLen = src.parsedToken.extensionLen;
3029        prefixOffset = src.parsedToken.prefixOffset;
3030        prefixLen = src.parsedToken.prefixLen;
3031        specs = src.parsedToken.flags;
3032
3033        startOfRules = FALSE;
3034        {
3035          log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3036        }
3037        if(strength == UCOL_PRIMARY) {
3038          status = U_ZERO_ERROR;
3039          varTopOriginal = ucol_getVariableTop(coll, &status);
3040          varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3041          if(U_FAILURE(status)) {
3042            char buffer[256];
3043            char *buf = buffer;
3044            uint32_t i = 0, j;
3045            uint32_t CE = UCOL_NO_MORE_CES;
3046
3047            /* before we start screaming, let's see if there is a problem with the rules */
3048            UErrorCode collIterateStatus = U_ZERO_ERROR;
3049            collIterate *s = uprv_new_collIterate(&collIterateStatus);
3050            uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3051
3052            CE = ucol_getNextCE(coll, s, &status);
3053
3054            for(i = 0; i < oldChLen; i++) {
3055              j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3056              buf += j;
3057            }
3058            if(status == U_PRIMARY_TOO_LONG_ERROR) {
3059              log_verbose("= Expected failure for %s =", buffer);
3060            } else {
3061              if(uprv_collIterateAtEnd(s)) {
3062                log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3063                  oldChOffset, u_errorName(status), buffer);
3064              } else {
3065                log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3066                  buffer);
3067              }
3068            }
3069            uprv_delete_collIterate(s);
3070          }
3071          varTop2 = ucol_getVariableTop(coll, &status);
3072          if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3073            log_err("cannot retrieve set varTop value!\n");
3074            continue;
3075          }
3076
3077          if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3078
3079            u_strncpy(first, src.source+oldChOffset, oldChLen);
3080            u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3081            u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3082            first[2*oldChLen+chLen] = 0;
3083
3084            if(oldExLen == 0) {
3085              u_strncpy(second, src.source+chOffset, chLen);
3086              second[chLen] = 0;
3087            } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3088              u_strncpy(second, src.source+oldExOffset, oldExLen);
3089              u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3090              u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3091              second[2*oldExLen+chLen] = 0;
3092            }
3093            result = ucol_strcoll(coll, first, -1, second, -1);
3094            if(result == UCOL_EQUAL) {
3095              doTest(coll, first, second, UCOL_EQUAL);
3096            } else {
3097              log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3098            }
3099          }
3100        }
3101        if(strength != UCOL_TOK_RESET) {
3102          oldChOffset = chOffset;
3103          oldChLen = chLen;
3104          oldExOffset = exOffset;
3105          oldExLen = exLen;
3106        }
3107      }
3108      status = U_ZERO_ERROR;
3109    }
3110    else {
3111      log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3112      return;
3113    }
3114    if (U_FAILURE(status)) {
3115        log_err("Error parsing rules %s\n", u_errorName(status));
3116        return;
3117    }
3118    status = U_ZERO_ERROR;
3119  }
3120
3121  setTestOption(QUICK_OPTION, myQ);
3122
3123  log_verbose("Testing setting variable top to contractions\n");
3124  {
3125    UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3126    int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3127    while(*conts != 0) {
3128      /*
3129       * A continuation is NUL-terminated and NUL-padded
3130       * except if it has the maximum length.
3131       */
3132      int32_t contractionLength = maxUCAContractionLength;
3133      while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3134        --contractionLength;
3135      }
3136      if(*(conts+1)==0) { /* pre-context */
3137        varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3138      } else {
3139        varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3140      }
3141      if(U_FAILURE(status)) {
3142        if(status == U_PRIMARY_TOO_LONG_ERROR) {
3143          /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3144           * therefore it is not an error when it complains about them. */
3145          log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3146                      *conts, *(conts+1), *(conts+2));
3147        } else {
3148          log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3149                  *conts, *(conts+1), *(conts+2), u_errorName(status));
3150        }
3151        status = U_ZERO_ERROR;
3152      }
3153      conts+=maxUCAContractionLength;
3154    }
3155
3156    status = U_ZERO_ERROR;
3157
3158    first[0] = 0x0040;
3159    first[1] = 0x0050;
3160    first[2] = 0x0000;
3161
3162    ucol_setVariableTop(coll, first, -1, &status);
3163
3164    if(U_SUCCESS(status)) {
3165      log_err("Invalid contraction succeded in setting variable top!\n");
3166    }
3167
3168  }
3169
3170  log_verbose("Test restoring variable top\n");
3171
3172  status = U_ZERO_ERROR;
3173  ucol_restoreVariableTop(coll, varTopOriginal, &status);
3174  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3175    log_err("Couldn't restore old variable top\n");
3176  }
3177
3178  log_verbose("Testing calling with error set\n");
3179
3180  status = U_INTERNAL_PROGRAM_ERROR;
3181  varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3182  varTop2 = ucol_getVariableTop(coll, &status);
3183  ucol_restoreVariableTop(coll, varTop2, &status);
3184  varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3185  varTop2 = ucol_getVariableTop(NULL, &status);
3186  ucol_restoreVariableTop(NULL, varTop2, &status);
3187  if(status != U_INTERNAL_PROGRAM_ERROR) {
3188    log_err("Bad reaction to passed error!\n");
3189  }
3190  uprv_free(src.source);
3191  ucol_close(coll);
3192  } else {
3193    log_data_err("Couldn't open UCA collator\n");
3194  }
3195
3196}
3197
3198static void TestNonChars(void) {
3199  static const char *test[] = {
3200      "\\u0000",  /* ignorable */
3201      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3202      "\\uFDD0", "\\uFDEF",
3203      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3204      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3205      "\\U0003FFFE", "\\U0003FFFF",
3206      "\\U0004FFFE", "\\U0004FFFF",
3207      "\\U0005FFFE", "\\U0005FFFF",
3208      "\\U0006FFFE", "\\U0006FFFF",
3209      "\\U0007FFFE", "\\U0007FFFF",
3210      "\\U0008FFFE", "\\U0008FFFF",
3211      "\\U0009FFFE", "\\U0009FFFF",
3212      "\\U000AFFFE", "\\U000AFFFF",
3213      "\\U000BFFFE", "\\U000BFFFF",
3214      "\\U000CFFFE", "\\U000CFFFF",
3215      "\\U000DFFFE", "\\U000DFFFF",
3216      "\\U000EFFFE", "\\U000EFFFF",
3217      "\\U000FFFFE", "\\U000FFFFF",
3218      "\\U0010FFFE", "\\U0010FFFF",
3219      "\\uFFFF"  /* special character with maximum primary weight */
3220  };
3221  UErrorCode status = U_ZERO_ERROR;
3222  UCollator *coll = ucol_open("en_US", &status);
3223
3224  log_verbose("Test non characters\n");
3225
3226  if(U_SUCCESS(status)) {
3227    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3228  } else {
3229    log_err_status(status, "Unable to open collator\n");
3230  }
3231
3232  ucol_close(coll);
3233}
3234
3235static void TestExtremeCompression(void) {
3236  static char *test[4];
3237  int32_t j = 0, i = 0;
3238
3239  for(i = 0; i<4; i++) {
3240    test[i] = (char *)malloc(2048*sizeof(char));
3241  }
3242
3243  for(j = 20; j < 500; j++) {
3244    for(i = 0; i<4; i++) {
3245      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3246      test[i][j-1] = (char)('a'+i);
3247      test[i][j] = 0;
3248    }
3249    genericLocaleStarter("en_US", (const char **)test, 4);
3250  }
3251
3252
3253  for(i = 0; i<4; i++) {
3254    free(test[i]);
3255  }
3256}
3257
#if 0
/*
 * Disabled alternative version of TestExtremeCompression; the enabled
 * definition with the same name precedes this block.
 *
 * NOTE(review): before this could be re-enabled, the following would need
 * attention:
 *  - ucol_open() is given `status` rather than `&status`, and the resulting
 *    collator is neither used nor closed;
 *  - the first loop fills only j-2 bytes with 'a' but writes the final
 *    letter at index j-1, leaving index j-2 with whatever an earlier
 *    iteration stored there;
 *  - genericLocaleStarter() is called once, after the loop, so only the
 *    strings from the last iteration are compared;
 *  - the second loop rewrites test[0] without comparing anything.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
3287
3288static void TestSurrogates(void) {
3289  static const char *test[] = {
3290    "z","\\ud900\\udc25",  "\\ud805\\udc50",
3291       "\\ud800\\udc00y",  "\\ud800\\udc00r",
3292       "\\ud800\\udc00f",  "\\ud800\\udc00",
3293       "\\ud800\\udc00c", "\\ud800\\udc00b",
3294       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
3295       "\\ud800\\udc00a",
3296       "c", "b"
3297  };
3298
3299  static const char *rule =
3300    "&z < \\ud900\\udc25   < \\ud805\\udc50"
3301       "< \\ud800\\udc00y  < \\ud800\\udc00r"
3302       "< \\ud800\\udc00f  << \\ud800\\udc00"
3303       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3304       "< \\ud800\\udc00a  < c < b" ;
3305
3306  genericRulesStarter(rule, test, 14);
3307}
3308
3309/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3310static void TestPrefix(void) {
3311  uint32_t i;
3312
3313  static const struct {
3314    const char *rules;
3315    const char *data[50];
3316    const uint32_t len;
3317  } tests[] = {
3318    { "&z <<< z|a",
3319      {"zz", "za"}, 2 },
3320
3321    { "&z <<< z|   a",
3322      {"zz", "za"}, 2 },
3323    { "[strength I]"
3324      "&a=\\ud900\\udc25"
3325      "&z<<<\\ud900\\udc25|a",
3326      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3327  };
3328
3329
3330  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3331    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3332  }
3333}
3334
3335/* This test uses data suplied by Masashiko Maedera to test the implementation */
3336/* JIS X 4061 collation order implementation                                   */
3337static void TestNewJapanese(void) {
3338
3339  static const char * const test1[] = {
3340      "\\u30b7\\u30e3\\u30fc\\u30ec",
3341      "\\u30b7\\u30e3\\u30a4",
3342      "\\u30b7\\u30e4\\u30a3",
3343      "\\u30b7\\u30e3\\u30ec",
3344      "\\u3061\\u3087\\u3053",
3345      "\\u3061\\u3088\\u3053",
3346      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3347      "\\u3066\\u30fc\\u305f",
3348      "\\u30c6\\u30fc\\u30bf",
3349      "\\u30c6\\u30a7\\u30bf",
3350      "\\u3066\\u3048\\u305f",
3351      "\\u3067\\u30fc\\u305f",
3352      "\\u30c7\\u30fc\\u30bf",
3353      "\\u30c7\\u30a7\\u30bf",
3354      "\\u3067\\u3048\\u305f",
3355      "\\u3066\\u30fc\\u305f\\u30fc",
3356      "\\u30c6\\u30fc\\u30bf\\u30a1",
3357      "\\u30c6\\u30a7\\u30bf\\u30fc",
3358      "\\u3066\\u3047\\u305f\\u3041",
3359      "\\u3066\\u3048\\u305f\\u30fc",
3360      "\\u3067\\u30fc\\u305f\\u30fc",
3361      "\\u30c7\\u30fc\\u30bf\\u30a1",
3362      "\\u3067\\u30a7\\u305f\\u30a1",
3363      "\\u30c7\\u3047\\u30bf\\u3041",
3364      "\\u30c7\\u30a8\\u30bf\\u30a2",
3365      "\\u3072\\u3086",
3366      "\\u3073\\u3085\\u3042",
3367      "\\u3074\\u3085\\u3042",
3368      "\\u3073\\u3085\\u3042\\u30fc",
3369      "\\u30d3\\u30e5\\u30a2\\u30fc",
3370      "\\u3074\\u3085\\u3042\\u30fc",
3371      "\\u30d4\\u30e5\\u30a2\\u30fc",
3372      "\\u30d2\\u30e5\\u30a6",
3373      "\\u30d2\\u30e6\\u30a6",
3374      "\\u30d4\\u30e5\\u30a6\\u30a2",
3375      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3376      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3377      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3378      "\\u3072\\u3085\\u3093",
3379      "\\u3074\\u3085\\u3093",
3380      "\\u3075\\u30fc\\u308a",
3381      "\\u30d5\\u30fc\\u30ea",
3382      "\\u3075\\u3045\\u308a",
3383      "\\u3075\\u30a5\\u308a",
3384      "\\u3075\\u30a5\\u30ea",
3385      "\\u30d5\\u30a6\\u30ea",
3386      "\\u3076\\u30fc\\u308a",
3387      "\\u30d6\\u30fc\\u30ea",
3388      "\\u3076\\u3045\\u308a",
3389      "\\u30d6\\u30a5\\u308a",
3390      "\\u3077\\u3046\\u308a",
3391      "\\u30d7\\u30a6\\u30ea",
3392      "\\u3075\\u30fc\\u308a\\u30fc",
3393      "\\u30d5\\u30a5\\u30ea\\u30fc",
3394      "\\u3075\\u30a5\\u308a\\u30a3",
3395      "\\u30d5\\u3045\\u308a\\u3043",
3396      "\\u30d5\\u30a6\\u30ea\\u30fc",
3397      "\\u3075\\u3046\\u308a\\u3043",
3398      "\\u30d6\\u30a6\\u30ea\\u30a4",
3399      "\\u3077\\u30fc\\u308a\\u30fc",
3400      "\\u3077\\u30a5\\u308a\\u30a4",
3401      "\\u3077\\u3046\\u308a\\u30fc",
3402      "\\u30d7\\u30a6\\u30ea\\u30a4",
3403      "\\u30d5\\u30fd",
3404      "\\u3075\\u309e",
3405      "\\u3076\\u309d",
3406      "\\u3076\\u3075",
3407      "\\u3076\\u30d5",
3408      "\\u30d6\\u3075",
3409      "\\u30d6\\u30d5",
3410      "\\u3076\\u309e",
3411      "\\u3076\\u3077",
3412      "\\u30d6\\u3077",
3413      "\\u3077\\u309d",
3414      "\\u30d7\\u30fd",
3415      "\\u3077\\u3075",
3416};
3417
3418  static const char *test2[] = {
3419    "\\u306f\\u309d", /* H\\u309d */
3420    "\\u30cf\\u30fd", /* K\\u30fd */
3421    "\\u306f\\u306f", /* HH */
3422    "\\u306f\\u30cf", /* HK */
3423    "\\u30cf\\u30cf", /* KK */
3424    "\\u306f\\u309e", /* H\\u309e */
3425    "\\u30cf\\u30fe", /* K\\u30fe */
3426    "\\u306f\\u3070", /* HH\\u309b */
3427    "\\u30cf\\u30d0", /* KK\\u309b */
3428    "\\u306f\\u3071", /* HH\\u309c */
3429    "\\u30cf\\u3071", /* KH\\u309c */
3430    "\\u30cf\\u30d1", /* KK\\u309c */
3431    "\\u3070\\u309d", /* H\\u309b\\u309d */
3432    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3433    "\\u3070\\u306f", /* H\\u309bH */
3434    "\\u30d0\\u30cf", /* K\\u309bK */
3435    "\\u3070\\u309e", /* H\\u309b\\u309e */
3436    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3437    "\\u3070\\u3070", /* H\\u309bH\\u309b */
3438    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3439    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3440    "\\u3070\\u3071", /* H\\u309bH\\u309c */
3441    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3442    "\\u3071\\u309d", /* H\\u309c\\u309d */
3443    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3444    "\\u3071\\u306f", /* H\\u309cH */
3445    "\\u30d1\\u30cf", /* K\\u309cK */
3446    "\\u3071\\u3070", /* H\\u309cH\\u309b */
3447    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3448    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3449    "\\u3071\\u3071", /* H\\u309cH\\u309c */
3450    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3451  };
3452  /*
3453  static const char *test3[] = {
3454    "\\u221er\\u221e",
3455    "\\u221eR#",
3456    "\\u221et\\u221e",
3457    "#r\\u221e",
3458    "#R#",
3459    "#t%",
3460    "#T%",
3461    "8t\\u221e",
3462    "8T\\u221e",
3463    "8t#",
3464    "8T#",
3465    "8t%",
3466    "8T%",
3467    "8t8",
3468    "8T8",
3469    "\\u03c9r\\u221e",
3470    "\\u03a9R%",
3471    "rr\\u221e",
3472    "rR\\u221e",
3473    "Rr\\u221e",
3474    "RR\\u221e",
3475    "RT%",
3476    "rt8",
3477    "tr\\u221e",
3478    "tr8",
3479    "TR8",
3480    "tt8",
3481    "\\u30b7\\u30e3\\u30fc\\u30ec",
3482  };
3483  */
3484  static const UColAttribute att[] = { UCOL_STRENGTH };
3485  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3486
3487  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3488  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3489
3490  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3491  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3492  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3493  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3494  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3495}
3496
3497static void TestStrCollIdenticalPrefix(void) {
3498  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3499  const char* test[] = {
3500    "ab\\ud9b0\\udc70",
3501    "ab\\ud9b0\\udc71"
3502  };
3503  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3504}
3505/* Contractions should have all their canonically equivalent */
3506/* strings included */
3507static void TestContractionClosure(void) {
3508  static const struct {
3509    const char *rules;
3510    const char *data[10];
3511    const uint32_t len;
3512  } tests[] = {
3513    {   "&b=\\u00e4\\u00e4",
3514      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3515    {   "&b=\\u00C5",
3516      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3517  };
3518  uint32_t i;
3519
3520
3521  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3522    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3523  }
3524}
3525
/*
 * Tailorings that combine a [before 3] reset with other rules.
 * The original note on this test says that it "also fails".
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;      /* tailoring rule text (escaped) */
    const char *data[10];   /* strings handed to the ordering helper */
    const uint32_t len;     /* number of entries of data[] in use */
  } beforeCases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      { "x", "\\uff41" },
      2 },
    /* [before 3] reset placed after the equalities ... */
    { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
      "&\\u30A8=\\u30A8=\\u3048=\\uff74"
      "&[before 3]\\u30a7<<<\\u30a9",
      { "\\u30a9", "\\u30a7" },
      2 },
    /* ... and the same rules with the [before 3] reset placed first */
    { "&[before 3]\\u30a7<<<\\u30a9"
      "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
      "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      { "\\u30a9", "\\u30a7" },
      2 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(beforeCases) / sizeof(beforeCases[0]));
  uint32_t caseIdx = 0;

  while (caseIdx < caseCount) {
    genericRulesStarter(beforeCases[caseIdx].rules,
                        beforeCases[caseIdx].data,
                        beforeCases[caseIdx].len);
    ++caseIdx;
  }

  /* Disabled: prefix variants of the rules above plus a collation-element dump. */
#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
  /* This piece of code should live in some sort of verbose mode:    */
  /* it fetches the collation elements of each test string and       */
  /* prints them, which helps when diagnosing a failure.             */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3603
3604static void TestPrefixCompose(void) {
3605  const char* rule1 =
3606        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3607  /*
3608  const char* test[] = {
3609      "\\u30c6\\u30fc\\u30bf",
3610      "\\u30c6\\u30a7\\u30bf",
3611  };
3612  */
3613  {
3614    UErrorCode status = U_ZERO_ERROR;
3615    /*uint32_t i = 0;*/
3616    /*UCollationElements *it = NULL;*/
3617/*    uint32_t CE;*/
3618    UChar string[256];
3619    uint32_t uStringLen;
3620    UCollator *coll = NULL;
3621
3622    uStringLen = u_unescape(rule1, string, 256);
3623
3624    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3625    ucol_close(coll);
3626  }
3627
3628
3629}
3630
3631/*
3632[last variable] last variable value
3633[last primary ignorable] largest CE for primary ignorable
3634[last secondary ignorable] largest CE for secondary ignorable
3635[last tertiary ignorable] largest CE for tertiary ignorable
3636[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3637*/
3638
/*
 * Exercises the symbolic reset positions ([first ...], [last ...], [top])
 * and [before N] in tailoring rules. Every case pairs a rule string with
 * the list of strings that is handed to genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /*
   * The expected data below is hardcoded for the current UCA; when the
   * UCA changes, these values may have to be changed as well.
   */

  /*
   * The following strings hold the last character before [variable top]
   * and the first and second characters (by primary weight) after it.
   * See FractionalUCA.txt, for example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string, so
   * LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string must name the character that carries the [last regular]
   * weight, which changes with each UCA version. The bottom of
   * FractionalUCA.txt says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string, so
   * LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;      /* tailoring rule text (escaped) */
    const char *data[10];   /* strings handed to the ordering helper */
    const uint32_t len;     /* number of entries of data[] in use */
  } optionCases[] = {
    /* --- every [before] in this group amounts to zero --- */

    /* you cannot go before first tertiary ignorable */
    { "&[before 3][first tertiary ignorable]<<<a",
      { "\\u0000", "a" },
      2 },

    /* you cannot go before last tertiary ignorable */
    { "&[before 3][last tertiary ignorable]<<<a",
      { "\\u0000", "a" },
      2 },

    /* you cannot go before first secondary ignorable */
    { "&[before 3][first secondary ignorable]<<<a",
      { "\\u0000", "a" },
      2 },

    /* you cannot go before last secondary ignorable */
    { "&[before 3][last secondary ignorable]<<<a",
      { "\\u0000", "a" },
      2 },

    /* --- 'normal' befores --- */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
      { "c", "b", "\\u0332", "a" },
      4 },

    /* there is no code point that corresponds to the last primary ignorable */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
      { "\\u0332", "\\u20e3", "c", "b", "a" },
      5 },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
      { "c", "b", "\\u0009", "a", "\\u000a" },
      5 },

    /* [last variable] itself falls between "b" and "a" */
    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
      { LAST_VARIABLE_CHAR_STRING, "c", "b", "a", FIRST_REGULAR_CHAR_STRING },
      5 },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING },
      4 },

    /* [last regular] itself falls between "b" and "a" */
    { "&[before 1][last regular]<b"
      "&[last regular]<a",
      { LAST_REGULAR_CHAR_STRING, "b", "a", "\\u4e00" },
      4 },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
      { "b", "\\u4e00", "a", "\\u4e01" },
      4 },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
      { "b", "\\U0010FFFD", "a" },
      3 },

    /*
     * NOTE(review): eight strings are listed here but len is 7, so the
     * final "u" is never handed to the helper - confirm whether that is
     * intentional.
     */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      { "\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u" },
      7 }
  };
  const uint32_t caseCount = (uint32_t)(sizeof(optionCases) / sizeof(optionCases[0]));
  uint32_t caseIdx = 0;

  while (caseIdx < caseCount) {
    genericRulesStarter(optionCases[caseIdx].rules,
                        optionCases[caseIdx].data,
                        optionCases[caseIdx].len);
    ++caseIdx;
  }
}
3753
3754
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * fails when an [optimize ...] option is present. The functionality is
 * supposed to stay the same, so there is nothing specific to verify.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;      /* tailoring rule text (escaped) */
    const char *data[10];   /* strings handed to the ordering helper */
    const uint32_t len;     /* number of entries of data[] in use */
  } optimizeCases[] = {
    /* optimize the range U+AC00..U+D7FF; the data is just "a", "b" */
    { "[optimize [\\uAC00-\\uD7FF]]",
      { "a", "b" },
      2 }
  };
  const uint32_t caseCount = (uint32_t)(sizeof(optimizeCases) / sizeof(optimizeCases[0]));
  uint32_t caseIdx = 0;

  while (caseIdx < caseCount) {
    genericRulesStarter(optimizeCases[caseIdx].rules,
                        optimizeCases[caseIdx].data,
                        optimizeCases[caseIdx].len);
    ++caseIdx;
  }
}
3776
3777/*
3778cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3779weiv    ucol_strcollIter?
3780cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3781weiv    these are the input strings?
3782cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3783weiv    will check - could be a problem with utf-8 iterator
3784cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3785weiv    hmmm
3786cycheng@ca.ibm.c... note that we have a standalone high surrogate
3787weiv    that doesn't sound right
3788cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3789weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3790cycheng@ca.ibm.c... yes
3791weiv    and then do the comparison
3792cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3793weiv    utf-16 strings look like a little endian ones in the example you sent me
3794weiv    It could be a bug - let me try to test it out
3795cycheng@ca.ibm.c... ok
3796cycheng@ca.ibm.c... we can wait till the conf. call
3797cycheng@ca.ibm.c... next weke
3798weiv    that would be great
3799weiv    hmmm
3800weiv    I might be wrong
3801weiv    let me play with it some more
3802cycheng@ca.ibm.c... ok
3803cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3804cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3805cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3806weiv    ok
3807cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3808weiv    thanks
3809cycheng@ca.ibm.c... the 4 strings we sent are just samples
3810*/
#if 0
/*
 * Disabled experiment: compares the same two strings once through UTF-16BE
 * character iterators and once through UTF-8 character iterators, with
 * normalization turned on, and reports when the two results differ.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* the two strings as UTF-16BE bytes */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the same two strings as UTF-8 bytes */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator be16First, be16Second;
  UCharIterator u8First, u8Second;
  UCollationResult fromUTF16, fromUTF8;

  uiter_setUTF16BE(&be16First, utf16be[0], 4);
  uiter_setUTF16BE(&be16Second, utf16be[1], 4);

  uiter_setUTF8(&u8First, utf8[0], 4);
  uiter_setUTF8(&u8Second, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  fromUTF16 = ucol_strcollIter(coll, &be16First, &be16Second, &status);
  fromUTF8 = ucol_strcollIter(coll, &u8First, &u8Second, &status);

  if(fromUTF16 != fromUTF8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3851
3852#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3853static void Alexis2(void) {
3854  UErrorCode status = U_ZERO_ERROR;
3855  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3856  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3857  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3858  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3859
3860  UConverter *conv = NULL;
3861
3862  UCharIterator U16BEItS, U16BEItT;
3863  UCharIterator U8ItS, U8ItT;
3864
3865  UCollationResult resU16, resU16BE, resU8;
3866
3867  static const char* const pairs[][2] = {
3868    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3869    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3870    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3871    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3872    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3873    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3874    { "\\u0020", "\\u0020\\u0000"}
3875/*
38765F20 (my result here)
38775F204E008E3F
38785F20 (your result here)
3879*/
3880  };
3881
3882  int32_t i = 0;
3883
3884  UCollator *coll = ucol_open("", &status);
3885  if(status == U_FILE_ACCESS_ERROR) {
3886    log_data_err("Is your data around?\n");
3887    return;
3888  } else if(U_FAILURE(status)) {
3889    log_err("Error opening collator\n");
3890    return;
3891  }
3892  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3893  conv = ucnv_open("UTF16BE", &status);
3894  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3895    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3896    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3897
3898    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3899
3900    log_verbose("Result of strcoll is %i\n", resU16);
3901
3902    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3903    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3904
3905    /* use the original sizes, as the result from converter is in bytes */
3906    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3907    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3908
3909    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3910
3911    log_verbose("Result of U16BE is %i\n", resU16BE);
3912
3913    if(resU16 != resU16BE) {
3914      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3915    }
3916
3917    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3918    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3919
3920    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3921    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3922
3923    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3924
3925    if(resU16 != resU8) {
3926      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3927    }
3928
3929  }
3930
3931  ucol_close(coll);
3932  ucnv_close(conv);
3933}
3934
3935static void TestHebrewUCA(void) {
3936  UErrorCode status = U_ZERO_ERROR;
3937  static const char *first[] = {
3938    "d790d6b8d79cd795d6bcd7a9",
3939    "d790d79cd79ed7a7d799d799d7a1",
3940    "d790d6b4d79ed795d6bcd7a9",
3941  };
3942
3943  char utf8String[3][256];
3944  UChar utf16String[3][256];
3945
3946  int32_t i = 0, j = 0;
3947  int32_t sizeUTF8[3];
3948  int32_t sizeUTF16[3];
3949
3950  UCollator *coll = ucol_open("", &status);
3951  if (U_FAILURE(status)) {
3952      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3953      return;
3954  }
3955  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3956
3957  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3958    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3959    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3960    log_verbose("%i: ");
3961    for(j = 0; j < sizeUTF16[i]; j++) {
3962      /*log_verbose("\\u%04X", utf16String[i][j]);*/
3963      log_verbose("%04X", utf16String[i][j]);
3964    }
3965    log_verbose("\n");
3966  }
3967  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3968    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3969      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3970    }
3971  }
3972
3973  ucol_close(coll);
3974
3975}
3976
3977static void TestPartialSortKeyTermination(void) {
3978  static const char* cases[] = {
3979    "\\u1234\\u1234\\udc00",
3980    "\\udc00\\ud800\\ud800"
3981  };
3982
3983  int32_t i = sizeof(UCollator);
3984
3985  UErrorCode status = U_ZERO_ERROR;
3986
3987  UCollator *coll = ucol_open("", &status);
3988
3989  UCharIterator iter;
3990
3991  UChar currCase[256];
3992  int32_t length = 0;
3993  int32_t pKeyLen = 0;
3994
3995  uint8_t key[256];
3996
3997  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3998    uint32_t state[2] = {0, 0};
3999    length = u_unescape(cases[i], currCase, 256);
4000    uiter_setString(&iter, currCase, length);
4001    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4002
4003    log_verbose("Done\n");
4004
4005  }
4006  ucol_close(coll);
4007}
4008
4009static void TestSettings(void) {
4010  static const char* cases[] = {
4011    "apple",
4012      "Apple"
4013  };
4014
4015  static const char* locales[] = {
4016    "",
4017      "en"
4018  };
4019
4020  UErrorCode status = U_ZERO_ERROR;
4021
4022  int32_t i = 0, j = 0;
4023
4024  UChar source[256], target[256];
4025  int32_t sLen = 0, tLen = 0;
4026
4027  UCollator *collateObject = NULL;
4028  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4029    collateObject = ucol_open(locales[i], &status);
4030    ucol_setStrength(collateObject, UCOL_PRIMARY);
4031    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4032    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4033      sLen = u_unescape(cases[j-1], source, 256);
4034      source[sLen] = 0;
4035      tLen = u_unescape(cases[j], target, 256);
4036      source[tLen] = 0;
4037      doTest(collateObject, source, target, UCOL_EQUAL);
4038    }
4039    ucol_close(collateObject);
4040  }
4041}
4042
4043static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4044    UErrorCode status = U_ZERO_ERROR;
4045    int32_t errorNo = 0;
4046    /*const UChar *sourceRules = NULL;*/
4047    /*int32_t sourceRulesLen = 0;*/
4048    UColAttributeValue french = UCOL_OFF;
4049    int32_t cloneSize = 0;
4050
4051    if(!ucol_equals(source, target)) {
4052        log_err("Same collators, different address not equal\n");
4053        errorNo++;
4054    }
4055    ucol_close(target);
4056    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4057        /* currently, safeClone is implemented through getRules/openRules
4058        * so it is the same as the test below - I will comment that test out.
4059        */
4060        /* real thing */
4061        target = ucol_safeClone(source, NULL, &cloneSize, &status);
4062        if(U_FAILURE(status)) {
4063            log_err("Error creating clone\n");
4064            errorNo++;
4065            return errorNo;
4066        }
4067        if(!ucol_equals(source, target)) {
4068            log_err("Collator different from it's clone\n");
4069            errorNo++;
4070        }
4071        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4072        if(french == UCOL_ON) {
4073            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4074        } else {
4075            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4076        }
4077        if(U_FAILURE(status)) {
4078            log_err("Error setting attributes\n");
4079            errorNo++;
4080            return errorNo;
4081        }
4082        if(ucol_equals(source, target)) {
4083            log_err("Collators same even when options changed\n");
4084            errorNo++;
4085        }
4086        ucol_close(target);
4087        /* commented out since safeClone uses exactly the same technique */
4088        /*
4089        sourceRules = ucol_getRules(source, &sourceRulesLen);
4090        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4091        if(U_FAILURE(status)) {
4092        log_err("Error instantiating target from rules\n");
4093        errorNo++;
4094        return errorNo;
4095        }
4096        if(!ucol_equals(source, target)) {
4097        log_err("Collator different from collator that was created from the same rules\n");
4098        errorNo++;
4099        }
4100        ucol_close(target);
4101        */
4102    }
4103    return errorNo;
4104}
4105
4106
4107static void TestEquals(void) {
4108    /* ucol_equals is not currently a public API. There is a chance that it will become
4109    * something like this, but currently it is only used by RuleBasedCollator::operator==
4110    */
4111    /* test whether the two collators instantiated from the same locale are equal */
4112    UErrorCode status = U_ZERO_ERROR;
4113    UParseError parseError;
4114    int32_t noOfLoc = uloc_countAvailable();
4115    const char *locName = NULL;
4116    UCollator *source = NULL, *target = NULL;
4117    int32_t i = 0;
4118
4119    const char* rules[] = {
4120        "&l < lj <<< Lj <<< LJ",
4121        "&n < nj <<< Nj <<< NJ",
4122        "&ae <<< \\u00e4",
4123        "&AE <<< \\u00c4"
4124    };
4125    /*
4126    const char* badRules[] = {
4127    "&l <<< Lj",
4128    "&n < nj <<< nJ <<< NJ",
4129    "&a <<< \\u00e4",
4130    "&AE <<< \\u00c4 <<< x"
4131    };
4132    */
4133
4134    UChar sourceRules[1024], targetRules[1024];
4135    int32_t sourceRulesSize = 0, targetRulesSize = 0;
4136    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4137
4138    for(i = 0; i < rulesSize; i++) {
4139        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4140        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4141    }
4142
4143    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4144    if(status == U_FILE_ACCESS_ERROR) {
4145        log_data_err("Is your data around?\n");
4146        return;
4147    } else if(U_FAILURE(status)) {
4148        log_err("Error opening collator\n");
4149        return;
4150    }
4151    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4152    if(!ucol_equals(source, target)) {
4153        log_err("Equivalent collators not equal!\n");
4154    }
4155    ucol_close(source);
4156    ucol_close(target);
4157
4158    source = ucol_open("root", &status);
4159    target = ucol_open("root", &status);
4160    log_verbose("Testing root\n");
4161    if(!ucol_equals(source, source)) {
4162        log_err("Same collator not equal\n");
4163    }
4164    if(TestEqualsForCollator(locName, source, target)) {
4165        log_err("Errors for root\n", locName);
4166    }
4167    ucol_close(source);
4168
4169    for(i = 0; i<noOfLoc; i++) {
4170        status = U_ZERO_ERROR;
4171        locName = uloc_getAvailable(i);
4172        /*if(hasCollationElements(locName)) {*/
4173        log_verbose("Testing equality for locale %s\n", locName);
4174        source = ucol_open(locName, &status);
4175        target = ucol_open(locName, &status);
4176        if (U_FAILURE(status)) {
4177            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4178            continue;
4179        }
4180        if(TestEqualsForCollator(locName, source, target)) {
4181            log_err("Errors for locale %s\n", locName);
4182        }
4183        ucol_close(source);
4184        /*}*/
4185    }
4186}
4187
4188static void TestJ2726(void) {
4189    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4190    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4191    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4192    UErrorCode status = U_ZERO_ERROR;
4193    UCollator *coll = ucol_open("en", &status);
4194    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4195    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4196    doTest(coll, a, aSpace, UCOL_EQUAL);
4197    doTest(coll, aSpace, a, UCOL_EQUAL);
4198    doTest(coll, a, spaceA, UCOL_EQUAL);
4199    doTest(coll, spaceA, a, UCOL_EQUAL);
4200    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4201    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4202    ucol_close(coll);
4203}
4204
4205static void NullRule(void) {
4206    UChar r[3] = {0};
4207    UErrorCode status = U_ZERO_ERROR;
4208    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4209    if(U_SUCCESS(status)) {
4210        log_err("This should have been an error!\n");
4211        ucol_close(coll);
4212    } else {
4213        status = U_ZERO_ERROR;
4214    }
4215    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4216    if(U_FAILURE(status)) {
4217        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4218    } else {
4219        ucol_close(coll);
4220    }
4221}
4222
/**
 * Test for numeric collation: with UCOL_NUMERIC_COLLATION turned on, lists of
 * strings containing digit sequences are checked against their expected order.
 */
4227static void TestNumericCollation(void)
4228{
4229    UErrorCode status = U_ZERO_ERROR;
4230
4231    const static char *basicTestStrings[]={
4232    "hello1",
4233    "hello2",
4234    "hello2002",
4235    "hello2003",
4236    "hello123456",
4237    "hello1234567",
4238    "hello10000000",
4239    "hello100000000",
4240    "hello1000000000",
4241    "hello10000000000",
4242    };
4243
4244    const static char *preZeroTestStrings[]={
4245    "avery10000",
4246    "avery010000",
4247    "avery0010000",
4248    "avery00010000",
4249    "avery000010000",
4250    "avery0000010000",
4251    "avery00000010000",
4252    "avery000000010000",
4253    };
4254
4255    const static char *thirtyTwoBitNumericStrings[]={
4256    "avery42949672960",
4257    "avery42949672961",
4258    "avery42949672962",
4259    "avery429496729610"
4260    };
4261
4262     const static char *longNumericStrings[]={
4263     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4264        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4265        are treated as multiple collation elements. */
4266    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4267    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4268    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4269    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4270    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4271    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4272    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4273    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4274    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4275    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4276    };
4277
4278    const static char *supplementaryDigits[] = {
4279      "\\uD835\\uDFCE", /* 0 */
4280      "\\uD835\\uDFCF", /* 1 */
4281      "\\uD835\\uDFD0", /* 2 */
4282      "\\uD835\\uDFD1", /* 3 */
4283      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4284      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4285      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4286      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4287      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4288      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4289    };
4290
4291    const static char *foreignDigits[] = {
4292      "\\u0661",
4293        "\\u0662",
4294        "\\u0663",
4295      "\\u0661\\u0660",
4296      "\\u0661\\u0662",
4297      "\\u0661\\u0663",
4298      "\\u0662\\u0660",
4299      "\\u0662\\u0662",
4300      "\\u0662\\u0663",
4301      "\\u0663\\u0660",
4302      "\\u0663\\u0662",
4303      "\\u0663\\u0663"
4304    };
4305
4306    const static char *evenZeroes[] = {
4307      "2000",
4308      "2001",
4309        "2002",
4310        "2003"
4311    };
4312
4313    UColAttribute att = UCOL_NUMERIC_COLLATION;
4314    UColAttributeValue val = UCOL_ON;
4315
4316    /* Open our collator. */
4317    UCollator* coll = ucol_open("root", &status);
4318    if (U_FAILURE(status)){
4319        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4320              myErrorName(status));
4321        return;
4322    }
4323    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4324    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4325    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4326    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4327    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4328    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4329
4330    /* Setting up our collator to do digits. */
4331    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4332    if (U_FAILURE(status)){
4333        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4334              myErrorName(status));
4335        return;
4336    }
4337
4338    /*
4339       Testing that prepended zeroes still yield the correct collation behavior.
4340       We expect that every element in our strings array will be equal.
4341    */
4342    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4343
4344    ucol_close(coll);
4345}
4346
4347static void TestTibetanConformance(void)
4348{
4349    const char* test[] = {
4350        "\\u0FB2\\u0591\\u0F71\\u0061",
4351        "\\u0FB2\\u0F71\\u0061"
4352    };
4353
4354    UErrorCode status = U_ZERO_ERROR;
4355    UCollator *coll = ucol_open("", &status);
4356    UChar source[100];
4357    UChar target[100];
4358    int result;
4359    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4360    if (U_SUCCESS(status)) {
4361        u_unescape(test[0], source, 100);
4362        u_unescape(test[1], target, 100);
4363        doTest(coll, source, target, UCOL_EQUAL);
4364        result = ucol_strcoll(coll, source, -1,   target, -1);
4365        log_verbose("result %d\n", result);
4366        if (UCOL_EQUAL != result) {
4367            log_err("Tibetan comparison error\n");
4368        }
4369    }
4370    ucol_close(coll);
4371
4372    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4373}
4374
/* Runs a two-string ordering check through the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, sizeof test / sizeof test[0]);
}
4379
/* Largest raw value fed to the implicit-weight helpers by TestImplicitGeneration. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Byte masks for 32-bit implicit weights.  They deliberately carry no
 * trailing semicolon so they can be used anywhere inside an expression,
 * not only as the last token of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4384
4385
4386static void showImplicit(UChar32 i) {
4387    if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4388        log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4389    }
4390}
4391
4392static void TestImplicitGeneration(void) {
4393    UErrorCode status = U_ZERO_ERROR;
4394    UChar32 last = 0;
4395    UChar32 current;
4396    UChar32 i = 0, j = 0;
4397    UChar32 roundtrip = 0;
4398    UChar32 lastBottom = 0;
4399    UChar32 currentBottom = 0;
4400    UChar32 lastTop = 0;
4401    UChar32 currentTop = 0;
4402
4403    UCollator *coll = ucol_open("root", &status);
4404    if(U_FAILURE(status)) {
4405        log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4406        return;
4407    }
4408
4409    uprv_uca_getRawFromImplicit(0xE20303E7);
4410
4411    for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4412        current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4413
4414        /* check that it round-trips AND that all intervening ones are illegal*/
4415        roundtrip = uprv_uca_getRawFromImplicit(current);
4416        if (roundtrip != i) {
4417            log_err("No roundtrip %08X\n", i);
4418        }
4419        if (last != 0) {
4420            for (j = last + 1; j < current; ++j) {
4421                roundtrip = uprv_uca_getRawFromImplicit(j);
4422                /* raise an error if it *doesn't* find an error*/
4423                if (roundtrip != -1) {
4424                    log_err("Fails to recognize illegal %08X\n", j);
4425                }
4426            }
4427        }
4428        /* now do other consistency checks*/
4429        lastBottom = last & bottomByte;
4430        currentBottom = current & bottomByte;
4431        lastTop = last & topByte;
4432        currentTop = current & topByte;
4433
4434        /* print out some values for spot-checking*/
4435        if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4436            showImplicit(i-3);
4437            showImplicit(i-2);
4438            showImplicit(i-1);
4439            showImplicit(i);
4440            showImplicit(i+1);
4441            showImplicit(i+2);
4442        }
4443        last = current;
4444
4445        if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4446            log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4447        }
4448    }
4449    showImplicit(TST_UCOL_MAX_INPUT-2);
4450    showImplicit(TST_UCOL_MAX_INPUT-1);
4451    showImplicit(TST_UCOL_MAX_INPUT);
4452    ucol_close(coll);
4453}
4454
4455/**
4456 * Iterate through the given iterator, checking to see that all the strings
4457 * in the expected array are present.
4458 * @param expected array of strings we expect to see, or NULL
4459 * @param expectedCount number of elements of expected, or 0
4460 */
4461static int32_t checkUEnumeration(const char* msg,
4462                                 UEnumeration* iter,
4463                                 const char** expected,
4464                                 int32_t expectedCount) {
4465    UErrorCode ec = U_ZERO_ERROR;
4466    int32_t i = 0, n, j, bit;
4467    int32_t seenMask = 0;
4468
4469    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4470    n = uenum_count(iter, &ec);
4471    if (!assertSuccess("count", &ec)) return -1;
4472    log_verbose("%s = [", msg);
4473    for (;; ++i) {
4474        const char* s = uenum_next(iter, NULL, &ec);
4475        if (!assertSuccess("snext", &ec) || s == NULL) break;
4476        if (i != 0) log_verbose(",");
4477        log_verbose("%s", s);
4478        /* check expected list */
4479        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4480            if ((seenMask&bit) == 0 &&
4481                uprv_strcmp(s, expected[j]) == 0) {
4482                seenMask |= bit;
4483                break;
4484            }
4485        }
4486    }
4487    log_verbose("] (%d)\n", i);
4488    assertTrue("count verified", i==n);
4489    /* did we see all expected strings? */
4490    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4491        if ((seenMask&bit)!=0) {
4492            log_verbose("Ok: \"%s\" seen\n", expected[j]);
4493        } else {
4494            log_err("FAIL: \"%s\" not seen\n", expected[j]);
4495        }
4496    }
4497    return n;
4498}
4499
4500/**
4501 * Test new API added for separate collation tree.
4502 */
4503static void TestSeparateTrees(void) {
4504    UErrorCode ec = U_ZERO_ERROR;
4505    UEnumeration *e = NULL;
4506    int32_t n = -1;
4507    UBool isAvailable;
4508    char loc[256];
4509
4510    static const char* AVAIL[] = { "en", "de" };
4511
4512    static const char* KW[] = { "collation" };
4513
4514    static const char* KWVAL[] = { "phonebook", "stroke" };
4515
4516#if !UCONFIG_NO_SERVICE
4517    e = ucol_openAvailableLocales(&ec);
4518    if (e != NULL) {
4519        assertSuccess("ucol_openAvailableLocales", &ec);
4520        assertTrue("ucol_openAvailableLocales!=0", e!=0);
4521        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4522        /* Don't need to check n because we check list */
4523        uenum_close(e);
4524    } else {
4525        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4526    }
4527#endif
4528
4529    e = ucol_getKeywords(&ec);
4530    if (e != NULL) {
4531        assertSuccess("ucol_getKeywords", &ec);
4532        assertTrue("ucol_getKeywords!=0", e!=0);
4533        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4534        /* Don't need to check n because we check list */
4535        uenum_close(e);
4536    } else {
4537        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4538    }
4539
4540    e = ucol_getKeywordValues(KW[0], &ec);
4541    if (e != NULL) {
4542        assertSuccess("ucol_getKeywordValues", &ec);
4543        assertTrue("ucol_getKeywordValues!=0", e!=0);
4544        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4545        /* Don't need to check n because we check list */
4546        uenum_close(e);
4547    } else {
4548        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4549    }
4550
4551    /* Try setting a warning before calling ucol_getKeywordValues */
4552    ec = U_USING_FALLBACK_WARNING;
4553    e = ucol_getKeywordValues(KW[0], &ec);
4554    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4555        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4556        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4557        /* Don't need to check n because we check list */
4558        uenum_close(e);
4559    }
4560
4561    /*
4562U_DRAFT int32_t U_EXPORT2
4563ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4564                             const char* locale, UBool* isAvailable,
4565                             UErrorCode* status);
4566}
4567*/
4568    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4569                                     &isAvailable, &ec);
4570    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4571        assertEquals("getFunctionalEquivalent(de)", "de", loc);
4572        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4573                   isAvailable == TRUE);
4574    }
4575
4576    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4577                                     &isAvailable, &ec);
4578    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4579        assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4580        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4581                   isAvailable == TRUE);
4582    }
4583}
4584
/*
 * Supersedes TestJ784.
 * Runs two ordered string tables first through a custom [before 2] rule
 * set for accented vowels and then through the "zh" locale.
 */
static void TestBeforePinyin(void) {
    const static char rules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* syllables with and without a macron on the vowel */
    const static char *test[] = {
        "l\\u0101",  "la",
        "l\\u0101n", "lan ",
        "l\\u0113",  "le",
        "l\\u0113n", "len"
    };

    /* case and accent combinations on a/A, alone and followed by x */
    const static char *test2[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    genericRulesStarter(rules, test, sizeof test / sizeof test[0]);
    genericLocaleStarter("zh", test, sizeof test / sizeof test[0]);

    genericRulesStarter(rules, test2, sizeof test2 / sizeof test2[0]);
    genericLocaleStarter("zh", test2, sizeof test2 / sizeof test2[0]);
}
4645
4646static void TestBeforeTightening(void) {
4647    static const struct {
4648        const char *rules;
4649        UErrorCode expectedStatus;
4650    } tests[] = {
4651        { "&[before 1]a<x", U_ZERO_ERROR },
4652        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4653        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4654        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4655        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4656        { "&[before 2]a<<x",U_ZERO_ERROR },
4657        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4658        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4659        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4660        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4661        { "&[before 3]a<<<x",U_ZERO_ERROR },
4662        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4663        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4664    };
4665
4666    int32_t i = 0;
4667
4668    UErrorCode status = U_ZERO_ERROR;
4669    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4670    uint32_t rlen = 0;
4671
4672    UCollator *coll = NULL;
4673
4674
4675    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4676        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4677        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4678        if(status != tests[i].expectedStatus) {
4679            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4680                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4681        }
4682        ucol_close(coll);
4683        status = U_ZERO_ERROR;
4684    }
4685
4686}
4687
4688/*
4689&m < a
4690&[before 1] a < x <<< X << q <<< Q < z
4691assert: m <<< M < x <<< X << q <<< Q < z < a < n
4692
4693&m < a
4694&[before 2] a << x <<< X << q <<< Q < z
4695assert: m <<< M < x <<< X << q <<< Q << a < z < n
4696
4697&m < a
4698&[before 3] a <<< x <<< X << q <<< Q < z
4699assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4700
4701
4702&m << a
4703&[before 1] a < x <<< X << q <<< Q < z
4704assert: x <<< X << q <<< Q < z < m <<< M << a < n
4705
4706&m << a
4707&[before 2] a << x <<< X << q <<< Q < z
4708assert: m <<< M << x <<< X << q <<< Q << a < z < n
4709
4710&m << a
4711&[before 3] a <<< x <<< X << q <<< Q < z
4712assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4713
4714
4715&m <<< a
4716&[before 1] a < x <<< X << q <<< Q < z
4717assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4718
4719&m <<< a
4720&[before 2] a << x <<< X << q <<< Q < z
4721assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4722
4723&m <<< a
4724&[before 3] a <<< x <<< X << q <<< Q < z
4725assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4726
4727
4728&[before 1] s < x <<< X << q <<< Q < z
4729assert: r <<< R < x <<< X << q <<< Q < z < s < n
4730
4731&[before 2] s << x <<< X << q <<< Q < z
4732assert: r <<< R < x <<< X << q <<< Q << s < z < n
4733
4734&[before 3] s <<< x <<< X << q <<< Q < z
4735assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4736
4737
4738&[before 1] \u24DC < x <<< X << q <<< Q < z
4739assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4740
4741&[before 2] \u24DC << x <<< X << q <<< Q < z
4742assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4743
4744&[before 3] \u24DC <<< x <<< X << q <<< Q < z
4745assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4746*/
4747
4748
#if 0
/*
 * Disabled: requires features not yet supported.
 * Each entry pairs a rule string with the order it is expected to produce;
 * the expectations mirror the "assert:" lines in the comment block above.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };
    const int32_t caseCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
    int32_t t;

    for (t = 0; t < caseCount; ++t) {
        genericRulesStarter(tests[t].rules, tests[t].order, tests[t].size);
    }
}
#endif
4796
4797static void TestTailorNULL( void ) {
4798    const static char* rule = "&a <<< '\\u0000'";
4799    UErrorCode status = U_ZERO_ERROR;
4800    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4801    uint32_t rlen = 0;
4802    UChar a = 1, null = 0;
4803    UCollationResult res = UCOL_EQUAL;
4804
4805    UCollator *coll = NULL;
4806
4807
4808    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4809    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4810
4811    if(U_FAILURE(status)) {
4812        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4813    } else {
4814        res = ucol_strcoll(coll, &a, 1, &null, 1);
4815
4816        if(res != UCOL_LESS) {
4817            log_err("NULL was not tailored properly!\n");
4818        }
4819    }
4820
4821    ucol_close(coll);
4822}
4823
4824static void
4825TestUpperFirstQuaternary(void)
4826{
4827  const char* tests[] = { "B", "b", "Bb", "bB" };
4828  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4829  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4830  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4831}
4832
4833static void
4834TestJ4960(void)
4835{
4836  const char* tests[] = { "\\u00e2T", "aT" };
4837  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4838  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4839  const char* tests2[] = { "a", "A" };
4840  const char* rule = "&[first tertiary ignorable]=A=a";
4841  UColAttribute att2[] = { UCOL_CASE_LEVEL };
4842  UColAttributeValue attVals2[] = { UCOL_ON };
4843  /* Test whether we correctly ignore primary ignorables on case level when */
4844  /* we have only primary & case level */
4845  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4846  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4847  /* and case level */
4848  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4849  /* Test whether completely ignorable letters have case level info (they shouldn't) */
4850  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4851}
4852
4853static void
4854TestJ5223(void)
4855{
4856  static const char *test = "this is a test string";
4857  UChar ustr[256];
4858  int32_t ustr_length = u_unescape(test, ustr, 256);
4859  unsigned char sortkey[256];
4860  int32_t sortkey_length;
4861  UErrorCode status = U_ZERO_ERROR;
4862  static UCollator *coll = NULL;
4863  coll = ucol_open("root", &status);
4864  if(U_FAILURE(status)) {
4865    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4866    return;
4867  }
4868  ucol_setStrength(coll, UCOL_PRIMARY);
4869  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4870  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4871  if (U_FAILURE(status)) {
4872    log_err("Failed setting atributes\n");
4873    return;
4874  }
4875  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4876  if (sortkey_length > 256) return;
4877
4878  /* we mark the position where the null byte should be written in advance */
4879  sortkey[sortkey_length-1] = 0xAA;
4880
4881  /* we set the buffer size one byte higher than needed */
4882  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4883    sortkey_length+1);
4884
4885  /* no error occurs (for me) */
4886  if (sortkey[sortkey_length-1] == 0xAA) {
4887    log_err("Hit bug at first try\n");
4888  }
4889
4890  /* we mark the position where the null byte should be written again */
4891  sortkey[sortkey_length-1] = 0xAA;
4892
4893  /* this time we set the buffer size to the exact amount needed */
4894  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4895    sortkey_length);
4896
4897  /* now the trailing null byte is not written */
4898  if (sortkey[sortkey_length-1] == 0xAA) {
4899    log_err("Hit bug at second try\n");
4900  }
4901
4902  ucol_close(coll);
4903}
4904
/*
 * Regression test for the Thai partial sort key problem: two escaped Thai
 * strings that differ only in the second-to-last code point are run
 * through the "th" locale.
 */
static void
TestJ5232(void)
{
    const static char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", test, sizeof test / sizeof test[0]);
}
4916
/*
 * Runs "a" before "y" through a rule set that combines a contraction reset
 * (&Ny) with a tailoring relative to [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *test[] = {
        "a",
        "y"
    };

    genericRulesStarter(rules, test, sizeof test / sizeof test[0]);
}
4924
4925static void
4926TestVI5913(void)
4927{
4928    UErrorCode status = U_ZERO_ERROR;
4929    int32_t i, j;
4930    UCollator *coll =NULL;
4931    uint8_t  resColl[100], expColl[100];
4932    int32_t  rLen, tLen, ruleLen, sLen, kLen;
4933    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4934    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4935    UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4936    static const UChar tData[][20]={
4937        {0x1EAC, 0},
4938        {0x0041, 0x0323, 0x0302, 0},
4939        {0x1EA0, 0x0302, 0},
4940        {0x00C2, 0x0323, 0},
4941        {0x1ED8, 0},  /* O with dot and circumflex */
4942        {0x1ECC, 0x0302, 0},
4943        {0x1EB7, 0},
4944        {0x1EA1, 0x0306, 0},
4945    };
4946    static const UChar tailorData[][20]={
4947        {0x1FA2, 0},  /* Omega with 3 combining marks */
4948        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4949        {0x1FF3, 0x0313, 0x0300, 0},
4950        {0x1F60, 0x0300, 0x0345, 0},
4951        {0x1F62, 0x0345, 0},
4952        {0x1FA0, 0x0300, 0},
4953    };
4954    static const UChar tailorData2[][20]={
4955        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4956        {0x0073, 0x0323, 0x030C, 0},
4957        {0x0073, 0x030C, 0x0323, 0},
4958    };
4959    static const UChar tailorData3[][20]={
4960        {0x007a, 0},  /*  z */
4961        {0x0061, 0x0065, 0},  /*  a + e */
4962        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4963        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4964        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4965        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4966        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4967        {0x00EA, 0},  /* e with circumflex  */
4968    };
4969
4970    /* Test Vietnamese sort. */
4971    coll = ucol_open("vi", &status);
4972    if(U_FAILURE(status)) {
4973        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4974        return;
4975    }
4976    log_verbose("\n\nVI collation:");
4977    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4978        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4979    }
4980    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4981        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4982    }
4983    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4984        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4985    }
4986    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4987        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4988    }
4989
4990    for (j=0; j<8; j++) {
4991        tLen = u_strlen(tData[j]);
4992        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4993        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4994        for(i = 0; i<rLen; i++) {
4995            log_verbose(" %02X", resColl[i]);
4996        }
4997    }
4998
4999    ucol_close(coll);
5000
5001    /* Test Romanian sort. */
5002    coll = ucol_open("ro", &status);
5003    log_verbose("\n\nRO collation:");
5004    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
5005        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5006    }
5007    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
5008        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5009    }
5010    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5011        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5012    }
5013
5014    for (j=4; j<8; j++) {
5015        tLen = u_strlen(tData[j]);
5016        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5017        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5018        for(i = 0; i<rLen; i++) {
5019            log_verbose(" %02X", resColl[i]);
5020        }
5021    }
5022    ucol_close(coll);
5023
5024    /* Test the precomposed Greek character with 3 combining marks. */
5025    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5026    ruleLen = u_strlen(rule);
5027    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5028    if (U_FAILURE(status)) {
5029        log_err("ucol_openRules failed with %s\n", u_errorName(status));
5030        return;
5031    }
5032    sLen = u_strlen(tailorData[0]);
5033    for (j=1; j<6; j++) {
5034        tLen = u_strlen(tailorData[j]);
5035        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5036            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5037        }
5038    }
5039    /* Test getSortKey. */
5040    tLen = u_strlen(tailorData[0]);
5041    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5042    for (j=0; j<6; j++) {
5043        tLen = u_strlen(tailorData[j]);
5044        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5045        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5046            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5047            for(i = 0; i<rLen; i++) {
5048                log_err(" %02X", resColl[i]);
5049            }
5050        }
5051    }
5052    ucol_close(coll);
5053
5054    log_verbose("\n\nTailoring test for s with caron:");
5055    ruleLen = u_strlen(rule2);
5056    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5057    tLen = u_strlen(tailorData2[0]);
5058    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5059    for (j=1; j<3; j++) {
5060        tLen = u_strlen(tailorData2[j]);
5061        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5062        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5063            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5064            for(i = 0; i<rLen; i++) {
5065                log_err(" %02X", resColl[i]);
5066            }
5067        }
5068    }
5069    ucol_close(coll);
5070
5071    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5072    ruleLen = u_strlen(rule3);
5073    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5074    tLen = u_strlen(tailorData3[3]);
5075    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5076    for (j=4; j<6; j++) {
5077        tLen = u_strlen(tailorData3[j]);
5078        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5079
5080        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5081            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5082            for(i = 0; i<rLen; i++) {
5083                log_err(" %02X", resColl[i]);
5084            }
5085        }
5086
5087        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5088         for(i = 0; i<rLen; i++) {
5089             log_verbose(" %02X", resColl[i]);
5090         }
5091    }
5092    ucol_close(coll);
5093}
5094
5095static void
5096TestTailor6179(void)
5097{
5098    UErrorCode status = U_ZERO_ERROR;
5099    int32_t i;
5100    UCollator *coll =NULL;
5101    uint8_t  resColl[100];
5102    int32_t  rLen, tLen, ruleLen;
5103    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5104    static const UChar rule1[]={
5105            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5106            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5107            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5108            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5109    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5110    static const UChar rule2[]={
5111            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5112            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5113            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5114            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5115            0x3C,0x3C,0x20,0x62,0};
5116
5117    static const UChar tData1[][4]={
5118        {0x61, 0},
5119        {0x62, 0},
5120        { 0xFDD0,0x009E, 0}
5121    };
5122    static const UChar tData2[][4]={
5123        {0x61, 0},
5124        {0x62, 0},
5125        { 0xFDD0,0x009E, 0}
5126     };
5127
5128    /*
5129     * These values from FractionalUCA.txt will change,
5130     * and need to be updated here.
5131     */
5132    static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5133    static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5134    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5135    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5136
5137    /* Test [Last Primary ignorable] */
5138
5139    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
5140    ruleLen = u_strlen(rule1);
5141    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5142    if (U_FAILURE(status)) {
5143        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5144        return;
5145    }
5146    tLen = u_strlen(tData1[0]);
5147    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5148    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5149        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5150        for(i = 0; i<rLen; i++) {
5151            log_err(" %02X", resColl[i]);
5152        }
5153        log_err("\n");
5154    }
5155    tLen = u_strlen(tData1[1]);
5156    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5157    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5158        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5159        for(i = 0; i<rLen; i++) {
5160            log_err(" %02X", resColl[i]);
5161        }
5162        log_err("\n");
5163    }
5164    ucol_close(coll);
5165
5166
5167    /* Test [Last Secondary ignorable] */
5168    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
5169    ruleLen = u_strlen(rule1);
5170    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5171    if (U_FAILURE(status)) {
5172        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5173        return;
5174    }
5175    tLen = u_strlen(tData2[0]);
5176    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5177    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5178        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5179        for(i = 0; i<rLen; i++) {
5180            log_err(" %02X", resColl[i]);
5181        }
5182        log_err("\n");
5183    }
5184if(isICUVersionAtLeast(52, 0, 1)) {  /* TODO: debug & fix, see ticket #8982 */
5185    tLen = u_strlen(tData2[1]);
5186    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5187    if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5188        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5189        for(i = 0; i<rLen; i++) {
5190            log_err(" %02X", resColl[i]);
5191        }
5192        log_err("\n");
5193    }
5194}
5195    ucol_close(coll);
5196}
5197
5198static void
5199TestUCAPrecontext(void)
5200{
5201    UErrorCode status = U_ZERO_ERROR;
5202    int32_t i, j;
5203    UCollator *coll =NULL;
5204    uint8_t  resColl[100], prevColl[100];
5205    int32_t  rLen, tLen, ruleLen;
5206    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5207    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5208    /* & l middle-dot << a  a is an expansion. */
5209
5210    UChar tData1[][20]={
5211            { 0xb7, 0},  /* standalone middle dot(0xb7) */
5212            { 0x387, 0}, /* standalone middle dot(0x387) */
5213            { 0x61, 0},  /* a */
5214            { 0x6C, 0},  /* l */
5215            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5216            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5217            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5218            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5219            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5220            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5221            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5222     };
5223
5224    log_verbose("\n\nEN collation:");
5225    coll = ucol_open("en", &status);
5226    if (U_FAILURE(status)) {
5227        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5228        return;
5229    }
5230    for (j=0; j<11; j++) {
5231        tLen = u_strlen(tData1[j]);
5232        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5233        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5234            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5235                    j, tData1[j]);
5236        }
5237        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5238        for(i = 0; i<rLen; i++) {
5239            log_verbose(" %02X", resColl[i]);
5240        }
5241        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5242     }
5243     ucol_close(coll);
5244
5245
5246     log_verbose("\n\nJA collation:");
5247     coll = ucol_open("ja", &status);
5248     if (U_FAILURE(status)) {
5249         log_err("Tailoring test: &z <<a|- failed!");
5250         return;
5251     }
5252     for (j=0; j<11; j++) {
5253         tLen = u_strlen(tData1[j]);
5254         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5255         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5256             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5257                     j, tData1[j]);
5258         }
5259         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5260         for(i = 0; i<rLen; i++) {
5261             log_verbose(" %02X", resColl[i]);
5262         }
5263         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5264      }
5265      ucol_close(coll);
5266
5267
5268      log_verbose("\n\nTailoring test: & middle dot < a ");
5269      ruleLen = u_strlen(rule1);
5270      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5271      if (U_FAILURE(status)) {
5272          log_err("Tailoring test: & middle dot < a failed!");
5273          return;
5274      }
5275      for (j=0; j<11; j++) {
5276          tLen = u_strlen(tData1[j]);
5277          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5278          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5279              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5280                      j, tData1[j]);
5281          }
5282          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5283          for(i = 0; i<rLen; i++) {
5284              log_verbose(" %02X", resColl[i]);
5285          }
5286          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5287       }
5288       ucol_close(coll);
5289
5290
5291       log_verbose("\n\nTailoring test: & l middle-dot << a ");
5292       ruleLen = u_strlen(rule2);
5293       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5294       if (U_FAILURE(status)) {
5295           log_err("Tailoring test: & l middle-dot << a failed!");
5296           return;
5297       }
5298       for (j=0; j<11; j++) {
5299           tLen = u_strlen(tData1[j]);
5300           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5301           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5302               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5303                       j, tData1[j]);
5304           }
5305           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5306               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5307                       j, tData1[j]);
5308           }
5309           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5310           for(i = 0; i<rLen; i++) {
5311               log_verbose(" %02X", resColl[i]);
5312           }
5313           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5314        }
5315        ucol_close(coll);
5316}
5317
5318static void
5319TestOutOfBuffer5468(void)
5320{
5321    static const char *test = "\\u4e00";
5322    UChar ustr[256];
5323    int32_t ustr_length = u_unescape(test, ustr, 256);
5324    unsigned char shortKeyBuf[1];
5325    int32_t sortkey_length;
5326    UErrorCode status = U_ZERO_ERROR;
5327    static UCollator *coll = NULL;
5328
5329    coll = ucol_open("root", &status);
5330    if(U_FAILURE(status)) {
5331      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5332      return;
5333    }
5334    ucol_setStrength(coll, UCOL_PRIMARY);
5335    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5336    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5337    if (U_FAILURE(status)) {
5338      log_err("Failed setting atributes\n");
5339      return;
5340    }
5341
5342    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5343    if (sortkey_length != 4) {
5344        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5345    }
5346    log_verbose("length of sortKey is %d", sortkey_length);
5347    ucol_close(coll);
5348}
5349
5350#define TSKC_DATA_SIZE 5
5351#define TSKC_BUF_SIZE  50
5352static void
5353TestSortKeyConsistency(void)
5354{
5355    UErrorCode icuRC = U_ZERO_ERROR;
5356    UCollator* ucol;
5357    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5358
5359    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5360    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5361    int32_t i, j, i2;
5362
5363    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5364    if (U_FAILURE(icuRC))
5365    {
5366        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5367        return;
5368    }
5369
5370    for (i = 0; i < TSKC_DATA_SIZE; i++)
5371    {
5372        UCharIterator uiter;
5373        uint32_t state[2] = { 0, 0 };
5374        int32_t dataLen = i+1;
5375        for (j=0; j<TSKC_BUF_SIZE; j++)
5376            bufFull[i][j] = bufPart[i][j] = 0;
5377
5378        /* Full sort key */
5379        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5380
5381        /* Partial sort key */
5382        uiter_setString(&uiter, data, dataLen);
5383        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5384        if (U_FAILURE(icuRC))
5385        {
5386            log_err("ucol_nextSortKeyPart failed\n");
5387            ucol_close(ucol);
5388            return;
5389        }
5390
5391        for (i2=0; i2<i; i2++)
5392        {
5393            UBool fullMatch = TRUE;
5394            UBool partMatch = TRUE;
5395            for (j=0; j<TSKC_BUF_SIZE; j++)
5396            {
5397                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5398                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5399            }
5400            if (fullMatch != partMatch) {
5401                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5402                                  : "partial key was consistent, but full key changed\n");
5403                ucol_close(ucol);
5404                return;
5405            }
5406        }
5407    }
5408
5409    /*=============================================*/
5410   ucol_close(ucol);
5411}
5412
5413/* ticket: 6101 */
5414static void TestCroatianSortKey(void) {
5415    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5416    UErrorCode status = U_ZERO_ERROR;
5417    UCollator *ucol;
5418    UCharIterator iter;
5419
5420    static const UChar text[] = { 0x0044, 0xD81A };
5421
5422    size_t length = sizeof(text)/sizeof(*text);
5423
5424    uint8_t textSortKey[32];
5425    size_t lenSortKey = 32;
5426    size_t actualSortKeyLen;
5427    uint32_t uStateInfo[2] = { 0, 0 };
5428
5429    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5430    if (U_FAILURE(status)) {
5431        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5432        return;
5433    }
5434
5435    uiter_setString(&iter, text, length);
5436
5437    actualSortKeyLen = ucol_nextSortKeyPart(
5438        ucol, &iter, (uint32_t*)uStateInfo,
5439        textSortKey, lenSortKey, &status
5440        );
5441
5442    if (actualSortKeyLen == lenSortKey) {
5443        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5444    }
5445
5446    ucol_close(ucol);
5447}
5448
5449/* ticket: 6140 */
5450/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5451 * they are both Hiragana and Katakana
5452 */
5453#define SORTKEYLEN 50
5454static void TestHiragana(void) {
5455    UErrorCode status = U_ZERO_ERROR;
5456    UCollator* ucol;
5457    UCollationResult strcollresult;
5458    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5459    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5460    int32_t data1Len = sizeof(data1)/sizeof(*data1);
5461    int32_t data2Len = sizeof(data2)/sizeof(*data2);
5462    int32_t i, j;
5463    uint8_t sortKey1[SORTKEYLEN];
5464    uint8_t sortKey2[SORTKEYLEN];
5465
5466    UCharIterator uiter1;
5467    UCharIterator uiter2;
5468    uint32_t state1[2] = { 0, 0 };
5469    uint32_t state2[2] = { 0, 0 };
5470    int32_t keySize1;
5471    int32_t keySize2;
5472
5473    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5474            &status);
5475    if (U_FAILURE(status)) {
5476        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5477        return;
5478    }
5479
5480    /* Start of full sort keys */
5481    /* Full sort key1 */
5482    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5483    /* Full sort key2 */
5484    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5485    if (keySize1 == keySize2) {
5486        for (i = 0; i < keySize1; i++) {
5487            if (sortKey1[i] != sortKey2[i]) {
5488                log_err("Full sort keys are different. Should be equal.");
5489            }
5490        }
5491    } else {
5492        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5493    }
5494    /* End of full sort keys */
5495
5496    /* Start of partial sort keys */
5497    /* Partial sort key1 */
5498    uiter_setString(&uiter1, data1, data1Len);
5499    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5500    /* Partial sort key2 */
5501    uiter_setString(&uiter2, data2, data2Len);
5502    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5503    if (U_SUCCESS(status) && keySize1 == keySize2) {
5504        for (j = 0; j < keySize1; j++) {
5505            if (sortKey1[j] != sortKey2[j]) {
5506                log_err("Partial sort keys are different. Should be equal");
5507            }
5508        }
5509    } else {
5510        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5511    }
5512    /* End of partial sort keys */
5513
5514    /* Start of strcoll */
5515    /* Use ucol_strcoll() to determine ordering */
5516    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5517    if (strcollresult != UCOL_EQUAL) {
5518        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5519    }
5520
5521    ucol_close(ucol);
5522}
5523
/*
 * Convenient struct for running collation tests.
 *
 * One entry describes a single comparison: two strings of at most
 * MAX_TOKEN_LEN UChars each and the result the collator is expected to
 * return for (source, target).  Initializers that list fewer than
 * MAX_TOKEN_LEN code units leave the remaining units zero, as the tables
 * below do.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
5530
5531/*
5532 * Utility function to test one collation test case.
5533 * @param testcases Array of test cases.
5534 * @param n_testcases Size of the array testcases.
5535 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5536 * @param n_rules Size of the array str_rules.
5537 */
5538static void doTestOneTestCase(const OneTestCase testcases[],
5539                              int n_testcases,
5540                              const char* str_rules[],
5541                              int n_rules)
5542{
5543  int rule_no, testcase_no;
5544  UChar rule[500];
5545  int32_t length = 0;
5546  UErrorCode status = U_ZERO_ERROR;
5547  UParseError parse_error;
5548  UCollator  *myCollation;
5549
5550  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5551
5552    length = u_unescape(str_rules[rule_no], rule, 500);
5553    if (length == 0) {
5554        log_err("ERROR: The rule cannot be unescaped: %s\n");
5555        return;
5556    }
5557    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5558    if(U_FAILURE(status)){
5559        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5560        return;
5561    }
5562    log_verbose("Testing the <<* syntax\n");
5563    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5564    ucol_setStrength(myCollation, UCOL_TERTIARY);
5565    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5566      doTest(myCollation,
5567             testcases[testcase_no].source,
5568             testcases[testcase_no].target,
5569             testcases[testcase_no].result
5570             );
5571    }
5572    ucol_close(myCollation);
5573  }
5574}
5575
/*
 * Expected comparison results for the tailoring used by TestSameStrengthList,
 * TestSameStrengthListQuoted and TestSameStrengthListRanges
 * ("&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3" and its
 * equivalent spellings).  The trailing comments spell out the code points
 * that are actually in each entry.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" << "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" << "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = LEN(rangeTestcases);
5603
/*
 * Expected comparison results for the supplementary-character tailorings in
 * TestSameStrengthListSupplemental and TestSameStrengthListSupplementalRanges
 * (U+FFFE < U+FFFF < U+10000 < U+10001 < U+10002).  Supplementary code points
 * are written as surrogate pairs: D800 DC00 is U+10000, D800 DC01 is U+10001
 * and D800 DC02 is U+10002.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
  { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same as the previous entry) */
  { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5615
/*
 * Expected comparison results for the "qwerty" tailoring used by
 * TestSameStrengthListQwerty and TestSameStrengthListQuotedQwerty
 * ("&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d" and its equivalent
 * spellings).  Every entry expects UCOL_LESS; the operator in each comment
 * shows the level at which the rule separates the two strings.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" << "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5636
5637static void TestSameStrengthList(void)
5638{
5639  const char* strRules[] = {
5640    /* Normal */
5641    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5642
5643    /* Lists */
5644    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5645  };
5646  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5647}
5648
5649static void TestSameStrengthListQuoted(void)
5650{
5651  const char* strRules[] = {
5652    /* Lists with quoted characters */
5653    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5654    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5655
5656    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5657    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5658
5659    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5660    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5661  };
5662  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5663}
5664
5665static void TestSameStrengthListSupplemental(void)
5666{
5667  const char* strRules[] = {
5668    "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5669    "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5670    "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5671    "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5672  };
5673  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5674}
5675
5676static void TestSameStrengthListQwerty(void)
5677{
5678  const char* strRules[] = {
5679    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5680    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5681    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5682    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5683    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5684
5685    /* Quoted characters also will work if two quoted characters are not consecutive.  */
5686    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5687
5688    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5689    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5690
5691 };
5692  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5693}
5694
5695static void TestSameStrengthListQuotedQwerty(void)
5696{
5697  const char* strRules[] = {
5698    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5699    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5700    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5701
5702    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5703    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5704   };
5705  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5706}
5707
5708static void TestSameStrengthListRanges(void)
5709{
5710  const char* strRules[] = {
5711    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5712  };
5713  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5714}
5715
5716static void TestSameStrengthListSupplementalRanges(void)
5717{
5718  const char* strRules[] = {
5719    "&\\ufffe<*\\uffff-\\U00010002",
5720  };
5721  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5722}
5723
5724static void TestSpecialCharacters(void)
5725{
5726  const char* strRules[] = {
5727    /* Normal */
5728    "&';'<'+'<','<'-'<'&'<'*'",
5729
5730    /* List */
5731    "&';'<*'+,-&*'",
5732
5733    /* Range */
5734    "&';'<*'+'-'-&*'",
5735  };
5736
5737  const static OneTestCase specialCharacterStrings[] = {
5738    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5739    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5740    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5741    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5742  };
5743  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5744}
5745
5746static void TestPrivateUseCharacters(void)
5747{
5748  const char* strRules[] = {
5749    /* Normal */
5750    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5751    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5752  };
5753
5754  const static OneTestCase privateUseCharacterStrings[] = {
5755    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5756    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5757    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5758    { {0xe2da}, {0xe2db}, UCOL_LESS },
5759    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5760    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5761  };
5762  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5763}
5764
5765static void TestPrivateUseCharactersInList(void)
5766{
5767  const char* strRules[] = {
5768    /* List */
5769    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5770    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5771    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5772  };
5773
5774  const static OneTestCase privateUseCharacterStrings[] = {
5775    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5776    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5777    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5778    { {0xe2da}, {0xe2db}, UCOL_LESS },
5779    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5780    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5781  };
5782  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5783}
5784
5785static void TestPrivateUseCharactersInRange(void)
5786{
5787  const char* strRules[] = {
5788    /* Range */
5789    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5790    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5791    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5792  };
5793
5794  const static OneTestCase privateUseCharacterStrings[] = {
5795    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5796    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5797    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5798    { {0xe2da}, {0xe2db}, UCOL_LESS },
5799    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5800    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5801  };
5802  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5803}
5804
5805static void TestInvalidListsAndRanges(void)
5806{
5807  const char* invalidRules[] = {
5808    /* Range not in starred expression */
5809    "&\\ufffe<\\uffff-\\U00010002",
5810
5811    /* Range without start */
5812    "&a<*-c",
5813
5814    /* Range without end */
5815    "&a<*b-",
5816
5817    /* More than one hyphen */
5818    "&a<*b-g-l",
5819
5820    /* Range in the wrong order */
5821    "&a<*k-b",
5822
5823  };
5824
5825  UChar rule[500];
5826  UErrorCode status = U_ZERO_ERROR;
5827  UParseError parse_error;
5828  int n_rules = LEN(invalidRules);
5829  int rule_no;
5830  int length;
5831  UCollator  *myCollation;
5832
5833  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5834
5835    length = u_unescape(invalidRules[rule_no], rule, 500);
5836    if (length == 0) {
5837        log_err("ERROR: The rule cannot be unescaped: %s\n");
5838        return;
5839    }
5840    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5841    if(!U_FAILURE(status)){
5842      log_err("ERROR: Could not cause a failure as expected: \n");
5843    }
5844    status = U_ZERO_ERROR;
5845  }
5846}
5847
5848/*
5849 * This test ensures that characters placed before a character in a different script have the same lead byte
5850 * in their collation key before and after script reordering.
5851 */
5852static void TestBeforeRuleWithScriptReordering(void)
5853{
5854    UParseError error;
5855    UErrorCode status = U_ZERO_ERROR;
5856    UCollator  *myCollation;
5857    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5858    UChar rules[500];
5859    uint32_t rulesLength = 0;
5860    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5861    UCollationResult collResult;
5862
5863    uint8_t baseKey[256];
5864    uint32_t baseKeyLength;
5865    uint8_t beforeKey[256];
5866    uint32_t beforeKeyLength;
5867
5868    UChar base[] = { 0x03b1 }; /* base */
5869    int32_t baseLen = sizeof(base)/sizeof(*base);
5870
5871    UChar before[] = { 0x0e01 }; /* ko kai */
5872    int32_t beforeLen = sizeof(before)/sizeof(*before);
5873
5874    /*UChar *data[] = { before, base };
5875    genericRulesStarter(srules, data, 2);*/
5876
5877    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5878
5879
5880    /* build collator */
5881    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5882
5883    rulesLength = u_unescape(srules, rules, LEN(rules));
5884    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5885    if(U_FAILURE(status)) {
5886        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5887        return;
5888    }
5889
5890    /* check collation results - before rule applied but not script reordering */
5891    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5892    if (collResult != UCOL_GREATER) {
5893        log_err("Collation result not correct before script reordering = %d\n", collResult);
5894    }
5895
5896    /* check the lead byte of the collation keys before script reordering */
5897    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5898    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5899    if (baseKey[0] != beforeKey[0]) {
5900      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5901   }
5902
5903    /* reorder the scripts */
5904    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5905    if(U_FAILURE(status)) {
5906        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5907        return;
5908    }
5909
5910    /* check collation results - before rule applied and after script reordering */
5911    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5912    if (collResult != UCOL_GREATER) {
5913        log_err("Collation result not correct after script reordering = %d\n", collResult);
5914    }
5915
5916    /* check the lead byte of the collation keys after script reordering */
5917    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5918    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5919    if (baseKey[0] != beforeKey[0]) {
5920        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5921    }
5922
5923    ucol_close(myCollation);
5924}
5925
5926/*
5927 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5928 */
5929static void TestNonLeadBytesDuringCollationReordering(void)
5930{
5931    UErrorCode status = U_ZERO_ERROR;
5932    UCollator  *myCollation;
5933    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5934
5935    uint8_t baseKey[256];
5936    uint32_t baseKeyLength;
5937    uint8_t reorderKey[256];
5938    uint32_t reorderKeyLength;
5939
5940    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5941
5942    uint32_t i;
5943
5944
5945    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5946
5947    /* build collator tertiary */
5948    myCollation = ucol_open("", &status);
5949    ucol_setStrength(myCollation, UCOL_TERTIARY);
5950    if(U_FAILURE(status)) {
5951        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5952        return;
5953    }
5954    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5955
5956    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5957    if(U_FAILURE(status)) {
5958        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5959        return;
5960    }
5961    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5962
5963    if (baseKeyLength != reorderKeyLength) {
5964        log_err("Key lengths not the same during reordering.\n");
5965        return;
5966    }
5967
5968    for (i = 1; i < baseKeyLength; i++) {
5969        if (baseKey[i] != reorderKey[i]) {
5970            log_err("Collation key bytes not the same at position %d.\n", i);
5971            return;
5972        }
5973    }
5974    ucol_close(myCollation);
5975
5976    /* build collator quaternary */
5977    myCollation = ucol_open("", &status);
5978    ucol_setStrength(myCollation, UCOL_QUATERNARY);
5979    if(U_FAILURE(status)) {
5980        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5981        return;
5982    }
5983    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5984
5985    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5986    if(U_FAILURE(status)) {
5987        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5988        return;
5989    }
5990    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5991
5992    if (baseKeyLength != reorderKeyLength) {
5993        log_err("Key lengths not the same during reordering.\n");
5994        return;
5995    }
5996
5997    for (i = 1; i < baseKeyLength; i++) {
5998        if (baseKey[i] != reorderKey[i]) {
5999            log_err("Collation key bytes not the same at position %d.\n", i);
6000            return;
6001        }
6002    }
6003    ucol_close(myCollation);
6004}
6005
6006/*
6007 * Test reordering API.
6008 */
6009static void TestReorderingAPI(void)
6010{
6011    UErrorCode status = U_ZERO_ERROR;
6012    UCollator  *myCollation;
6013    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6014    int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6015    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6016    UCollationResult collResult;
6017    int32_t retrievedReorderCodesLength;
6018    int32_t retrievedReorderCodes[10];
6019    UChar greekString[] = { 0x03b1 };
6020    UChar punctuationString[] = { 0x203e };
6021    int loopIndex;
6022
6023    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6024
6025    /* build collator tertiary */
6026    myCollation = ucol_open("", &status);
6027    ucol_setStrength(myCollation, UCOL_TERTIARY);
6028    if(U_FAILURE(status)) {
6029        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6030        return;
6031    }
6032
6033    /* set the reorderding */
6034    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6035    if (U_FAILURE(status)) {
6036        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6037        return;
6038    }
6039
6040    /* get the reordering */
6041    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6042    if (status != U_BUFFER_OVERFLOW_ERROR) {
6043        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6044        return;
6045    }
6046    status = U_ZERO_ERROR;
6047    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6048        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6049        return;
6050    }
6051    /* now let's really get it */
6052    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6053    if (U_FAILURE(status)) {
6054        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6055        return;
6056    }
6057    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6058        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6059        return;
6060    }
6061    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6062        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6063            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6064            return;
6065        }
6066    }
6067    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6068    if (collResult != UCOL_LESS) {
6069        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6070        return;
6071    }
6072
6073    /* clear the reordering */
6074    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6075    if (U_FAILURE(status)) {
6076        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6077        return;
6078    }
6079
6080    /* get the reordering again */
6081    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6082    if (retrievedReorderCodesLength != 0) {
6083        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6084        return;
6085    }
6086
6087    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6088    if (collResult != UCOL_GREATER) {
6089        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6090        return;
6091    }
6092
6093    /* test for error condition on duplicate reorder codes */
6094    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6095    if (!U_FAILURE(status)) {
6096        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6097        return;
6098    }
6099
6100    status = U_ZERO_ERROR;
6101    /* test for reorder codes after a reset code */
6102    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6103    if (!U_FAILURE(status)) {
6104        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6105        return;
6106    }
6107
6108    ucol_close(myCollation);
6109}
6110
6111/*
6112 * Test reordering API.
6113 */
6114static void TestReorderingAPIWithRuleCreatedCollator(void)
6115{
6116    UErrorCode status = U_ZERO_ERROR;
6117    UCollator  *myCollation;
6118    UChar rules[90];
6119    int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6120    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6121    UCollationResult collResult;
6122    int32_t retrievedReorderCodesLength;
6123    int32_t retrievedReorderCodes[10];
6124    UChar greekString[] = { 0x03b1 };
6125    UChar punctuationString[] = { 0x203e };
6126    UChar hanString[] = { 0x65E5, 0x672C };
6127    int loopIndex;
6128
6129    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6130
6131    /* build collator from rules */
6132    u_uastrcpy(rules, "[reorder Hani Grek]");
6133    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6134    if(U_FAILURE(status)) {
6135        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6136        return;
6137    }
6138
6139    /* get the reordering */
6140    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6141    if (U_FAILURE(status)) {
6142        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6143        return;
6144    }
6145    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6146        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6147        return;
6148    }
6149    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6150        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6151            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6152            return;
6153        }
6154    }
6155    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6156    if (collResult != UCOL_GREATER) {
6157        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6158        return;
6159    }
6160
6161
6162    /* set the reorderding */
6163    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6164    if (U_FAILURE(status)) {
6165        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6166        return;
6167    }
6168
6169    /* get the reordering */
6170    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6171    if (status != U_BUFFER_OVERFLOW_ERROR) {
6172        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6173        return;
6174    }
6175    status = U_ZERO_ERROR;
6176    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6177        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6178        return;
6179    }
6180    /* now let's really get it */
6181    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6182    if (U_FAILURE(status)) {
6183        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6184        return;
6185    }
6186    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6187        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6188        return;
6189    }
6190    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6191        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6192            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6193            return;
6194        }
6195    }
6196    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6197    if (collResult != UCOL_LESS) {
6198        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6199        return;
6200    }
6201
6202    /* clear the reordering */
6203    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6204    if (U_FAILURE(status)) {
6205        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6206        return;
6207    }
6208
6209    /* get the reordering again */
6210    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6211    if (retrievedReorderCodesLength != 0) {
6212        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6213        return;
6214    }
6215
6216    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6217    if (collResult != UCOL_GREATER) {
6218        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6219        return;
6220    }
6221
6222    ucol_close(myCollation);
6223}
6224
/*
 * qsort() comparator that orders int32_t values ascending.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return -1, 0 or 1 when *a is less than, equal to or greater than *b.
 *         Uses comparisons instead of subtraction so that widely separated
 *         values cannot overflow.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
  const int32_t lhs = *(const int32_t *)a;
  const int32_t rhs = *(const int32_t *)b;

  return (lhs > rhs) - (lhs < rhs);
}
6229
6230static void TestEquivalentReorderingScripts(void) {
6231    UErrorCode status = U_ZERO_ERROR;
6232    int32_t equivalentScripts[50];
6233    int32_t equivalentScriptsLength;
6234    int loopIndex;
6235    int32_t equivalentScriptsResult[] = {
6236        USCRIPT_BOPOMOFO,
6237        USCRIPT_LISU,
6238        USCRIPT_LYCIAN,
6239        USCRIPT_CARIAN,
6240        USCRIPT_LYDIAN,
6241        USCRIPT_YI,
6242        USCRIPT_OLD_ITALIC,
6243        USCRIPT_GOTHIC,
6244        USCRIPT_DESERET,
6245        USCRIPT_SHAVIAN,
6246        USCRIPT_OSMANYA,
6247        USCRIPT_LINEAR_B,
6248        USCRIPT_CYPRIOT,
6249        USCRIPT_OLD_SOUTH_ARABIAN,
6250        USCRIPT_AVESTAN,
6251        USCRIPT_IMPERIAL_ARAMAIC,
6252        USCRIPT_INSCRIPTIONAL_PARTHIAN,
6253        USCRIPT_INSCRIPTIONAL_PAHLAVI,
6254        USCRIPT_UGARITIC,
6255        USCRIPT_OLD_PERSIAN,
6256        USCRIPT_CUNEIFORM,
6257        USCRIPT_EGYPTIAN_HIEROGLYPHS,
6258        USCRIPT_PHONETIC_POLLARD,
6259        USCRIPT_SORA_SOMPENG,
6260        USCRIPT_MEROITIC_CURSIVE,
6261        USCRIPT_MEROITIC_HIEROGLYPHS
6262    };
6263
6264    qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6265
6266    /* UScript.GOTHIC */
6267    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6268    if (U_FAILURE(status)) {
6269        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6270        return;
6271    }
6272    /*
6273    fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6274    fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6275    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6276        fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6277    }
6278    */
6279    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6280        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6281        return;
6282    }
6283    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6284        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6285            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6286            return;
6287        }
6288    }
6289
6290    /* UScript.SHAVIAN */
6291    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6292    if (U_FAILURE(status)) {
6293        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6294        return;
6295    }
6296    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6297        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6298        return;
6299    }
6300    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6301        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6302            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6303            return;
6304        }
6305    }
6306}
6307
6308static void TestReorderingAcrossCloning(void)
6309{
6310    UErrorCode status = U_ZERO_ERROR;
6311    UCollator  *myCollation;
6312    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6313    UCollator *clonedCollation;
6314    int32_t bufferSize;
6315    int32_t retrievedReorderCodesLength;
6316    int32_t retrievedReorderCodes[10];
6317    int loopIndex;
6318
6319    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6320
6321    /* build collator tertiary */
6322    myCollation = ucol_open("", &status);
6323    ucol_setStrength(myCollation, UCOL_TERTIARY);
6324    if(U_FAILURE(status)) {
6325        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6326        return;
6327    }
6328
6329    /* set the reorderding */
6330    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6331    if (U_FAILURE(status)) {
6332        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6333        return;
6334    }
6335
6336    /* clone the collator */
6337    clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
6338    if (U_FAILURE(status)) {
6339        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6340        return;
6341    }
6342
6343    /* get the reordering */
6344    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6345    if (U_FAILURE(status)) {
6346        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6347        return;
6348    }
6349    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6350        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6351        return;
6352    }
6353    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6354        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6355            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6356            return;
6357        }
6358    }
6359
6360    /*uprv_free(buffer);*/
6361    ucol_close(myCollation);
6362    ucol_close(clonedCollation);
6363}
6364
6365/*
6366 * Utility function to test one collation reordering test case set.
6367 * @param testcases Array of test cases.
6368 * @param n_testcases Size of the array testcases.
6369 * @param reorderTokens Array of reordering codes.
6370 * @param reorderTokensLen Size of the array reorderTokens.
6371 */
6372static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6373{
6374    uint32_t testCaseNum;
6375    UErrorCode status = U_ZERO_ERROR;
6376    UCollator  *myCollation;
6377
6378    myCollation = ucol_open("", &status);
6379    if (U_FAILURE(status)) {
6380        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6381        return;
6382    }
6383    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6384    if(U_FAILURE(status)) {
6385        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6386        return;
6387    }
6388
6389    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6390        doTest(myCollation,
6391            testCases[testCaseNum].source,
6392            testCases[testCaseNum].target,
6393            testCases[testCaseNum].result
6394        );
6395    }
6396    ucol_close(myCollation);
6397}
6398
6399static void TestGreekFirstReorder(void)
6400{
6401    const char* strRules[] = {
6402        "[reorder Grek]"
6403    };
6404
6405    const int32_t apiRules[] = {
6406        USCRIPT_GREEK
6407    };
6408
6409    const static OneTestCase privateUseCharacterStrings[] = {
6410        { {0x0391}, {0x0391}, UCOL_EQUAL },
6411        { {0x0041}, {0x0391}, UCOL_GREATER },
6412        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6413        { {0x0060}, {0x0391}, UCOL_LESS },
6414        { {0x0391}, {0xe2dc}, UCOL_LESS },
6415        { {0x0391}, {0x0060}, UCOL_GREATER },
6416    };
6417
6418    /* Test rules creation */
6419    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6420
6421    /* Test collation reordering API */
6422    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6423}
6424
6425static void TestGreekLastReorder(void)
6426{
6427    const char* strRules[] = {
6428        "[reorder Zzzz Grek]"
6429    };
6430
6431    const int32_t apiRules[] = {
6432        USCRIPT_UNKNOWN, USCRIPT_GREEK
6433    };
6434
6435    const static OneTestCase privateUseCharacterStrings[] = {
6436        { {0x0391}, {0x0391}, UCOL_EQUAL },
6437        { {0x0041}, {0x0391}, UCOL_LESS },
6438        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6439        { {0x0060}, {0x0391}, UCOL_LESS },
6440        { {0x0391}, {0xe2dc}, UCOL_GREATER },
6441    };
6442
6443    /* Test rules creation */
6444    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6445
6446    /* Test collation reordering API */
6447    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6448}
6449
6450static void TestNonScriptReorder(void)
6451{
6452    const char* strRules[] = {
6453        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6454    };
6455
6456    const int32_t apiRules[] = {
6457        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6458        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6459        UCOL_REORDER_CODE_CURRENCY
6460    };
6461
6462    const static OneTestCase privateUseCharacterStrings[] = {
6463        { {0x0391}, {0x0041}, UCOL_LESS },
6464        { {0x0041}, {0x0391}, UCOL_GREATER },
6465        { {0x0060}, {0x0041}, UCOL_LESS },
6466        { {0x0060}, {0x0391}, UCOL_GREATER },
6467        { {0x0024}, {0x0041}, UCOL_GREATER },
6468    };
6469
6470    /* Test rules creation */
6471    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6472
6473    /* Test collation reordering API */
6474    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6475}
6476
6477static void TestHaniReorder(void)
6478{
6479    const char* strRules[] = {
6480        "[reorder Hani]"
6481    };
6482    const int32_t apiRules[] = {
6483        USCRIPT_HAN
6484    };
6485
6486    const static OneTestCase privateUseCharacterStrings[] = {
6487        { {0x4e00}, {0x0041}, UCOL_LESS },
6488        { {0x4e00}, {0x0060}, UCOL_GREATER },
6489        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6490        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6491        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6492        { {0xfa27}, {0x0041}, UCOL_LESS },
6493        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6494    };
6495
6496    /* Test rules creation */
6497    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6498
6499    /* Test collation reordering API */
6500    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6501}
6502
6503static void TestHaniReorderWithOtherRules(void)
6504{
6505    const char* strRules[] = {
6506        "[reorder Hani] &b<a"
6507    };
6508    /*const int32_t apiRules[] = {
6509        USCRIPT_HAN
6510    };*/
6511
6512    const static OneTestCase privateUseCharacterStrings[] = {
6513        { {0x4e00}, {0x0041}, UCOL_LESS },
6514        { {0x4e00}, {0x0060}, UCOL_GREATER },
6515        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6516        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6517        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6518        { {0xfa27}, {0x0041}, UCOL_LESS },
6519        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6520        { {0x0062}, {0x0061}, UCOL_LESS },
6521    };
6522
6523    /* Test rules creation */
6524    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6525}
6526
6527static void TestMultipleReorder(void)
6528{
6529    const char* strRules[] = {
6530        "[reorder Grek Zzzz DIGIT Latn Hani]"
6531    };
6532
6533    const int32_t apiRules[] = {
6534        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6535    };
6536
6537    const static OneTestCase collationTestCases[] = {
6538        { {0x0391}, {0x0041}, UCOL_LESS},
6539        { {0x0031}, {0x0041}, UCOL_LESS},
6540        { {0x0041}, {0x4e00}, UCOL_LESS},
6541    };
6542
6543    /* Test rules creation */
6544    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6545
6546    /* Test collation reordering API */
6547    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6548}
6549
6550/*
6551 * Test that covers issue reported in ticket 8814
6552 */
6553static void TestReorderWithNumericCollation(void)
6554{
6555    UErrorCode status = U_ZERO_ERROR;
6556    UCollator  *myCollation;
6557    UCollator  *myReorderCollation;
6558    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6559    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6560    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6561    UChar fortyS[] = { 0x0053 };
6562    UChar fortyThreeP[] = { 0x0050 };
6563    uint8_t fortyS_sortKey[128];
6564    int32_t fortyS_sortKey_Length;
6565    uint8_t fortyThreeP_sortKey[128];
6566    int32_t fortyThreeP_sortKey_Length;
6567    uint8_t fortyS_sortKey_reorder[128];
6568    int32_t fortyS_sortKey_reorder_Length;
6569    uint8_t fortyThreeP_sortKey_reorder[128];
6570    int32_t fortyThreeP_sortKey_reorder_Length;
6571    UCollationResult collResult;
6572    UCollationResult collResultReorder;
6573
6574    log_verbose("Testing reordering with and without numeric collation\n");
6575
6576    /* build collator tertiary with numeric */
6577    myCollation = ucol_open("", &status);
6578    /*
6579    ucol_setStrength(myCollation, UCOL_TERTIARY);
6580    */
6581    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6582    if(U_FAILURE(status)) {
6583        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6584        return;
6585    }
6586
6587    /* build collator tertiary with numeric and reordering */
6588    myReorderCollation = ucol_open("", &status);
6589    /*
6590    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6591    */
6592    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6593    ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6594    if(U_FAILURE(status)) {
6595        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6596        return;
6597    }
6598
6599    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6600    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6601    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6602    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6603
6604    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6605        log_err_status(status, "ERROR: couldn't generate sort keys\n");
6606        return;
6607    }
6608    collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6609    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6610    /*
6611    fprintf(stderr, "\tcollResult = %x\n", collResult);
6612    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6613    fprintf(stderr, "\nfortyS\n");
6614    for (i = 0; i < fortyS_sortKey_Length; i++) {
6615        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6616    }
6617    fprintf(stderr, "\nfortyThreeP\n");
6618    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6619        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6620    }
6621    */
6622    if (collResult != collResultReorder) {
6623        log_err_status(status, "ERROR: collation results should have been the same.\n");
6624        return;
6625    }
6626
6627    ucol_close(myCollation);
6628    ucol_close(myReorderCollation);
6629}
6630
/*
 * Compares two zero-terminated byte sequences byte by byte.
 *
 * Returns 0 when both sequences are identical up to and including the
 * terminating zero byte, -1 when the first differing byte of a is smaller
 * than the corresponding byte of b, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  while (*a == *b) {
    if (*a == 0) {
      /* Reached the terminator of both sequences without a difference. */
      return 0;
    }
    ++a;
    ++b;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
6640
6641static void TestImportRulesDeWithPhonebook(void)
6642{
6643  const char* normalRules[] = {
6644    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6645    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6646    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6647  };
6648  const OneTestCase normalTests[] = {
6649    { {0x00e6}, {0x00c6}, UCOL_LESS},
6650    { {0x00fc}, {0x00dc}, UCOL_GREATER},
6651  };
6652
6653  const char* importRules[] = {
6654    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6655    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6656    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6657  };
6658  const OneTestCase importTests[] = {
6659    { {0x00e6}, {0x00c6}, UCOL_LESS},
6660    { {0x00fc}, {0x00dc}, UCOL_LESS},
6661  };
6662
6663  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6664  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6665}
6666
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Compares the same three string pairs under four rule sets: a dummy
 * tailoring, the imported root "eor" rules, the imported fi "standard" rules,
 * and both imports combined.  Each rule set has its own table of expected
 * results.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expected results with only the dummy rule applied. */
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };

  /* With the eor import, the two pairs involving U+00A3/U+00A5 are expected to compare equal. */
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };

  /* With the fi standard import, U+0110 is expected to sort after U+00F0. */
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  /* Only referenced by the commented-out call at the end of this function. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expected results when both imports are combined. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
  /* The combined expectations are checked against the two explicit imports. */
  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */

}
#endif
6735
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 *
 * The test compares U+3402 with U+4E1E twice: once under a dummy tailoring
 * (expected GREATER) and once with the unihan rules imported (expected LESS).
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expected result with only the dummy rule applied. */
  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Unihan rules, imported from the ko "unihan" collation type. */
  const char* unihanRules[] = {
    "[import ko-u-co-unihan]",
  };

  /* Expected result once the unihan rules are imported: the order is reversed. */
  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));

}
#endif
6767
6768static void TestImport(void)
6769{
6770    UCollator* vicoll;
6771    UCollator* escoll;
6772    UCollator* viescoll;
6773    UCollator* importviescoll;
6774    UParseError error;
6775    UErrorCode status = U_ZERO_ERROR;
6776    UChar* virules;
6777    int32_t viruleslength;
6778    UChar* esrules;
6779    int32_t esruleslength;
6780    UChar* viesrules;
6781    int32_t viesruleslength;
6782    char srules[500] = "[import vi][import es]";
6783    UChar rules[500];
6784    uint32_t length = 0;
6785    int32_t itemCount;
6786    int32_t i, k;
6787    UChar32 start;
6788    UChar32 end;
6789    UChar str[500];
6790    int32_t strLength;
6791
6792    uint8_t sk1[500];
6793    uint8_t sk2[500];
6794
6795    UBool b;
6796    USet* tailoredSet;
6797    USet* importTailoredSet;
6798
6799
6800    vicoll = ucol_open("vi", &status);
6801    if(U_FAILURE(status)){
6802        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6803        return;
6804    }
6805
6806    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6807    escoll = ucol_open("es", &status);
6808    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6809    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6810    viesrules[0] = 0;
6811    u_strcat(viesrules, virules);
6812    u_strcat(viesrules, esrules);
6813    viesruleslength = viruleslength + esruleslength;
6814    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6815
6816    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6817    length = u_unescape(srules, rules, 500);
6818    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6819    if(U_FAILURE(status)){
6820        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6821        return;
6822    }
6823
6824    tailoredSet = ucol_getTailoredSet(viescoll, &status);
6825    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6826
6827    if(!uset_equals(tailoredSet, importTailoredSet)){
6828        log_err("Tailored sets not equal");
6829    }
6830
6831    uset_close(importTailoredSet);
6832
6833    itemCount = uset_getItemCount(tailoredSet);
6834
6835    for( i = 0; i < itemCount; i++){
6836        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6837        if(strLength < 2){
6838            for (; start <= end; start++){
6839                k = 0;
6840                U16_APPEND(str, k, 500, start, b);
6841                ucol_getSortKey(viescoll, str, 1, sk1, 500);
6842                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6843                if(compare_uint8_t_arrays(sk1, sk2) != 0){
6844                    log_err("Sort key for %s not equal\n", str);
6845                    break;
6846                }
6847            }
6848        }else{
6849            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6850            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6851            if(compare_uint8_t_arrays(sk1, sk2) != 0){
6852                log_err("ZZSort key for %s not equal\n", str);
6853                break;
6854            }
6855
6856        }
6857    }
6858
6859    uset_close(tailoredSet);
6860
6861    uprv_free(viesrules);
6862
6863    ucol_close(vicoll);
6864    ucol_close(escoll);
6865    ucol_close(viescoll);
6866    ucol_close(importviescoll);
6867}
6868
6869static void TestImportWithType(void)
6870{
6871    UCollator* vicoll;
6872    UCollator* decoll;
6873    UCollator* videcoll;
6874    UCollator* importvidecoll;
6875    UParseError error;
6876    UErrorCode status = U_ZERO_ERROR;
6877    const UChar* virules;
6878    int32_t viruleslength;
6879    const UChar* derules;
6880    int32_t deruleslength;
6881    UChar* viderules;
6882    int32_t videruleslength;
6883    const char srules[500] = "[import vi][import de-u-co-phonebk]";
6884    UChar rules[500];
6885    uint32_t length = 0;
6886    int32_t itemCount;
6887    int32_t i, k;
6888    UChar32 start;
6889    UChar32 end;
6890    UChar str[500];
6891    int32_t strLength;
6892
6893    uint8_t sk1[500];
6894    uint8_t sk2[500];
6895
6896    USet* tailoredSet;
6897    USet* importTailoredSet;
6898
6899    vicoll = ucol_open("vi", &status);
6900    if(U_FAILURE(status)){
6901        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6902        return;
6903    }
6904    virules = ucol_getRules(vicoll, &viruleslength);
6905    /* decoll = ucol_open("de@collation=phonebook", &status); */
6906    decoll = ucol_open("de-u-co-phonebk", &status);
6907    if(U_FAILURE(status)){
6908        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6909        return;
6910    }
6911
6912
6913    derules = ucol_getRules(decoll, &deruleslength);
6914    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6915    viderules[0] = 0;
6916    u_strcat(viderules, virules);
6917    u_strcat(viderules, derules);
6918    videruleslength = viruleslength + deruleslength;
6919    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6920
6921    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6922    length = u_unescape(srules, rules, 500);
6923    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6924    if(U_FAILURE(status)){
6925        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6926        return;
6927    }
6928
6929    tailoredSet = ucol_getTailoredSet(videcoll, &status);
6930    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6931
6932    if(!uset_equals(tailoredSet, importTailoredSet)){
6933        log_err("Tailored sets not equal");
6934    }
6935
6936    uset_close(importTailoredSet);
6937
6938    itemCount = uset_getItemCount(tailoredSet);
6939
6940    for( i = 0; i < itemCount; i++){
6941        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6942        if(strLength < 2){
6943            for (; start <= end; start++){
6944                k = 0;
6945                U16_APPEND_UNSAFE(str, k, start);
6946                ucol_getSortKey(videcoll, str, 1, sk1, 500);
6947                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6948                if(compare_uint8_t_arrays(sk1, sk2) != 0){
6949                    log_err("Sort key for %s not equal\n", str);
6950                    break;
6951                }
6952            }
6953        }else{
6954            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6955            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6956            if(compare_uint8_t_arrays(sk1, sk2) != 0){
6957                log_err("Sort key for %s not equal\n", str);
6958                break;
6959            }
6960
6961        }
6962    }
6963
6964    uset_close(tailoredSet);
6965
6966    uprv_free(viderules);
6967
6968    ucol_close(videcoll);
6969    ucol_close(importvidecoll);
6970    ucol_close(vicoll);
6971    ucol_close(decoll);
6972}
6973
/*
 * Long, mostly upper-case input strings referenced by the longUpperStrItems
 * table and fed to ucol_getSortKey() in TestCaseLevelBufferOverflow.
 * Each array is a sequence of UTF-16 code units without a terminating zero;
 * lengths are taken with MY_ARRAY_LEN.
 */

/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};

/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
/* Each row below is one repetition of the 25-unit pattern. */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};

/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
/* Each row below is one repetition of the 27-unit pattern (26 letters plus a space). */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
7012
7013#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
7014
7015typedef struct {
7016    const UChar * longUpperStrPtr;
7017    int32_t       longUpperStrLen;
7018} LongUpperStrItem;
7019
7020/* String pointers must be in reverse collation order of the corresponding strings */
7021static const LongUpperStrItem longUpperStrItems[] = {
7022    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
7023    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
7024    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
7025    { NULL,          0                           }
7026};
7027
7028enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7029
7030/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7031static void TestCaseLevelBufferOverflow(void)
7032{
7033    UErrorCode status = U_ZERO_ERROR;
7034    UCollator * ucol = ucol_open("root", &status);
7035    if ( U_SUCCESS(status) ) {
7036        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7037        if ( U_SUCCESS(status) ) {
7038            const LongUpperStrItem * itemPtr;
7039            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7040            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7041                int32_t sortKeyLen;
7042                if (itemPtr > longUpperStrItems) {
7043                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7044                }
7045                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7046                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7047                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7048                    break;
7049                }
7050                if ( itemPtr > longUpperStrItems ) {
7051                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7052                    if (compareResult >= 0) {
7053                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7054                    }
7055                }
7056            }
7057        } else {
7058            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7059        }
7060        ucol_close(ucol);
7061    } else {
7062        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7063    }
7064}
7065
7066
/*
 * Registers test function x with addTest() under the path
 * "tscoll/cmsccoll/<name of x>"; expects a variable named root in scope.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the tests of this file to the test tree rooted at root, in the order
 * listed below.  Entries that are commented out are not registered; the
 * comment next to each one gives the reason where one was recorded.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    /* Same-strength list/range syntax, special and private-use characters, and rule imports. */
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script and reorder-code tests. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
}
7177
7178#endif /* #if !UCONFIG_NO_COLLATION */
7179