1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date      Name               Description
12*           Madhu Katragadda   Ported for C API
13* 02/19/01  synwee             Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
29#include "unicode/putil.h"
30#include "callcoll.h"
31#include "cmemory.h"
32#include "cintltst.h"
33#include "citertst.h"
34#include "ccolltst.h"
35#include "filestrm.h"
36#include "cstring.h"
37#include "ucol_imp.h"
38#include "ucol_tok.h"
39#include "uparse.h"
40#include <stdio.h>
41
42extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43
44void addCollIterTest(TestNode** root)
45{
46    addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
47    addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
48    addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
49    addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
50    addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
51    addTest(root, &TestNormalizedUnicodeChar,
52                                "tscoll/citertst/TestNormalizedUnicodeChar");
53    addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
54    addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
55    addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
56    addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
57    addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
58    addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59    addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
60    addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
61    addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
62    addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
63}
64
/* Locale IDs the locale-comparison tests in this file open collators for. */

static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
68
69static void TestBug672() {
70    UErrorCode  status = U_ZERO_ERROR;
71    UChar       pattern[20];
72    UChar       text[50];
73    int         i;
74    int         result[3][3];
75
76    u_uastrcpy(pattern, "resume");
77    u_uastrcpy(text, "Time to resume updating my resume.");
78
79    for (i = 0; i < 3; ++ i) {
80        UCollator          *coll = ucol_open(LOCALES[i], &status);
81        UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
82                                                     &status);
83        UCollationElements *titer = ucol_openElements(coll, text, -1,
84                                                     &status);
85        if (U_FAILURE(status)) {
86            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
87                    myErrorName(status));
88            return;
89        }
90
91        log_verbose("locale tested %s\n", LOCALES[i]);
92
93        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
94               U_SUCCESS(status)) {
95        }
96        if (U_FAILURE(status)) {
97            log_err("ERROR: reversing collation iterator :%s\n",
98                    myErrorName(status));
99            return;
100        }
101        ucol_reset(pitr);
102
103        ucol_setOffset(titer, u_strlen(pattern), &status);
104        if (U_FAILURE(status)) {
105            log_err("ERROR: setting offset in collator :%s\n",
106                    myErrorName(status));
107            return;
108        }
109        result[i][0] = ucol_getOffset(titer);
110        log_verbose("Text iterator set to offset %d\n", result[i][0]);
111
112        /* Use previous() */
113        ucol_previous(titer, &status);
114        result[i][1] = ucol_getOffset(titer);
115        log_verbose("Current offset %d after previous\n", result[i][1]);
116
117        /* Add one to index */
118        log_verbose("Adding one to current offset...\n");
119        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
120        if (U_FAILURE(status)) {
121            log_err("ERROR: setting offset in collator :%s\n",
122                    myErrorName(status));
123            return;
124        }
125        result[i][2] = ucol_getOffset(titer);
126        log_verbose("Current offset in text = %d\n", result[i][2]);
127        ucol_closeElements(pitr);
128        ucol_closeElements(titer);
129        ucol_close(coll);
130    }
131
132    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
133        uprv_memcmp(result[1], result[2], 3) != 0) {
134        log_err("ERROR: Different locales have different offsets at the same character\n");
135    }
136}
137
138
139
140/*  Running this test with normalization enabled showed up a bug in the incremental
141    normalization code. */
142static void TestBug672Normalize() {
143    UErrorCode  status = U_ZERO_ERROR;
144    UChar       pattern[20];
145    UChar       text[50];
146    int         i;
147    int         result[3][3];
148
149    u_uastrcpy(pattern, "resume");
150    u_uastrcpy(text, "Time to resume updating my resume.");
151
152    for (i = 0; i < 3; ++ i) {
153        UCollator          *coll = ucol_open(LOCALES[i], &status);
154        UCollationElements *pitr = NULL;
155        UCollationElements *titer = NULL;
156
157        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
158
159        pitr = ucol_openElements(coll, pattern, -1, &status);
160        titer = ucol_openElements(coll, text, -1, &status);
161        if (U_FAILURE(status)) {
162            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
163                    myErrorName(status));
164            return;
165        }
166
167        log_verbose("locale tested %s\n", LOCALES[i]);
168
169        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
170               U_SUCCESS(status)) {
171        }
172        if (U_FAILURE(status)) {
173            log_err("ERROR: reversing collation iterator :%s\n",
174                    myErrorName(status));
175            return;
176        }
177        ucol_reset(pitr);
178
179        ucol_setOffset(titer, u_strlen(pattern), &status);
180        if (U_FAILURE(status)) {
181            log_err("ERROR: setting offset in collator :%s\n",
182                    myErrorName(status));
183            return;
184        }
185        result[i][0] = ucol_getOffset(titer);
186        log_verbose("Text iterator set to offset %d\n", result[i][0]);
187
188        /* Use previous() */
189        ucol_previous(titer, &status);
190        result[i][1] = ucol_getOffset(titer);
191        log_verbose("Current offset %d after previous\n", result[i][1]);
192
193        /* Add one to index */
194        log_verbose("Adding one to current offset...\n");
195        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
196        if (U_FAILURE(status)) {
197            log_err("ERROR: setting offset in collator :%s\n",
198                    myErrorName(status));
199            return;
200        }
201        result[i][2] = ucol_getOffset(titer);
202        log_verbose("Current offset in text = %d\n", result[i][2]);
203        ucol_closeElements(pitr);
204        ucol_closeElements(titer);
205        ucol_close(coll);
206    }
207
208    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
209        uprv_memcmp(result[1], result[2], 3) != 0) {
210        log_err("ERROR: Different locales have different offsets at the same character\n");
211    }
212}
213
214
215
216
217/**
218 * Test for CollationElementIterator previous and next for the whole set of
219 * unicode characters.
220 */
221static void TestUnicodeChar()
222{
223    UChar source[0x100];
224    UCollator *en_us;
225    UCollationElements *iter;
226    UErrorCode status = U_ZERO_ERROR;
227    UChar codepoint;
228
229    UChar *test;
230    en_us = ucol_open("en_US", &status);
231    if (U_FAILURE(status)){
232       log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
233              myErrorName(status));
234       return;
235    }
236
237    for (codepoint = 1; codepoint < 0xFFFE;)
238    {
239      test = source;
240
241      while (codepoint % 0xFF != 0)
242      {
243        if (u_isdefined(codepoint))
244          *(test ++) = codepoint;
245        codepoint ++;
246      }
247
248      if (u_isdefined(codepoint))
249        *(test ++) = codepoint;
250
251      if (codepoint != 0xFFFF)
252        codepoint ++;
253
254      *test = 0;
255      iter=ucol_openElements(en_us, source, u_strlen(source), &status);
256      if(U_FAILURE(status)){
257          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
258              myErrorName(status));
259          ucol_close(en_us);
260          return;
261      }
262      /* A basic test to see if it's working at all */
263      log_verbose("codepoint testing %x\n", codepoint);
264      backAndForth(iter);
265      ucol_closeElements(iter);
266
267      /* null termination test */
268      iter=ucol_openElements(en_us, source, -1, &status);
269      if(U_FAILURE(status)){
270          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
271              myErrorName(status));
272          ucol_close(en_us);
273          return;
274      }
275      /* A basic test to see if it's working at all */
276      backAndForth(iter);
277      ucol_closeElements(iter);
278    }
279
280    ucol_close(en_us);
281}
282
283/**
284 * Test for CollationElementIterator previous and next for the whole set of
285 * unicode characters with normalization on.
286 */
287static void TestNormalizedUnicodeChar()
288{
289    UChar source[0x100];
290    UCollator *th_th;
291    UCollationElements *iter;
292    UErrorCode status = U_ZERO_ERROR;
293    UChar codepoint;
294
295    UChar *test;
296    /* thai should have normalization on */
297    th_th = ucol_open("th_TH", &status);
298    if (U_FAILURE(status)){
299        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
300              myErrorName(status));
301        return;
302    }
303
304    for (codepoint = 1; codepoint < 0xFFFE;)
305    {
306      test = source;
307
308      while (codepoint % 0xFF != 0)
309      {
310        if (u_isdefined(codepoint))
311          *(test ++) = codepoint;
312        codepoint ++;
313      }
314
315      if (u_isdefined(codepoint))
316        *(test ++) = codepoint;
317
318      if (codepoint != 0xFFFF)
319        codepoint ++;
320
321      *test = 0;
322      iter=ucol_openElements(th_th, source, u_strlen(source), &status);
323      if(U_FAILURE(status)){
324          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
325              myErrorName(status));
326            ucol_close(th_th);
327          return;
328      }
329
330      backAndForth(iter);
331      ucol_closeElements(iter);
332
333      iter=ucol_openElements(th_th, source, -1, &status);
334      if(U_FAILURE(status)){
335          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
336              myErrorName(status));
337            ucol_close(th_th);
338          return;
339      }
340
341      backAndForth(iter);
342      ucol_closeElements(iter);
343    }
344
345    ucol_close(th_th);
346}
347
348/**
349* Test the incremental normalization
350*/
351static void TestNormalization()
352{
353          UErrorCode          status = U_ZERO_ERROR;
354    const char               *str    =
355                            "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
356          UCollator          *coll;
357          UChar               rule[50];
358          int                 rulelen = u_unescape(str, rule, 50);
359          int                 count = 0;
360    const char                *testdata[] =
361                        {"\\u1ED9", "o\\u0323\\u0302",
362                        "\\u0300\\u0315", "\\u0315\\u0300",
363                        "A\\u0300\\u0315B", "A\\u0315\\u0300B",
364                        "A\\u0316\\u0315B", "A\\u0315\\u0316B",
365                        "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
366                        "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
367                        "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368    int32_t   srclen;
369    UChar source[10];
370    UCollationElements *iter;
371
372    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
373    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
374    if (U_FAILURE(status)){
375        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
376              myErrorName(status));
377        return;
378    }
379
380    srclen = u_unescape(testdata[0], source, 10);
381    iter = ucol_openElements(coll, source, srclen, &status);
382    backAndForth(iter);
383    ucol_closeElements(iter);
384
385    srclen = u_unescape(testdata[1], source, 10);
386    iter = ucol_openElements(coll, source, srclen, &status);
387    backAndForth(iter);
388    ucol_closeElements(iter);
389
390    while (count < 12) {
391        srclen = u_unescape(testdata[count], source, 10);
392        iter = ucol_openElements(coll, source, srclen, &status);
393
394        if (U_FAILURE(status)){
395            log_err("ERROR: in creation of collator element iterator\n %s\n",
396                  myErrorName(status));
397            return;
398        }
399        backAndForth(iter);
400        ucol_closeElements(iter);
401
402        iter = ucol_openElements(coll, source, -1, &status);
403
404        if (U_FAILURE(status)){
405            log_err("ERROR: in creation of collator element iterator\n %s\n",
406                  myErrorName(status));
407            return;
408        }
409        backAndForth(iter);
410        ucol_closeElements(iter);
411        count ++;
412    }
413    ucol_close(coll);
414}
415
416/**
417 * Test for CollationElementIterator.previous()
418 *
419 * @bug 4108758 - Make sure it works with contracting characters
420 *
421 */
422static void TestPrevious()
423{
424    UCollator *coll=NULL;
425    UChar rule[50];
426    UChar *source;
427    UCollator *c1, *c2, *c3;
428    UCollationElements *iter;
429    UErrorCode status = U_ZERO_ERROR;
430    UChar test1[50];
431    UChar test2[50];
432
433    u_uastrcpy(test1, "What subset of all possible test cases?");
434    u_uastrcpy(test2, "has the highest probability of detecting");
435    coll = ucol_open("en_US", &status);
436
437    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
438    log_verbose("English locale testing back and forth\n");
439    if(U_FAILURE(status)){
440        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
441            myErrorName(status));
442        ucol_close(coll);
443        return;
444    }
445    /* A basic test to see if it's working at all */
446    backAndForth(iter);
447    ucol_closeElements(iter);
448    ucol_close(coll);
449
450    /* Test with a contracting character sequence */
451    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
452    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
453
454    log_verbose("Contraction rule testing back and forth with no normalization\n");
455
456    if (c1 == NULL || U_FAILURE(status))
457    {
458        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
459            myErrorName(status));
460        return;
461    }
462    source=(UChar*)malloc(sizeof(UChar) * 20);
463    u_uastrcpy(source, "abchdcba");
464    iter=ucol_openElements(c1, source, u_strlen(source), &status);
465    if(U_FAILURE(status)){
466        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
467            myErrorName(status));
468        return;
469    }
470    backAndForth(iter);
471    ucol_closeElements(iter);
472    ucol_close(c1);
473
474    /* Test with an expanding character sequence */
475    u_uastrcpy(rule, "&a < b < c/abd < d");
476    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
477    log_verbose("Expansion rule testing back and forth with no normalization\n");
478    if (c2 == NULL || U_FAILURE(status))
479    {
480        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
481            myErrorName(status));
482        return;
483    }
484    u_uastrcpy(source, "abcd");
485    iter=ucol_openElements(c2, source, u_strlen(source), &status);
486    if(U_FAILURE(status)){
487        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
488            myErrorName(status));
489        return;
490    }
491    backAndForth(iter);
492    ucol_closeElements(iter);
493    ucol_close(c2);
494    /* Now try both */
495    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
496    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
497    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
498
499    if (c3 == NULL || U_FAILURE(status))
500    {
501        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
502            myErrorName(status));
503        return;
504    }
505    u_uastrcpy(source, "abcdbchdc");
506    iter=ucol_openElements(c3, source, u_strlen(source), &status);
507    if(U_FAILURE(status)){
508        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
509            myErrorName(status));
510        return;
511    }
512    backAndForth(iter);
513    ucol_closeElements(iter);
514    ucol_close(c3);
515    source[0] = 0x0e41;
516    source[1] = 0x0e02;
517    source[2] = 0x0e41;
518    source[3] = 0x0e02;
519    source[4] = 0x0e27;
520    source[5] = 0x61;
521    source[6] = 0x62;
522    source[7] = 0x63;
523    source[8] = 0;
524
525    coll = ucol_open("th_TH", &status);
526    log_verbose("Thai locale testing back and forth with normalization\n");
527    iter=ucol_openElements(coll, source, u_strlen(source), &status);
528    if(U_FAILURE(status)){
529        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
530            myErrorName(status));
531        return;
532    }
533    backAndForth(iter);
534    ucol_closeElements(iter);
535    ucol_close(coll);
536
537    /* prev test */
538    source[0] = 0x0061;
539    source[1] = 0x30CF;
540    source[2] = 0x3099;
541    source[3] = 0x30FC;
542    source[4] = 0;
543
544    coll = ucol_open("ja_JP", &status);
545    log_verbose("Japanese locale testing back and forth with normalization\n");
546    iter=ucol_openElements(coll, source, u_strlen(source), &status);
547    if(U_FAILURE(status)){
548        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
549            myErrorName(status));
550        return;
551    }
552    backAndForth(iter);
553    ucol_closeElements(iter);
554    ucol_close(coll);
555
556    free(source);
557}
558
559/**
560 * Test for getOffset() and setOffset()
561 */
562static void TestOffset()
563{
564    UErrorCode status= U_ZERO_ERROR;
565    UCollator *en_us=NULL;
566    UCollationElements *iter, *pristine;
567    int32_t offset;
568    OrderAndOffset *orders;
569    int32_t orderLength=0;
570    int     count = 0;
571    UChar test1[50];
572    UChar test2[50];
573
574    u_uastrcpy(test1, "What subset of all possible test cases?");
575    u_uastrcpy(test2, "has the highest probability of detecting");
576    en_us = ucol_open("en_US", &status);
577    log_verbose("Testing getOffset and setOffset for collations\n");
578    iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
579    if(U_FAILURE(status)){
580        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
581            myErrorName(status));
582        ucol_close(en_us);
583        return;
584    }
585
586    /* testing boundaries */
587    ucol_setOffset(iter, 0, &status);
588    if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
589        log_err("Error: After setting offset to 0, we should be at the end "
590                "of the backwards iteration");
591    }
592    ucol_setOffset(iter, u_strlen(test1), &status);
593    if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
594        log_err("Error: After setting offset to end of the string, we should "
595                "be at the end of the backwards iteration");
596    }
597
598    /* Run all the way through the iterator, then get the offset */
599
600    orders = getOrders(iter, &orderLength);
601
602    offset = ucol_getOffset(iter);
603
604    if (offset != u_strlen(test1))
605    {
606        log_err("offset at end != length %d vs %d\n", offset,
607            u_strlen(test1) );
608    }
609
610    /* Now set the offset back to the beginning and see if it works */
611    pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
612    if(U_FAILURE(status)){
613        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
614            myErrorName(status));
615    ucol_close(en_us);
616        return;
617    }
618    status = U_ZERO_ERROR;
619
620    ucol_setOffset(iter, 0, &status);
621    if (U_FAILURE(status))
622    {
623        log_err("setOffset failed. %s\n",    myErrorName(status));
624    }
625    else
626    {
627        assertEqual(iter, pristine);
628    }
629
630    ucol_closeElements(pristine);
631    ucol_closeElements(iter);
632    free(orders);
633
634    /* testing offsets in normalization buffer */
635    test1[0] = 0x61;
636    test1[1] = 0x300;
637    test1[2] = 0x316;
638    test1[3] = 0x62;
639    test1[4] = 0;
640    ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
641    iter = ucol_openElements(en_us, test1, 4, &status);
642    if(U_FAILURE(status)){
643        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
644            myErrorName(status));
645        ucol_close(en_us);
646        return;
647    }
648
649    count = 0;
650    while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651        U_SUCCESS(status)) {
652        switch (count) {
653        case 0:
654            if (ucol_getOffset(iter) != 1) {
655                log_err("ERROR: Offset of iteration should be 1\n");
656            }
657            break;
658        case 3:
659            if (ucol_getOffset(iter) != 4) {
660                log_err("ERROR: Offset of iteration should be 4\n");
661            }
662            break;
663        default:
664            if (ucol_getOffset(iter) != 3) {
665                log_err("ERROR: Offset of iteration should be 3\n");
666            }
667        }
668        count ++;
669    }
670
671    ucol_reset(iter);
672    count = 0;
673    while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
674        U_SUCCESS(status)) {
675        switch (count) {
676        case 0:
677        case 1:
678            if (ucol_getOffset(iter) != 3) {
679                log_err("ERROR: Offset of iteration should be 3\n");
680            }
681            break;
682        case 2:
683            if (ucol_getOffset(iter) != 1) {
684                log_err("ERROR: Offset of iteration should be 1\n");
685            }
686            break;
687        default:
688            if (ucol_getOffset(iter) != 0) {
689                log_err("ERROR: Offset of iteration should be 0\n");
690            }
691        }
692        count ++;
693    }
694
695    if(U_FAILURE(status)){
696        log_err("ERROR: in iterating collation elements %s\n",
697            myErrorName(status));
698    }
699
700    ucol_closeElements(iter);
701    ucol_close(en_us);
702}
703
704/**
705 * Test for setText()
706 */
707static void TestSetText()
708{
709    int32_t c,i;
710    UErrorCode status = U_ZERO_ERROR;
711    UCollator *en_us=NULL;
712    UCollationElements *iter1, *iter2;
713    UChar test1[50];
714    UChar test2[50];
715
716    u_uastrcpy(test1, "What subset of all possible test cases?");
717    u_uastrcpy(test2, "has the highest probability of detecting");
718    en_us = ucol_open("en_US", &status);
719    log_verbose("testing setText for Collation elements\n");
720    iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
721    if(U_FAILURE(status)){
722        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
723            myErrorName(status));
724    ucol_close(en_us);
725        return;
726    }
727    iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
728    if(U_FAILURE(status)){
729        log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
730            myErrorName(status));
731    ucol_close(en_us);
732        return;
733    }
734
735    /* Run through the second iterator just to exercise it */
736    c = ucol_next(iter2, &status);
737    i = 0;
738
739    while ( ++i < 10 && (c != UCOL_NULLORDER))
740    {
741        if (U_FAILURE(status))
742        {
743            log_err("iter2->next() returned an error. %s\n", myErrorName(status));
744            ucol_closeElements(iter2);
745            ucol_closeElements(iter1);
746    ucol_close(en_us);
747            return;
748        }
749
750        c = ucol_next(iter2, &status);
751    }
752
753    /* Now set it to point to the same string as the first iterator */
754    ucol_setText(iter2, test1, u_strlen(test1), &status);
755    if (U_FAILURE(status))
756    {
757        log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758    }
759    else
760    {
761        assertEqual(iter1, iter2);
762    }
763
764    /* Now set it to point to a null string with fake length*/
765    ucol_setText(iter2, NULL, 2, &status);
766    if (U_FAILURE(status))
767    {
768        log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
769    }
770    else
771    {
772        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773            log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774        }
775    }
776
777    ucol_closeElements(iter2);
778    ucol_closeElements(iter1);
779    ucol_close(en_us);
780}
781
782/** @bug 4108762
783 * Test for getMaxExpansion()
784 */
785static void TestMaxExpansion()
786{
787    UErrorCode          status = U_ZERO_ERROR;
788    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
789    UChar               ch     = 0;
790    UChar32             unassigned = 0xEFFFD;
791    UChar               supplementary[2];
792    uint32_t            stringOffset = 0;
793    UBool               isError = FALSE;
794    uint32_t            sorder = 0;
795    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
796    uint32_t            temporder = 0;
797
798    UChar rule[256];
799    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
800    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
801        UCOL_DEFAULT_STRENGTH,NULL, &status);
802    if(U_SUCCESS(status) && coll) {
803      iter = ucol_openElements(coll, &ch, 1, &status);
804
805      while (ch < 0xFFFF && U_SUCCESS(status)) {
806          int      count = 1;
807          uint32_t order;
808          int32_t  size = 0;
809
810          ch ++;
811
812          ucol_setText(iter, &ch, 1, &status);
813          order = ucol_previous(iter, &status);
814
815          /* thai management */
816          if (order == 0)
817              order = ucol_previous(iter, &status);
818
819          while (U_SUCCESS(status) &&
820              ucol_previous(iter, &status) != UCOL_NULLORDER) {
821              count ++;
822          }
823
824          size = ucol_getMaxExpansion(iter, order);
825          if (U_FAILURE(status) || size < count) {
826              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
827                  ch, count);
828          }
829      }
830
831      /* testing for exact max expansion */
832      ch = 0;
833      while (ch < 0x61) {
834          uint32_t order;
835          int32_t  size;
836          ucol_setText(iter, &ch, 1, &status);
837          order = ucol_previous(iter, &status);
838          size  = ucol_getMaxExpansion(iter, order);
839          if (U_FAILURE(status) || size != 1) {
840              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
841                  ch, 1);
842          }
843          ch ++;
844      }
845
846      ch = 0x63;
847      ucol_setText(iter, &ch, 1, &status);
848      temporder = ucol_previous(iter, &status);
849
850      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
851          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
852                  ch, 3);
853      }
854
855      ch = 0x64;
856      ucol_setText(iter, &ch, 1, &status);
857      temporder = ucol_previous(iter, &status);
858
859      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
860          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
861                  ch, 3);
862      }
863
864      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
865      ucol_setText(iter, supplementary, 2, &status);
866      sorder = ucol_previous(iter, &status);
867
868      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
869          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870                  ch, 2);
871      }
872
873      /* testing jamo */
874      ch = 0x1165;
875
876      ucol_setText(iter, &ch, 1, &status);
877      temporder = ucol_previous(iter, &status);
878      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
879          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
880                  ch, 3);
881      }
882
883      ucol_closeElements(iter);
884      ucol_close(coll);
885
886      /* testing special jamo &a<\u1160 */
887      rule[0] = 0x26;
888      rule[1] = 0x71;
889      rule[2] = 0x3c;
890      rule[3] = 0x1165;
891      rule[4] = 0x2f;
892      rule[5] = 0x71;
893      rule[6] = 0x71;
894      rule[7] = 0x71;
895      rule[8] = 0x71;
896      rule[9] = 0;
897
898      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
899          UCOL_DEFAULT_STRENGTH,NULL, &status);
900      iter = ucol_openElements(coll, &ch, 1, &status);
901
902      temporder = ucol_previous(iter, &status);
903      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
904          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
905                  ch, 5);
906      }
907
908      ucol_closeElements(iter);
909      ucol_close(coll);
910    } else {
911      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
912    }
913
914}
915
916
917static void assertEqual(UCollationElements *i1, UCollationElements *i2)
918{
919    int32_t c1, c2;
920    int32_t count = 0;
921    UErrorCode status = U_ZERO_ERROR;
922
923    do
924    {
925        c1 = ucol_next(i1, &status);
926        c2 = ucol_next(i2, &status);
927
928        if (c1 != c2)
929        {
930            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
931            break;
932        }
933
934        count += 1;
935    }
936    while (c1 != UCOL_NULLORDER);
937}
938
939/**
940 * Testing iterators with extremely small buffers
941 */
942static void TestSmallBuffer()
943{
944    UErrorCode          status = U_ZERO_ERROR;
945    UCollator          *coll;
946    UCollationElements *testiter,
947                       *iter;
948    int32_t             count = 0;
949    OrderAndOffset     *testorders,
950                       *orders;
951
952    UChar teststr[500];
953    UChar str[] = {0x300, 0x31A, 0};
954    /*
955    creating a long string of decomposable characters,
956    since by default the writable buffer is of size 256
957    */
958    while (count < 500) {
959        if ((count & 1) == 0) {
960            teststr[count ++] = 0x300;
961        }
962        else {
963            teststr[count ++] = 0x31A;
964        }
965    }
966
967    coll = ucol_open("th_TH", &status);
968    if(U_SUCCESS(status) && coll) {
969      testiter = ucol_openElements(coll, teststr, 500, &status);
970      iter = ucol_openElements(coll, str, 2, &status);
971
972      orders     = getOrders(iter, &count);
973      if (count != 2) {
974          log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
975      }
976
977      /*
978      this will rearrange the string data to 250 characters of 0x300 first then
979      250 characters of 0x031A
980      */
981      testorders = getOrders(testiter, &count);
982
983      if (count != 500) {
984          log_err("Error decomposition does not give the right sized collation elements\n");
985      }
986
987      while (count != 0) {
988          /* UCA collation element for 0x0F76 */
989          if ((count > 250 && testorders[-- count].order != orders[1].order) ||
990              (count <= 250 && testorders[-- count].order != orders[0].order)) {
991              log_err("Error decomposition does not give the right collation element at %d count\n", count);
992              break;
993          }
994      }
995
996      free(testorders);
997      free(orders);
998
999      ucol_reset(testiter);
1000
1001      /* ensures closing of elements done properly to clear writable buffer */
1002      ucol_next(testiter, &status);
1003      ucol_next(testiter, &status);
1004      ucol_closeElements(testiter);
1005      ucol_closeElements(iter);
1006      ucol_close(coll);
1007    } else {
1008      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1009    }
1010}
1011
/**
* Converts a single hexadecimal digit character to its numeric value
* (snippet of code from genuca).
* @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are recognized
* @return the value of the digit (0 to 15), or 0 for any other character
*/
static int32_t hex2num(char hex) {
    /* decimal digits */
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    /* lower case letters */
    if ('a' <= hex && hex <= 'f') {
        return hex - 'a' + 10;
    }
    /* upper case letters */
    if ('A' <= hex && hex <= 'F') {
        return hex - 'A' + 10;
    }
    /* not a hexadecimal digit */
    return 0;
}
1026
1027/**
1028* Getting codepoints from a string
1029* @param str character string contain codepoints seperated by space and ended
1030*        by a semicolon
1031* @param codepoints array for storage, assuming size > 5
1032* @return position at the end of the codepoint section
1033*/
1034static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035    UErrorCode errorCode = U_ZERO_ERROR;
1036    char *semi = uprv_strchr(str, ';');
1037    char *pipe = uprv_strchr(str, '|');
1038    char *s;
1039    *codepoints = 0;
1040    *contextCPs = 0;
1041    if(semi == NULL) {
1042        log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043        return str;
1044    }
1045    if(pipe != NULL) {
1046        int32_t contextLength;
1047        *pipe = 0;
1048        contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049        *pipe = '|';
1050        if(U_FAILURE(errorCode)) {
1051            log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052            return str;
1053        }
1054        /* prepend the precontext string to the codepoints */
1055        u_memcpy(codepoints, contextCPs, contextLength);
1056        codepoints += contextLength;
1057        /* start of the code point string */
1058        s = pipe + 1;
1059    } else {
1060        s = str;
1061    }
1062    u_parseString(s, codepoints, 99, NULL, &errorCode);
1063    if(U_FAILURE(errorCode)) {
1064        log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065        return str;
1066    }
1067    return semi + 1;
1068}
1069
1070/**
1071* Sniplets of code from genuca
1072*/
1073static int32_t
1074readElement(char **from, char *to, char separator, UErrorCode *status)
1075{
1076    if (U_SUCCESS(*status)) {
1077        char    buffer[1024];
1078        int32_t i = 0;
1079        while (**from != separator) {
1080            if (**from != ' ') {
1081                *(buffer+i++) = **from;
1082            }
1083            (*from)++;
1084        }
1085        (*from)++;
1086        *(buffer + i) = 0;
1087        strcpy(to, buffer);
1088        return i/2;
1089    }
1090
1091    return 0;
1092}
1093
1094/**
1095* Sniplets of code from genuca
1096*/
1097static uint32_t
1098getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099                          UErrorCode *status)
1100{
1101    if (U_SUCCESS(*status)) {
1102        uint32_t  value    = 0;
1103        char      primsave = '\0';
1104        char      secsave  = '\0';
1105        char      tersave  = '\0';
1106        char     *primend  = primary+4;
1107        char     *secend   = secondary+2;
1108        char     *terend   = tertiary+2;
1109        uint32_t  primvalue;
1110        uint32_t  secvalue;
1111        uint32_t  tervalue;
1112
1113        if (uprv_strlen(primary) > 4) {
1114            primsave = *primend;
1115            *primend = '\0';
1116        }
1117
1118        if (uprv_strlen(secondary) > 2) {
1119            secsave = *secend;
1120            *secend = '\0';
1121        }
1122
1123        if (uprv_strlen(tertiary) > 2) {
1124            tersave = *terend;
1125            *terend = '\0';
1126        }
1127
1128        primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129        secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130        tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131        if(primvalue <= 0xFF) {
1132          primvalue <<= 8;
1133        }
1134
1135        value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136           | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137           | (tervalue & UCOL_TERTIARYORDERMASK);
1138
1139        if(primsave!='\0') {
1140            *primend = primsave;
1141        }
1142        if(secsave!='\0') {
1143            *secend = secsave;
1144        }
1145        if(tersave!='\0') {
1146            *terend = tersave;
1147        }
1148        return value;
1149    }
1150    return 0;
1151}
1152
1153/**
1154* Getting collation elements generated from a string
1155* @param str character string contain collation elements contained in [] and
1156*        seperated by space
1157* @param ce array for storage, assuming size > 20
1158* @param status error status
1159* @return position at the end of the codepoint section
1160*/
1161static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162    char       *pStartCP     = uprv_strchr(str, '[');
1163    int         count        = 0;
1164    char       *pEndCP;
1165    char        primary[100];
1166    char        secondary[100];
1167    char        tertiary[100];
1168
1169    while (*pStartCP == '[') {
1170        uint32_t primarycount   = 0;
1171        uint32_t secondarycount = 0;
1172        uint32_t tertiarycount  = 0;
1173        uint32_t CEi = 1;
1174        pEndCP = strchr(pStartCP, ']');
1175        if(pEndCP == NULL) {
1176            break;
1177        }
1178        pStartCP ++;
1179
1180        primarycount   = readElement(&pStartCP, primary, ',', status);
1181        secondarycount = readElement(&pStartCP, secondary, ',', status);
1182        tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1183
1184        /* I want to get the CEs entered right here, including continuation */
1185        ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186        if (U_FAILURE(*status)) {
1187            break;
1188        }
1189
1190        while (2 * CEi < primarycount || CEi < secondarycount ||
1191               CEi < tertiarycount) {
1192            uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193            if (2 * CEi < primarycount) {
1194                value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195                value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196            }
1197
1198            if (2 * CEi + 1 < primarycount) {
1199                value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200                value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201            }
1202
1203            if (CEi < secondarycount) {
1204                value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205                value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206            }
1207
1208            if (CEi < tertiarycount) {
1209                value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210                value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211            }
1212
1213            CEi ++;
1214            ces[count ++] = value;
1215        }
1216
1217      pStartCP = pEndCP + 1;
1218    }
1219    ces[count] = 0;
1220    return pStartCP;
1221}
1222
1223/**
1224* Getting the FractionalUCA.txt file stream
1225*/
1226static FileStream * getFractionalUCA(void)
1227{
1228    char        newPath[256];
1229    char        backupPath[256];
1230    FileStream *result = NULL;
1231
1232    /* Look inside ICU_DATA first */
1233    uprv_strcpy(newPath, ctest_dataSrcDir());
1234    uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235    uprv_strcat(newPath, "FractionalUCA.txt");
1236
1237    /* As a fallback, try to guess where the source data was located
1238     *   at the time ICU was built, and look there.
1239     */
1240#if defined (U_TOPSRCDIR)
1241    strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1242#else
1243    {
1244        UErrorCode errorCode = U_ZERO_ERROR;
1245        strcpy(backupPath, loadTestData(&errorCode));
1246        strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247    }
1248#endif
1249    strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250
1251    result = T_FileStream_open(newPath, "rb");
1252
1253    if (result == NULL) {
1254        result = T_FileStream_open(backupPath, "rb");
1255        if (result == NULL) {
1256            log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257        }
1258    }
1259    return result;
1260}
1261
1262/**
1263* Testing the CEs returned by the iterator
1264*/
1265static void TestCEs() {
1266    FileStream *file = NULL;
1267    char        line[2048];
1268    char       *str;
1269    UChar       codepoints[10];
1270    uint32_t    ces[20];
1271    UErrorCode  status = U_ZERO_ERROR;
1272    UCollator          *coll = ucol_open("", &status);
1273    uint32_t lineNo = 0;
1274    UChar       contextCPs[5];
1275
1276    if (U_FAILURE(status)) {
1277        log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1278        return;
1279    }
1280
1281    file = getFractionalUCA();
1282
1283    if (file == NULL) {
1284        log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285        return;
1286    }
1287
1288
1289    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290        int                 count = 0;
1291        UCollationElements *iter;
1292        int32_t            preContextCeLen=0;
1293        lineNo++;
1294        /* skip this line if it is empty or a comment or is a return value
1295        or start of some variable section */
1296        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297            line[0] == 0x000D || line[0] == '[') {
1298            continue;
1299        }
1300
1301        str = getCodePoints(line, codepoints, contextCPs);
1302
1303        /* these are 'fake' codepoints in the fractional UCA, and are used just
1304         * for positioning of indirect values. They should not go through this
1305         * test.
1306         */
1307        if(*codepoints == 0xFDD0) {
1308          continue;
1309        }
1310        if (*contextCPs != 0) {
1311            iter = ucol_openElements(coll, contextCPs, -1, &status);
1312            if (U_FAILURE(status)) {
1313                log_err("Error in opening collation elements\n");
1314                break;
1315            }
1316            while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317                preContextCeLen++;
1318            }
1319            ucol_closeElements(iter);
1320        }
1321
1322        getCEs(str, ces+preContextCeLen, &status);
1323        if (U_FAILURE(status)) {
1324            log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325            break;
1326        }
1327        iter = ucol_openElements(coll, codepoints, -1, &status);
1328        if (U_FAILURE(status)) {
1329            log_err("Error in opening collation elements\n");
1330            break;
1331        }
1332        for (;;) {
1333            uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334            if (ce == 0xFFFFFFFF) {
1335                ce = 0;
1336            }
1337            /* we now unconditionally reorder Thai/Lao prevowels, so this
1338             * test would fail if we don't skip here.
1339             */
1340            if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341              continue;
1342            }
1343            if (ce != ces[count] || U_FAILURE(status)) {
1344                log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345                break;
1346            }
1347            if (ces[count] == 0) {
1348                break;
1349            }
1350            count ++;
1351        }
1352        ucol_closeElements(iter);
1353    }
1354
1355    T_FileStream_close(file);
1356    ucol_close(coll);
1357}
1358
1359/**
1360* Testing the discontigous contractions
1361*/
1362static void TestDiscontiguos() {
1363    const char               *rulestr    =
1364                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365          UChar               rule[50];
1366          int                 rulelen = u_unescape(rulestr, rule, 50);
1367    const char               *src[] = {
1368     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369    /* base character blocked */
1370     "XD\\u0300", "XD\\u0300\\u0315",
1371    /* non blocking combining character */
1372     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373     /* blocking combining character */
1374     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375     /* contraction prefix */
1376     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377     "X\\u0300\\u031A\\u0315",
1378     /* ends not with a contraction character */
1379     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381    };
1382    const char               *tgt[] = {
1383     /* non blocking combining character */
1384     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385    /* base character blocked */
1386     "X D \\u0300", "X D \\u0300\\u0315",
1387    /* non blocking combining character */
1388     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389     /* blocking combining character */
1390     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391     /* contraction prefix */
1392     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393     "X\\u0300 \\u031A \\u0315",
1394     /* ends not with a contraction character */
1395     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397    };
1398          int                 size   = 20;
1399          UCollator          *coll;
1400          UErrorCode          status    = U_ZERO_ERROR;
1401          int                 count     = 0;
1402          UCollationElements *iter;
1403          UCollationElements *resultiter;
1404
1405    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406    iter       = ucol_openElements(coll, rule, 1, &status);
1407    resultiter = ucol_openElements(coll, rule, 1, &status);
1408
1409    if (U_FAILURE(status)) {
1410        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1411        return;
1412    }
1413
1414    while (count < size) {
1415        UChar  str[20];
1416        UChar  tstr[20];
1417        int    strLen = u_unescape(src[count], str, 20);
1418        UChar *s;
1419
1420        ucol_setText(iter, str, strLen, &status);
1421        if (U_FAILURE(status)) {
1422            log_err("Error opening collation iterator\n");
1423            return;
1424        }
1425
1426        u_unescape(tgt[count], tstr, 20);
1427        s = tstr;
1428
1429        log_verbose("count %d\n", count);
1430
1431        for (;;) {
1432            uint32_t  ce;
1433            UChar    *e = u_strchr(s, 0x20);
1434            if (e == 0) {
1435                e = u_strchr(s, 0);
1436            }
1437            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438            ce = ucol_next(resultiter, &status);
1439            if (U_FAILURE(status)) {
1440                log_err("Error manipulating collation iterator\n");
1441                return;
1442            }
1443            while (ce != UCOL_NULLORDER) {
1444                if (ce != (uint32_t)ucol_next(iter, &status) ||
1445                    U_FAILURE(status)) {
1446                    log_err("Discontiguos contraction test mismatch\n");
1447                    return;
1448                }
1449                ce = ucol_next(resultiter, &status);
1450                if (U_FAILURE(status)) {
1451                    log_err("Error getting next collation element\n");
1452                    return;
1453                }
1454            }
1455            s = e + 1;
1456            if (*e == 0) {
1457                break;
1458            }
1459        }
1460        ucol_reset(iter);
1461        backAndForth(iter);
1462        count ++;
1463    }
1464    ucol_closeElements(resultiter);
1465    ucol_closeElements(iter);
1466    ucol_close(coll);
1467}
1468
1469static void TestCEBufferOverflow()
1470{
1471    UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472    UErrorCode          status = U_ZERO_ERROR;
1473    UChar               rule[10];
1474    UCollator          *coll;
1475    UCollationElements *iter;
1476
1477    u_uastrcpy(rule, "&z < AB");
1478    coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479    if (U_FAILURE(status)) {
1480        log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1481        return;
1482    }
1483
1484    /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485    test. this will cause an overflow in getPrev */
1486    str[0] = 0x0041;    /* 'A' */
1487    /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488    uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489    str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1490    iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491                             &status);
1492    if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493        status == U_BUFFER_OVERFLOW_ERROR) {
1494        log_err("CE buffer should not overflow with long string of trail surrogates\n");
1495    }
1496    ucol_closeElements(iter);
1497    ucol_close(coll);
1498}
1499
1500/**
1501* Checking collation element validity.
1502*/
1503#define MAX_CODEPOINTS_TO_SHOW 10
1504static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1505    int i, lengthToUse = length;
1506    if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1507        lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1508    }
1509    for (i = 0; i < lengthToUse; ++i) {
1510        int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1511        if (bytesWritten <= 0) {
1512            break;
1513        }
1514        codepointText += bytesWritten;
1515    }
1516    if (i < length) {
1517        sprintf(codepointText, " ...");
1518    }
1519}
1520
/**
* Iterates over the collation elements that coll produces for a string and
* checks each of them against a set of well-formedness rules for primary,
* secondary and tertiary weights, both for regular and for continuation
* elements. The first violation is logged, followed by a line naming the
* collator's valid locale and the offending code points.
* @param coll collator to test
* @param codepoints string to iterate over
* @param length number of code units in codepoints
* @return TRUE if the end of the iteration was reached without a violation,
*         FALSE otherwise (including when the iterator could not be created)
*/
static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
                             int length)
{
    UErrorCode          status = U_ZERO_ERROR;
    UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
                                                  &status);
    UBool result = FALSE;
    /* xxxDone: the weight of that level has ended (was 0) in the current
       sequence of CEs, so a following continuation must not carry it on */
    UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
    const char * collLocale;

    if (U_FAILURE(status)) {
        log_err("Error creating iterator for testing validity\n");
        return FALSE;
    }
    /* the locale is only needed for the error message; fall back to "?" */
    collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
    if (U_FAILURE(status) || collLocale==NULL) {
        status = U_ZERO_ERROR;
        collLocale = "?";
    }

    /* every break below leaves result == FALSE, only the regular end of the
       iteration sets it to TRUE */
    for (;;) {
        uint32_t ce = ucol_next(iter, &status);
        uint32_t primary, p1, p2, secondary, tertiary;
        if (ce == UCOL_NULLORDER) {
            result = TRUE;
            break;
        }
        /* completely ignorable CEs are not checked */
        if (ce == 0) {
            continue;
        }
        if (ce == 0x02000202) {
            /* special CE for merge-sort character */
            if (*codepoints == 0xFFFE /* && length == 1 */) {
                /*
                 * Note: We should check for length==1 but the token parser appears
                 * to give us trailing NUL characters.
                 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
                 *                     rather than the internal collation rule parser
                 */
                continue;
            } else {
                log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
                        (int)*codepoints, (int)length);
                break;
            }
        }
        /* split the CE into its weights; p1/p2 are the first and second
           byte of the primary weight */
        primary   = UCOL_PRIMARYORDER(ce);
        p1 = primary >> 8;
        p2 = primary & 0xFF;
        secondary = UCOL_SECONDARYORDER(ce);
        tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;

        if (!isContinuation(ce)) {
            /* checks for a CE that starts a new sequence */
            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
                log_err("Empty CE %08lX except for case bits\n", (long)ce);
                break;
            }
            if (p1 == 0) {
                if (p2 != 0) {
                    log_err("Primary 00 xx in %08lX\n", (long)ce);
                    break;
                }
                primaryDone = TRUE;
            } else {
                if (p1 <= 2 || p1 >= 0xF0) {
                    /* Primary first bytes F0..FF are specials. */
                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
                    break;
                }
                if (p2 == 0) {
                    primaryDone = TRUE;
                } else {
                    if (p2 <= 3 || p2 >= 0xFF) {
                        /* Primary second bytes 03 and FF are sort key compression terminators. */
                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
                        break;
                    }
                    primaryDone = FALSE;
                }
            }
            if (secondary == 0) {
                if (primary != 0) {
                    log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
                    break;
                }
                secondaryDone = TRUE;
            } else {
                if (secondary <= 2 ||
                    (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
                ) {
                    /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
                    break;
                }
                secondaryDone = FALSE;
            }
            if (tertiary == 0) {
                /* We know that ce != 0. */
                log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
                break;
            }
            if (tertiary <= 2) {
                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
                break;
            }
            tertiaryDone = FALSE;
        } else {
            /* checks for a continuation CE: a level may only carry a weight
               if it has not ended in the preceding CEs */
            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
                log_err("Empty continuation %08lX\n", (long)ce);
                break;
            }
            if (primaryDone && primary != 0) {
                log_err("Primary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (p1 == 0) {
                if (p2 != 0) {
                    log_err("Primary 00 xx in %08lX\n", (long)ce);
                    break;
                }
                primaryDone = TRUE;
            } else {
                if (p1 <= 2) {
                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
                    break;
                }
                if (p2 == 0) {
                    primaryDone = TRUE;
                } else {
                    if (p2 <= 3) {
                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
                        break;
                    }
                }
            }
            if (secondaryDone && secondary != 0) {
                log_err("Secondary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (secondary == 0) {
                secondaryDone = TRUE;
            } else {
                if (secondary <= 2) {
                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
                    break;
                }
            }
            if (tertiaryDone && tertiary != 0) {
                log_err("Tertiary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (tertiary == 0) {
                tertiaryDone = TRUE;
            } else if (tertiary <= 2) {
                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
                break;
            }
        }
    }
    if (!result) {
        /* a check failed: add the context to the message logged above */
        char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
        showCodepoints(codepoints, length, codepointText);
        log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
    }
    ucol_closeElements(iter);
    return result;
}
1688
/* NUL-terminated marker text; TestCEValidity() skips tailorings whose rules contain it */
static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 };  /* "[import" */
1690
1691static void TestCEValidity()
1692{
1693    /* testing UCA collation elements */
1694    UErrorCode  status      = U_ZERO_ERROR;
1695    /* en_US has no tailorings */
1696    UCollator  *coll        = ucol_open("root", &status);
1697    /* tailored locales */
1698    char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1699    const char *loc;
1700    FileStream *file = NULL;
1701    char        line[2048];
1702    UChar       codepoints[11];
1703    int         count = 0;
1704    int         maxCount = 0;
1705    UChar       contextCPs[3];
1706    UChar32     c;
1707    UParseError parseError;
1708    if (U_FAILURE(status)) {
1709        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1710        return;
1711    }
1712    log_verbose("Testing UCA elements\n");
1713    file = getFractionalUCA();
1714    if (file == NULL) {
1715        log_err("Fractional UCA data can not be opened\n");
1716        return;
1717    }
1718
1719    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1720        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1721            line[0] == 0x000D || line[0] == '[') {
1722            continue;
1723        }
1724
1725        getCodePoints(line, codepoints, contextCPs);
1726        checkCEValidity(coll, codepoints, u_strlen(codepoints));
1727    }
1728
1729    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1730    for (c = 0; c <= 0xffff; ++c) {
1731        if (u_isdefined(c)) {
1732            codepoints[0] = (UChar)c;
1733            checkCEValidity(coll, codepoints, 1);
1734        }
1735    }
1736    for (; c <= 0x10ffff; ++c) {
1737        if (u_isdefined(c)) {
1738            int32_t i = 0;
1739            U16_APPEND_UNSAFE(codepoints, i, c);
1740            checkCEValidity(coll, codepoints, i);
1741        }
1742    }
1743
1744    ucol_close(coll);
1745
1746    /* testing tailored collation elements */
1747    log_verbose("Testing tailored elements\n");
1748    if(getTestOption(QUICK_OPTION)) {
1749        maxCount = sizeof(locale)/sizeof(locale[0]);
1750    } else {
1751        maxCount = uloc_countAvailable();
1752    }
1753    while (count < maxCount) {
1754        const UChar *rules = NULL,
1755                    *current = NULL;
1756        UChar *rulesCopy = NULL;
1757        int32_t ruleLen = 0;
1758
1759        uint32_t chOffset = 0;
1760        uint32_t chLen = 0;
1761        uint32_t exOffset = 0;
1762        uint32_t exLen = 0;
1763        uint32_t prefixOffset = 0;
1764        uint32_t prefixLen = 0;
1765        UBool    startOfRules = TRUE;
1766        UColOptionSet opts;
1767
1768        UColTokenParser src;
1769        uint32_t strength = 0;
1770        uint16_t specs = 0;
1771        if(getTestOption(QUICK_OPTION)) {
1772            loc = locale[count];
1773        } else {
1774            loc = uloc_getAvailable(count);
1775            if(!hasCollationElements(loc)) {
1776                count++;
1777                continue;
1778            }
1779        }
1780        status = U_ZERO_ERROR; // clear status from previous loop iteration
1781
1782        uprv_memset(&src, 0, sizeof(UColTokenParser));
1783
1784        log_verbose("Testing CEs for %s\n", loc);
1785
1786        coll      = ucol_open(loc, &status);
1787        if (U_FAILURE(status)) {
1788            log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));
1789            return;
1790        }
1791
1792        src.opts = &opts;
1793        rules = ucol_getRules(coll, &ruleLen);
1794
1795        /*
1796         * We have not set up the UColTokenParser with a callback function
1797         * to fetch [import] sub-rules,
1798         * so skip testing tailorings that import others.
1799         * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1800         *                     rather than the internal collation rule parser
1801         */
1802        if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
1803            rulesCopy = (UChar *)uprv_malloc((ruleLen +
1804                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1805            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1806            src.current = src.source = rulesCopy;
1807            src.end = rulesCopy + ruleLen;
1808            src.extraCurrent = src.end;
1809            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1810
1811	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1812	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1813            while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {
1814              strength = src.parsedToken.strength;
1815              chOffset = src.parsedToken.charsOffset;
1816              chLen = src.parsedToken.charsLen;
1817              exOffset = src.parsedToken.extensionOffset;
1818              exLen = src.parsedToken.extensionLen;
1819              prefixOffset = src.parsedToken.prefixOffset;
1820              prefixLen = src.parsedToken.prefixLen;
1821              specs = src.parsedToken.flags;
1822
1823                startOfRules = FALSE;
1824                uprv_memcpy(codepoints, src.source + chOffset,
1825                                                       chLen * sizeof(UChar));
1826                codepoints[chLen] = 0;
1827                checkCEValidity(coll, codepoints, chLen);
1828            }
1829            if (U_FAILURE(status)) {
1830                log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
1831            }
1832            uprv_free(src.source);
1833            uprv_free(src.reorderCodes);
1834        }
1835
1836        ucol_close(coll);
1837        count ++;
1838    }
1839    T_FileStream_close(file);
1840}
1841
1842static void printSortKeyError(const UChar   *codepoints, int length,
1843                                    uint8_t *sortkey, int sklen)
1844{
1845    int count = 0;
1846    log_err("Sortkey not valid for ");
1847    while (length > 0) {
1848        log_err("0x%04x ", *codepoints);
1849        length --;
1850        codepoints ++;
1851    }
1852    log_err("\nSortkey : ");
1853    while (count < sklen) {
1854        log_err("0x%02x ", sortkey[count]);
1855        count ++;
1856    }
1857    log_err("\n");
1858}
1859
1860/**
1861* Checking sort key validity for all levels
1862*/
1863static UBool checkSortKeyValidity(UCollator *coll,
1864                                  const UChar *codepoints,
1865                                  int length)
1866{
1867    UErrorCode status  = U_ZERO_ERROR;
1868    UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1869                                      UCOL_TERTIARY, UCOL_QUATERNARY,
1870                                      UCOL_IDENTICAL};
1871    int        strengthlen = 5;
1872    int        strengthIndex = 0;
1873    int        caselevel   = 0;
1874
1875    while (caselevel < 1) {
1876        if (caselevel == 0) {
1877            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1878        }
1879        else {
1880            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1881        }
1882
1883        while (strengthIndex < strengthlen) {
1884            int        count01 = 0;
1885            uint32_t   count   = 0;
1886            uint8_t    sortkey[128];
1887            uint32_t   sklen;
1888
1889            ucol_setStrength(coll, strength[strengthIndex]);
1890            sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1891            while (sortkey[count] != 0) {
1892                if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
1893                    printSortKeyError(codepoints, length, sortkey, sklen);
1894                    return FALSE;
1895                }
1896                if (sortkey[count] == 1) {
1897                    count01 ++;
1898                }
1899                count ++;
1900            }
1901
1902            if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1903                printSortKeyError(codepoints, length, sortkey, sklen);
1904                return FALSE;
1905            }
1906            strengthIndex ++;
1907        }
1908        caselevel ++;
1909    }
1910    return TRUE;
1911}
1912
1913static void TestSortKeyValidity(void)
1914{
1915    /* testing UCA collation elements */
1916    UErrorCode  status      = U_ZERO_ERROR;
1917    /* en_US has no tailorings */
1918    UCollator  *coll        = ucol_open("en_US", &status);
1919    /* tailored locales */
1920    char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1921    FileStream *file = NULL;
1922    char        line[2048];
1923    UChar       codepoints[10];
1924    int         count = 0;
1925    UChar       contextCPs[5];
1926    UParseError parseError;
1927    if (U_FAILURE(status)) {
1928        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1929        return;
1930    }
1931    log_verbose("Testing UCA elements\n");
1932    file = getFractionalUCA();
1933    if (file == NULL) {
1934        log_err("Fractional UCA data can not be opened\n");
1935        return;
1936    }
1937
1938    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1939        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1940            line[0] == 0x000D || line[0] == '[') {
1941            continue;
1942        }
1943
1944        getCodePoints(line, codepoints, contextCPs);
1945        if(codepoints[0] == 0xFFFE) {
1946            /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1947            continue;
1948        }
1949        checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1950    }
1951
1952    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1953    codepoints[0] = 0;
1954
1955    while (codepoints[0] < 0xFFFF) {
1956        if (u_isdefined((UChar32)codepoints[0])) {
1957            checkSortKeyValidity(coll, codepoints, 1);
1958        }
1959        codepoints[0] ++;
1960    }
1961
1962    ucol_close(coll);
1963
1964    /* testing tailored collation elements */
1965    log_verbose("Testing tailored elements\n");
1966    while (count < 5) {
1967        const UChar *rules = NULL,
1968                    *current = NULL;
1969        UChar *rulesCopy = NULL;
1970        int32_t ruleLen = 0;
1971
1972        uint32_t chOffset = 0;
1973        uint32_t chLen = 0;
1974        uint32_t exOffset = 0;
1975        uint32_t exLen = 0;
1976        uint32_t prefixOffset = 0;
1977        uint32_t prefixLen = 0;
1978        UBool    startOfRules = TRUE;
1979        UColOptionSet opts;
1980
1981        UColTokenParser src;
1982        uint32_t strength = 0;
1983        uint16_t specs = 0;
1984        status = U_ZERO_ERROR; // clear status from previous loop iteration
1985
1986        uprv_memset(&src, 0, sizeof(UColTokenParser));
1987
1988        coll      = ucol_open(locale[count], &status);
1989        if (U_FAILURE(status)) {
1990            log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));
1991            return;
1992        }
1993
1994        src.opts = &opts;
1995        rules = ucol_getRules(coll, &ruleLen);
1996
1997        /*
1998         * We have not set up the UColTokenParser with a callback function
1999         * to fetch [import] sub-rules,
2000         * so skip testing tailorings that import others.
2001         * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()
2002         *                     rather than the internal collation rule parser
2003         */
2004        if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
2005            rulesCopy = (UChar *)uprv_malloc((ruleLen +
2006                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
2007            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
2008            src.current = src.source = rulesCopy;
2009            src.end = rulesCopy + ruleLen;
2010            src.extraCurrent = src.end;
2011            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2012
2013	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
2014	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
2015            while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {
2016                strength = src.parsedToken.strength;
2017                chOffset = src.parsedToken.charsOffset;
2018                chLen = src.parsedToken.charsLen;
2019                exOffset = src.parsedToken.extensionOffset;
2020                exLen = src.parsedToken.extensionLen;
2021                prefixOffset = src.parsedToken.prefixOffset;
2022                prefixLen = src.parsedToken.prefixLen;
2023                specs = src.parsedToken.flags;
2024
2025                startOfRules = FALSE;
2026                uprv_memcpy(codepoints, src.source + chOffset,
2027                                                       chLen * sizeof(UChar));
2028                codepoints[chLen] = 0;
2029                if(codepoints[0] == 0xFFFE) {
2030                    /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2031                    continue;
2032                }
2033                checkSortKeyValidity(coll, codepoints, chLen);
2034            }
2035            if (U_FAILURE(status)) {
2036                log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
2037            }
2038            uprv_free(src.source);
2039            uprv_free(src.reorderCodes);
2040        }
2041
2042        ucol_close(coll);
2043        count ++;
2044    }
2045    T_FileStream_close(file);
2046}
2047
/**
* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
* normalization on AND jamo tailoring, among other things.
*
* The tables below are its input text and, per locale, the text offsets it
* expects ucol_getOffset() to report while iterating.
*/

/* Input text: groups of characters, each group preceded by a U+0020 space. */
static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
    0x0020, 0xAC00,                 /* simple LV Hangul */
    0x0020, 0xAC01,                 /* simple LVT Hangul */
    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
    0x0020, 0x00E6,                 /* small letter ae, expands */
    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
    0x0020
};
/* Number of UChars in tsceText (29). */
enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };

/*
 * Expected offsets for the "root" locale. Entry i is the value that
 * ucol_getOffset() must return immediately before the i-th ucol_next() call;
 * the backward pass walks the same table from its last entry to its first
 * around ucol_previous(). The final entry equals kLen_tsceText.
 * NOTE(review): the rows look like one row per tsceText line - confirm.
 */
static const int32_t rootStandardOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,
    6,  7,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,
    20, 21,22,23,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
/* Number of entries in rootStandardOffsets. */
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };

/*
 * Expected offsets for the "root@collation=search" locale, used the same way
 * as rootStandardOffsets. Several rows carry more entries than their
 * standard counterparts.
 */
static const int32_t rootSearchOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,6,
    6,  7,8,8,8,8,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,20,
    20, 21,22,22,23,23,23,24,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
/* Number of entries in rootSearchOffsets. */
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };

/* One test case: a collator locale together with its expected offset table. */
typedef struct {
    const char *    locale;     /* locale ID passed to ucol_open() */
    const int32_t * offsets;    /* expected ucol_getOffset() values, in forward iteration order */
    int32_t         offsetsLen; /* number of entries in offsets */
} TSCEItem;

/* Test cases for TestSearchCollatorElements; the entry with a NULL locale ends the list. */
static const TSCEItem tsceItems[] = {
    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
    { NULL,                    NULL,                0                        }
};
2110
2111static void TestSearchCollatorElements(void)
2112{
2113    const TSCEItem * tsceItemPtr;
2114    for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2115        UErrorCode status = U_ZERO_ERROR;
2116        UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2117        if ( U_SUCCESS(status) ) {
2118            UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2119            if ( U_SUCCESS(status) ) {
2120                int32_t offset, element;
2121                const int32_t * nextOffsetPtr;
2122                const int32_t * limitOffsetPtr;
2123
2124                nextOffsetPtr = tsceItemPtr->offsets;
2125                limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2126                do {
2127                    offset = ucol_getOffset(uce);
2128                    element = ucol_next(uce, &status);
2129                    if ( element == 0 ) {
2130                        log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2131                    }
2132                    if ( nextOffsetPtr < limitOffsetPtr ) {
2133                        if (offset != *nextOffsetPtr) {
2134                            log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2135                                                            tsceItemPtr->locale, *nextOffsetPtr, offset );
2136                            nextOffsetPtr = limitOffsetPtr;
2137                            break;
2138                        }
2139                        nextOffsetPtr++;
2140                    } else {
2141                        log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2142                    }
2143                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2144                if ( nextOffsetPtr < limitOffsetPtr ) {
2145                    log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2146                }
2147
2148                ucol_setOffset(uce, kLen_tsceText, &status);
2149                status = U_ZERO_ERROR;
2150                nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2151                limitOffsetPtr = tsceItemPtr->offsets;
2152                do {
2153                    offset = ucol_getOffset(uce);
2154                    element = ucol_previous(uce, &status);
2155                    if ( element == 0 ) {
2156                        log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2157                    }
2158                    if ( nextOffsetPtr > limitOffsetPtr ) {
2159                        nextOffsetPtr--;
2160                        if (offset != *nextOffsetPtr) {
2161                            log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2162                                                                tsceItemPtr->locale, *nextOffsetPtr, offset );
2163                            nextOffsetPtr = limitOffsetPtr;
2164                            break;
2165                        }
2166                   } else {
2167                        log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2168                    }
2169                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2170                if ( nextOffsetPtr > limitOffsetPtr ) {
2171                    log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2172                }
2173
2174                ucol_closeElements(uce);
2175            } else {
2176                log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2177            }
2178            ucol_close(ucol);
2179        } else {
2180            log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2181        }
2182    }
2183}
2184
2185#endif /* #if !UCONFIG_NO_COLLATION */
2186