1/*
2**********************************************************************
3*   Copyright (C) 1999-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/10/99    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "transtst.h"
16#include "unicode/locid.h"
17#include "unicode/dtfmtsym.h"
18#include "unicode/normlzr.h"
19#include "unicode/translit.h"
20#include "unicode/uchar.h"
21#include "unicode/unifilt.h"
22#include "unicode/uniset.h"
23#include "unicode/ustring.h"
24#include "unicode/usetiter.h"
25#include "unicode/uscript.h"
26#include "unicode/utf16.h"
27#include "cpdtrans.h"
28#include "nultrans.h"
29#include "rbt.h"
30#include "rbt_pars.h"
31#include "anytrans.h"
32#include "esctrn.h"
33#include "name2uni.h"
34#include "nortrans.h"
35#include "remtrans.h"
36#include "titletrn.h"
37#include "tolowtrn.h"
38#include "toupptrn.h"
39#include "unesctrn.h"
40#include "uni2name.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include <stdio.h>
44
45/***********************************************************************
46
47                     HOW TO USE THIS TEST FILE
48                               -or-
49                  How I developed on two platforms
50                without losing (too much of) my mind
51
52
531. Add new tests by copying/pasting/changing existing tests.  On Java,
54   any public void method named Test...() taking no parameters becomes
55   a test.  On C++, you need to modify the header and add a line to
56   the runIndexedTest() dispatch method.
57
582. Make liberal use of the expect() method; it is your friend.
59
603. The tests in this file exactly match those in a sister file on the
61   other side.  The two files are:
62
63   icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
64   icu4c:  source/test/intltest/transtst.cpp
65
66                  ==> THIS IS THE IMPORTANT PART <==
67
68   When you add a test in this file, add it in TransliteratorTest.java
69   too.  Give it the same name and put it in the same relative place.
70   This makes maintenance a lot simpler for any poor soul who ends up
71   trying to synchronize the tests between icu4j and icu4c.
72
734. If you MUST enter a test that is NOT paralleled in the sister file,
74   then add it in the special non-mirrored section.  These are
75   labeled
76
77     "icu4j ONLY"
78
79   or
80
81     "icu4c ONLY"
82
83   Make sure you document the reason the test is here and not there.
84
85
86Thank you.
87The Management
88***********************************************************************/
89
90// Define character constants thusly to be EBCDIC-friendly
91enum {
92    LEFT_BRACE=((UChar)0x007B), /*{*/
93    PIPE      =((UChar)0x007C), /*|*/
94    ZERO      =((UChar)0x0030), /*0*/
95    UPPER_A   =((UChar)0x0041)  /*A*/
96};
97
98TransliteratorTest::TransliteratorTest()
99:   DESERET_DEE((UChar32)0x10414),
100    DESERET_dee((UChar32)0x1043C)
101{
102}
103
104TransliteratorTest::~TransliteratorTest() {}
105
106void
107TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
108                                   const char* &name, char* /*par*/) {
109    switch (index) {
110        TESTCASE(0,TestInstantiation);
111        TESTCASE(1,TestSimpleRules);
112        TESTCASE(2,TestRuleBasedInverse);
113        TESTCASE(3,TestKeyboard);
114        TESTCASE(4,TestKeyboard2);
115        TESTCASE(5,TestKeyboard3);
116        TESTCASE(6,TestArabic);
117        TESTCASE(7,TestCompoundKana);
118        TESTCASE(8,TestCompoundHex);
119        TESTCASE(9,TestFiltering);
120        TESTCASE(10,TestInlineSet);
121        TESTCASE(11,TestPatternQuoting);
122        TESTCASE(12,TestJ277);
123        TESTCASE(13,TestJ243);
124        TESTCASE(14,TestJ329);
125        TESTCASE(15,TestSegments);
126        TESTCASE(16,TestCursorOffset);
127        TESTCASE(17,TestArbitraryVariableValues);
128        TESTCASE(18,TestPositionHandling);
129        TESTCASE(19,TestHiraganaKatakana);
130        TESTCASE(20,TestCopyJ476);
131        TESTCASE(21,TestAnchors);
132        TESTCASE(22,TestInterIndic);
133        TESTCASE(23,TestFilterIDs);
134        TESTCASE(24,TestCaseMap);
135        TESTCASE(25,TestNameMap);
136        TESTCASE(26,TestLiberalizedID);
137        TESTCASE(27,TestCreateInstance);
138        TESTCASE(28,TestNormalizationTransliterator);
139        TESTCASE(29,TestCompoundRBT);
140        TESTCASE(30,TestCompoundFilter);
141        TESTCASE(31,TestRemove);
142        TESTCASE(32,TestToRules);
143        TESTCASE(33,TestContext);
144        TESTCASE(34,TestSupplemental);
145        TESTCASE(35,TestQuantifier);
146        TESTCASE(36,TestSTV);
147        TESTCASE(37,TestCompoundInverse);
148        TESTCASE(38,TestNFDChainRBT);
149        TESTCASE(39,TestNullInverse);
150        TESTCASE(40,TestAliasInverseID);
151        TESTCASE(41,TestCompoundInverseID);
152        TESTCASE(42,TestUndefinedVariable);
153        TESTCASE(43,TestEmptyContext);
154        TESTCASE(44,TestCompoundFilterID);
155        TESTCASE(45,TestPropertySet);
156        TESTCASE(46,TestNewEngine);
157        TESTCASE(47,TestQuantifiedSegment);
158        TESTCASE(48,TestDevanagariLatinRT);
159        TESTCASE(49,TestTeluguLatinRT);
160        TESTCASE(50,TestCompoundLatinRT);
161        TESTCASE(51,TestSanskritLatinRT);
162        TESTCASE(52,TestLocaleInstantiation);
163        TESTCASE(53,TestTitleAccents);
164        TESTCASE(54,TestLocaleResource);
165        TESTCASE(55,TestParseError);
166        TESTCASE(56,TestOutputSet);
167        TESTCASE(57,TestVariableRange);
168        TESTCASE(58,TestInvalidPostContext);
169        TESTCASE(59,TestIDForms);
170        TESTCASE(60,TestToRulesMark);
171        TESTCASE(61,TestEscape);
172        TESTCASE(62,TestAnchorMasking);
173        TESTCASE(63,TestDisplayName);
174        TESTCASE(64,TestSpecialCases);
175#if !UCONFIG_NO_FILE_IO
176        TESTCASE(65,TestIncrementalProgress);
177#endif
178        TESTCASE(66,TestSurrogateCasing);
179        TESTCASE(67,TestFunction);
180        TESTCASE(68,TestInvalidBackRef);
181        TESTCASE(69,TestMulticharStringSet);
182        TESTCASE(70,TestUserFunction);
183        TESTCASE(71,TestAnyX);
184        TESTCASE(72,TestSourceTargetSet);
185        TESTCASE(73,TestGurmukhiDevanagari);
186        TESTCASE(74,TestPatternWhiteSpace);
187        TESTCASE(75,TestAllCodepoints);
188        TESTCASE(76,TestBoilerplate);
189        TESTCASE(77,TestAlternateSyntax);
190        TESTCASE(78,TestBeginEnd);
191        TESTCASE(79,TestBeginEndToRules);
192        TESTCASE(80,TestRegisterAlias);
193        TESTCASE(81,TestRuleStripping);
194        TESTCASE(82,TestHalfwidthFullwidth);
195        TESTCASE(83,TestThai);
196        TESTCASE(84,TestAny);
197        default: name = ""; break;
198    }
199}
200
201static const UVersionInfo ICU_39 = {3,9,4,0};
202/**
203 * Make sure every system transliterator can be instantiated.
204 *
205 * ALSO test that the result of toRules() for each rule is a valid
206 * rule.  Do this here so we don't have to have another test that
207 * instantiates everything as well.
208 */
209void TransliteratorTest::TestInstantiation() {
210    UErrorCode ec = U_ZERO_ERROR;
211    StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
212    assertSuccess("getAvailableIDs()", ec);
213    assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
214    int32_t n = Transliterator::countAvailableIDs();
215    assertTrue("getAvailableIDs().count()==countAvailableIDs()",
216               avail->count(ec) == n);
217    assertSuccess("count()", ec);
218    UnicodeString name;
219    for (int32_t i=0; i<n; ++i) {
220        const UnicodeString& id = *avail->snext(ec);
221        if (!assertSuccess("snext()", ec) ||
222            !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
223            break;
224        }
225        UnicodeString id2 = Transliterator::getAvailableID(i);
226        if (id.length() < 1) {
227            errln(UnicodeString("FAIL: getAvailableID(") +
228                  i + ") returned empty string");
229            continue;
230        }
231        if (id != id2) {
232            errln(UnicodeString("FAIL: getAvailableID(") +
233                  i + ") != getAvailableIDs().snext()");
234            continue;
235        }
236        UParseError parseError;
237        UErrorCode status = U_ZERO_ERROR;
238        Transliterator* t = Transliterator::createInstance(id,
239                              UTRANS_FORWARD, parseError,status);
240        name.truncate(0);
241        Transliterator::getDisplayName(id, name);
242        if (t == 0) {
243#if UCONFIG_NO_BREAK_ITERATION
244            // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
245            if (id.compare((UnicodeString)"Thai-Latin") != 0)
246#endif
247                dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
248                      /*", parse error " + parseError.code +*/
249                      ", line " + parseError.line +
250                      ", offset " + parseError.offset +
251                      ", pre-context " + prettify(parseError.preContext, TRUE) +
252                      ", post-context " +prettify(parseError.postContext,TRUE) +
253                      ", Error: " + u_errorName(status));
254                // When createInstance fails, it deletes the failing
255                // entry from the available ID list.  We detect this
256                // here by looking for a change in countAvailableIDs.
257            int32_t nn = Transliterator::countAvailableIDs();
258            if (nn == (n - 1)) {
259                n = nn;
260                --i; // Compensate for deleted entry
261            }
262        } else {
263            logln(UnicodeString("OK: ") + name + " (" + id + ")");
264
265            // Now test toRules
266            UnicodeString rules;
267            t->toRules(rules, TRUE);
268            Transliterator *u = Transliterator::createFromRules("x",
269                                    rules, UTRANS_FORWARD, parseError,status);
270            if (u == 0) {
271                errln(UnicodeString("FAIL: ") + id +
272                      ".createFromRules() => bad rules" +
273                      /*", parse error " + parseError.code +*/
274                      ", line " + parseError.line +
275                      ", offset " + parseError.offset +
276                      ", context " + prettify(parseError.preContext, TRUE) +
277                      ", rules: " + prettify(rules, TRUE));
278            } else {
279                delete u;
280            }
281            delete t;
282        }
283    }
284    assertTrue("snext()==NULL", avail->snext(ec)==NULL);
285    assertSuccess("snext()", ec);
286    delete avail;
287
288    // Now test the failure path
289    UParseError parseError;
290    UErrorCode status = U_ZERO_ERROR;
291    UnicodeString id("<Not a valid Transliterator ID>");
292    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
293    if (t != 0) {
294        errln("FAIL: " + id + " returned a transliterator");
295        delete t;
296    } else {
297        logln("OK: Bogus ID handled properly");
298    }
299}
300
301void TransliteratorTest::TestSimpleRules(void) {
302    /* Example: rules 1. ab>x|y
303     *                2. yc>z
304     *
305     * []|eabcd  start - no match, copy e to tranlated buffer
306     * [e]|abcd  match rule 1 - copy output & adjust cursor
307     * [ex|y]cd  match rule 2 - copy output & adjust cursor
308     * [exz]|d   no match, copy d to transliterated buffer
309     * [exzd]|   done
310     */
311    expect(UnicodeString("ab>x|y;", "") +
312           "yc>z",
313           "eabcd", "exzd");
314
315    /* Another set of rules:
316     *    1. ab>x|yzacw
317     *    2. za>q
318     *    3. qc>r
319     *    4. cw>n
320     *
321     * []|ab       Rule 1
322     * [x|yzacw]   No match
323     * [xy|zacw]   Rule 2
324     * [xyq|cw]    Rule 4
325     * [xyqn]|     Done
326     */
327    expect(UnicodeString("ab>x|yzacw;") +
328           "za>q;" +
329           "qc>r;" +
330           "cw>n",
331           "ab", "xyqn");
332
333    /* Test categories
334     */
335    UErrorCode status = U_ZERO_ERROR;
336    UParseError parseError;
337    Transliterator *t = Transliterator::createFromRules(
338        "<ID>",
339        UnicodeString("$dummy=").append((UChar)0xE100) +
340        UnicodeString(";"
341                      "$vowel=[aeiouAEIOU];"
342                      "$lu=[:Lu:];"
343                      "$vowel } $lu > '!';"
344                      "$vowel > '&';"
345                      "'!' { $lu > '^';"
346                      "$lu > '*';"
347                      "a > ERROR", ""),
348        UTRANS_FORWARD, parseError,
349        status);
350    if (U_FAILURE(status)) {
351        dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
352        return;
353    }
354    expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
355    delete t;
356}
357
358/**
359 * Test inline set syntax and set variable syntax.
360 */
361void TransliteratorTest::TestInlineSet(void) {
362    expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
363    expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
364
365    expect(UnicodeString(
366           "$digit = [0-9];"
367           "$alpha = [a-zA-Z];"
368           "$alphanumeric = [$digit $alpha];" // ***
369           "$special = [^$alphanumeric];"     // ***
370           "$alphanumeric > '-';"
371           "$special > '*';", ""),
372
373           "thx-1138", "---*----");
374}
375
376/**
377 * Create some inverses and confirm that they work.  We have to be
378 * careful how we do this, since the inverses will not be true
379 * inverses -- we can't throw any random string at the composition
380 * of the transliterators and expect the identity function.  F x
381 * F' != I.  However, if we are careful about the input, we will
382 * get the expected results.
383 */
384void TransliteratorTest::TestRuleBasedInverse(void) {
385    UnicodeString RULES =
386        UnicodeString("abc>zyx;") +
387        "ab>yz;" +
388        "bc>zx;" +
389        "ca>xy;" +
390        "a>x;" +
391        "b>y;" +
392        "c>z;" +
393
394        "abc<zyx;" +
395        "ab<yz;" +
396        "bc<zx;" +
397        "ca<xy;" +
398        "a<x;" +
399        "b<y;" +
400        "c<z;" +
401
402        "";
403
404    const char* DATA[] = {
405        // Careful here -- random strings will not work.  If we keep
406        // the left side to the domain and the right side to the range
407        // we will be okay though (left, abc; right xyz).
408        "a", "x",
409        "abcacab", "zyxxxyy",
410        "caccb", "xyzzy",
411    };
412
413    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
414
415    UErrorCode status = U_ZERO_ERROR;
416    UParseError parseError;
417    Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
418                                UTRANS_FORWARD, parseError, status);
419    Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
420                                UTRANS_REVERSE, parseError, status);
421    if (U_FAILURE(status)) {
422        errln("FAIL: RBT constructor failed");
423        return;
424    }
425    for (int32_t i=0; i<DATA_length; i+=2) {
426        expect(*fwd, DATA[i], DATA[i+1]);
427        expect(*rev, DATA[i+1], DATA[i]);
428    }
429    delete fwd;
430    delete rev;
431}
432
433/**
434 * Basic test of keyboard.
435 */
436void TransliteratorTest::TestKeyboard(void) {
437    UParseError parseError;
438    UErrorCode status = U_ZERO_ERROR;
439    Transliterator *t = Transliterator::createFromRules("<ID>",
440                              UnicodeString("psch>Y;")
441                              +"ps>y;"
442                              +"ch>x;"
443                              +"a>A;",
444                              UTRANS_FORWARD, parseError,
445                              status);
446    if (U_FAILURE(status)) {
447        errln("FAIL: RBT constructor failed");
448        return;
449    }
450    const char* DATA[] = {
451        // insertion, buffer
452        "a", "A",
453        "p", "Ap",
454        "s", "Aps",
455        "c", "Apsc",
456        "a", "AycA",
457        "psch", "AycAY",
458        0, "AycAY", // null means finishKeyboardTransliteration
459    };
460
461    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
462    delete t;
463}
464
465/**
466 * Basic test of keyboard with cursor.
467 */
468void TransliteratorTest::TestKeyboard2(void) {
469    UParseError parseError;
470    UErrorCode status = U_ZERO_ERROR;
471    Transliterator *t = Transliterator::createFromRules("<ID>",
472                              UnicodeString("ych>Y;")
473                              +"ps>|y;"
474                              +"ch>x;"
475                              +"a>A;",
476                              UTRANS_FORWARD, parseError,
477                              status);
478    if (U_FAILURE(status)) {
479        errln("FAIL: RBT constructor failed");
480        return;
481    }
482    const char* DATA[] = {
483        // insertion, buffer
484        "a", "A",
485        "p", "Ap",
486        "s", "Aps", // modified for rollback - "Ay",
487        "c", "Apsc", // modified for rollback - "Ayc",
488        "a", "AycA",
489        "p", "AycAp",
490        "s", "AycAps", // modified for rollback - "AycAy",
491        "c", "AycApsc", // modified for rollback - "AycAyc",
492        "h", "AycAY",
493        0, "AycAY", // null means finishKeyboardTransliteration
494    };
495
496    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
497    delete t;
498}
499
500/**
501 * Test keyboard transliteration with back-replacement.
502 */
503void TransliteratorTest::TestKeyboard3(void) {
504    // We want th>z but t>y.  Furthermore, during keyboard
505    // transliteration we want t>y then yh>z if t, then h are
506    // typed.
507    UnicodeString RULES("t>|y;"
508                        "yh>z;");
509
510    const char* DATA[] = {
511        // Column 1: characters to add to buffer (as if typed)
512        // Column 2: expected appearance of buffer after
513        //           keyboard xliteration.
514        "a", "a",
515        "b", "ab",
516        "t", "abt", // modified for rollback - "aby",
517        "c", "abyc",
518        "t", "abyct", // modified for rollback - "abycy",
519        "h", "abycz",
520        0, "abycz", // null means finishKeyboardTransliteration
521    };
522
523    UParseError parseError;
524    UErrorCode status = U_ZERO_ERROR;
525    Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
526    if (U_FAILURE(status)) {
527        errln("FAIL: RBT constructor failed");
528        return;
529    }
530    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
531    delete t;
532}
533
534void TransliteratorTest::keyboardAux(const Transliterator& t,
535                                     const char* DATA[], int32_t DATA_length) {
536    UErrorCode status = U_ZERO_ERROR;
537    UTransPosition index={0, 0, 0, 0};
538    UnicodeString s;
539    for (int32_t i=0; i<DATA_length; i+=2) {
540        UnicodeString log;
541        if (DATA[i] != 0) {
542            log = s + " + "
543                + DATA[i]
544                + " -> ";
545            t.transliterate(s, index, DATA[i], status);
546        } else {
547            log = s + " => ";
548            t.finishTransliteration(s, index);
549        }
550        // Show the start index '{' and the cursor '|'
551        UnicodeString a, b, c;
552        s.extractBetween(0, index.contextStart, a);
553        s.extractBetween(index.contextStart, index.start, b);
554        s.extractBetween(index.start, s.length(), c);
555        log.append(a).
556            append((UChar)LEFT_BRACE).
557            append(b).
558            append((UChar)PIPE).
559            append(c);
560        if (s == DATA[i+1] && U_SUCCESS(status)) {
561            logln(log);
562        } else {
563            errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
564        }
565    }
566}
567
568void TransliteratorTest::TestArabic(void) {
569// Test disabled for 2.0 until new Arabic transliterator can be written.
570//    /*
571//    const char* DATA[] = {
572//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
573//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
574//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
575//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
576//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
577//                  "\u062c\u0645\u064a\u0644\u0629",
578//    };
579//    */
580//
581//    UChar ar_raw[] = {
582//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
583//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
584//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
585//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
586//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
588//    };
589//    UnicodeString ar(ar_raw);
590//    UErrorCode status=U_ZERO_ERROR;
591//    UParseError parseError;
592//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
593//    if (t == 0) {
594//        errln("FAIL: createInstance failed");
595//        return;
596//    }
597//    expect(*t, "Arabic", ar);
598//    delete t;
599}
600
601/**
602 * Compose the Kana transliterator forward and reverse and try
603 * some strings that should come out unchanged.
604 */
605void TransliteratorTest::TestCompoundKana(void) {
606    UParseError parseError;
607    UErrorCode status = U_ZERO_ERROR;
608    Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
609    if (t == 0) {
610        dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
611    } else {
612        expect(*t, "aaaaa", "aaaaa");
613        delete t;
614    }
615}
616
617/**
618 * Compose the hex transliterators forward and reverse.
619 */
620void TransliteratorTest::TestCompoundHex(void) {
621    UParseError parseError;
622    UErrorCode status = U_ZERO_ERROR;
623    Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
624    Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
625    Transliterator* transab[] = { a, b };
626    Transliterator* transba[] = { b, a };
627    if (a == 0 || b == 0) {
628        errln("FAIL: construction failed");
629        delete a;
630        delete b;
631        return;
632    }
633    // Do some basic tests of a
634    expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
635    // Do some basic tests of b
636    expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
637
638    Transliterator* ab = new CompoundTransliterator(transab, 2);
639    UnicodeString s("abcde", "");
640    expect(*ab, s, s);
641
642    UnicodeString str(s);
643    a->transliterate(str);
644    Transliterator* ba = new CompoundTransliterator(transba, 2);
645    expect(*ba, str, str);
646
647    delete ab;
648    delete ba;
649    delete a;
650    delete b;
651}
652
653int gTestFilterClassID = 0;
654/**
655 * Used by TestFiltering().
656 */
657class TestFilter : public UnicodeFilter {
658    virtual UnicodeFunctor* clone() const {
659        return new TestFilter(*this);
660    }
661    virtual UBool contains(UChar32 c) const {
662        return c != (UChar)0x0063 /*c*/;
663    }
664    // Stubs
665    virtual UnicodeString& toPattern(UnicodeString& result,
666                                     UBool /*escapeUnprintable*/) const {
667        return result;
668    }
669    virtual UBool matchesIndexValue(uint8_t /*v*/) const {
670        return FALSE;
671    }
672    virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
673public:
674    UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
675};
676
677/**
678 * Do some basic tests of filtering.
679 */
680void TransliteratorTest::TestFiltering(void) {
681    UParseError parseError;
682    UErrorCode status = U_ZERO_ERROR;
683    Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
684    if (hex == 0) {
685        errln("FAIL: createInstance(Any-Hex) failed");
686        return;
687    }
688    hex->adoptFilter(new TestFilter());
689    UnicodeString s("abcde");
690    hex->transliterate(s);
691    UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
692    if (s == exp) {
693        logln(UnicodeString("Ok:   \"") + exp + "\"");
694    } else {
695        logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
696    }
697
698    // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
699    UnicodeFilter *f = hex->orphanFilter();
700    if (f == NULL){
701        errln("FAIL: orphanFilter() should get a UnicodeFilter");
702    } else {
703        delete f;
704    }
705    delete hex;
706}
707
708/**
709 * Test anchors
710 */
711void TransliteratorTest::TestAnchors(void) {
712    expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
713           "aaa",
714           "012");
715    expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
716           "aaa",
717           "012");
718    expect(UnicodeString("^ab  > 01 ;"
719           " ab  > |8 ;"
720           "  b  > k ;"
721           " 8x$ > 45 ;"
722           " 8x  > 77 ;", ""),
723
724           "ababbabxabx",
725           "018k7745");
726    expect(UnicodeString("$s = [z$] ;"
727           "$s{ab    > 01 ;"
728           "   ab    > |8 ;"
729           "    b    > k ;"
730           "   8x}$s > 45 ;"
731           "   8x    > 77 ;", ""),
732
733           "abzababbabxzabxabx",
734           "01z018k45z01x45");
735}
736
737/**
738 * Test pattern quoting and escape mechanisms.
739 */
740void TransliteratorTest::TestPatternQuoting(void) {
741    // Array of 3n items
742    // Each item is <rules>, <input>, <expected output>
743    const UnicodeString DATA[] = {
744        UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
745        UnicodeString(UChar(0x4E01)),
746        "[male adult]"
747    };
748
749    for (int32_t i=0; i<3; i+=3) {
750        logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
751        UParseError parseError;
752        UErrorCode status = U_ZERO_ERROR;
753        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
754        if (U_FAILURE(status)) {
755            errln("RBT constructor failed");
756        } else {
757            expect(*t, DATA[i+1], DATA[i+2]);
758        }
759        delete t;
760    }
761}
762
763/**
764 * Regression test for bugs found in Greek transliteration.
765 */
766void TransliteratorTest::TestJ277(void) {
767    UErrorCode status = U_ZERO_ERROR;
768    UParseError parseError;
769    Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
770    if (gl == NULL) {
771        dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
772        return;
773    }
774
775    UChar sigma = 0x3C3;
776    UChar upsilon = 0x3C5;
777    UChar nu = 0x3BD;
778//    UChar PHI = 0x3A6;
779    UChar alpha = 0x3B1;
780//    UChar omega = 0x3C9;
781//    UChar omicron = 0x3BF;
782//    UChar epsilon = 0x3B5;
783
784    // sigma upsilon nu -> syn
785    UnicodeString syn;
786    syn.append(sigma).append(upsilon).append(nu);
787    expect(*gl, syn, "syn");
788
789    // sigma alpha upsilon nu -> saun
790    UnicodeString sayn;
791    sayn.append(sigma).append(alpha).append(upsilon).append(nu);
792    expect(*gl, sayn, "saun");
793
794    // Again, using a smaller rule set
795    UnicodeString rules(
796                "$alpha   = \\u03B1;"
797                "$nu      = \\u03BD;"
798                "$sigma   = \\u03C3;"
799                "$ypsilon = \\u03C5;"
800                "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
801                "s <>           $sigma;"
802                "a <>           $alpha;"
803                "u <>  $vowel { $ypsilon;"
804                "y <>           $ypsilon;"
805                "n <>           $nu;",
806                "");
807    Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
808    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
809    expect(*mini, syn, "syn");
810    expect(*mini, sayn, "saun");
811    delete mini;
812    mini = NULL;
813
814#if !UCONFIG_NO_FORMATTING
815    // Transliterate the Greek locale data
816    Locale el("el");
817    DateFormatSymbols syms(el, status);
818    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
819    int32_t i, count;
820    const UnicodeString* data = syms.getMonths(count);
821    for (i=0; i<count; ++i) {
822        if (data[i].length() == 0) {
823            continue;
824        }
825        UnicodeString out(data[i]);
826        gl->transliterate(out);
827        UBool ok = TRUE;
828        if (data[i].length() >= 2 && out.length() >= 2 &&
829            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
830            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
831                ok = FALSE;
832            }
833        }
834        if (ok) {
835            logln(prettify(data[i] + " -> " + out));
836        } else {
837            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
838        }
839    }
840#endif
841
842    delete gl;
843}
844
845/**
846 * Prefix, suffix support in hex transliterators
847 */
848void TransliteratorTest::TestJ243(void) {
849    UErrorCode ec = U_ZERO_ERROR;
850
851    // Test default Hex-Any, which should handle
852    // \u, \U, u+, and U+
853    Transliterator *hex =
854        Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
855    if (assertSuccess("getInstance", ec)) {
856        expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
857    }
858    delete hex;
859
860//    // Try a custom Hex-Unicode
861//    // \uXXXX and &#xXXXX;
862//    ec = U_ZERO_ERROR;
863//    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
864//    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
865//           "abcd5fx012&#x00033;");
866//    // Try custom Any-Hex (default is tested elsewhere)
867//    ec = U_ZERO_ERROR;
868//    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
869//    expect(hex3, "012", "&#x30;&#x31;&#x32;");
870}
871
872/**
873 * Parsers need better syntax error messages.
874 */
875void TransliteratorTest::TestJ329(void) {
876
877    struct { UBool containsErrors; const char* rule; } DATA[] = {
878        { FALSE, "a > b; c > d" },
879        { TRUE,  "a > b; no operator; c > d" },
880    };
881    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
882
883    for (int32_t i=0; i<DATA_length; ++i) {
884        UErrorCode status = U_ZERO_ERROR;
885        UParseError parseError;
886        Transliterator *rbt = Transliterator::createFromRules("<ID>",
887                                    DATA[i].rule,
888                                    UTRANS_FORWARD,
889                                    parseError,
890                                    status);
891        UBool gotError = U_FAILURE(status);
892        UnicodeString desc(DATA[i].rule);
893        desc.append(gotError ? " -> error" : " -> no error");
894        if (gotError) {
895            desc = desc + ", ParseError code=" + u_errorName(status) +
896                " line=" + parseError.line +
897                " offset=" + parseError.offset +
898                " context=" + parseError.preContext;
899        }
900        if (gotError == DATA[i].containsErrors) {
901            logln(UnicodeString("Ok:   ") + desc);
902        } else {
903            errln(UnicodeString("FAIL: ") + desc);
904        }
905        delete rbt;
906    }
907}
908
909/**
910 * Test segments and segment references.
911 */
912void TransliteratorTest::TestSegments(void) {
913    // Array of 3n items
914    // Each item is <rules>, <input>, <expected output>
915    UnicodeString DATA[] = {
916        "([a-z]) '.' ([0-9]) > $2 '-' $1",
917        "abc.123.xyz.456",
918        "ab1-c23.xy4-z56",
919
920        // nested
921        "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
922        "a1 b2",
923        "a1.a.1 b2.b.2",
924    };
925    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
926
927    for (int32_t i=0; i<DATA_length; i+=3) {
928        logln("Pattern: " + prettify(DATA[i]));
929        UParseError parseError;
930        UErrorCode status = U_ZERO_ERROR;
931        Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
932        if (U_FAILURE(status)) {
933            errln("FAIL: RBT constructor");
934        } else {
935            expect(*t, DATA[i+1], DATA[i+2]);
936        }
937        delete t;
938    }
939}
940
941/**
942 * Test cursor positioning outside of the key
943 */
944void TransliteratorTest::TestCursorOffset(void) {
945    // Array of 3n items
946    // Each item is <rules>, <input>, <expected output>
947    UnicodeString DATA[] = {
948        "pre {alpha} post > | @ ALPHA ;"
949        "eALPHA > beta ;"
950        "pre {beta} post > BETA @@ | ;"
951        "post > xyz",
952
953        "prealphapost prebetapost",
954
955        "prbetaxyz preBETApost",
956    };
957    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
958
959    for (int32_t i=0; i<DATA_length; i+=3) {
960        logln("Pattern: " + prettify(DATA[i]));
961        UParseError parseError;
962        UErrorCode status = U_ZERO_ERROR;
963        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
964        if (U_FAILURE(status)) {
965            errln("FAIL: RBT constructor");
966        } else {
967            expect(*t, DATA[i+1], DATA[i+2]);
968        }
969        delete t;
970    }
971}
972
973/**
974 * Test zero length and > 1 char length variable values.  Test
975 * use of variable refs in UnicodeSets.
976 */
977void TransliteratorTest::TestArbitraryVariableValues(void) {
978    // Array of 3n items
979    // Each item is <rules>, <input>, <expected output>
980    UnicodeString DATA[] = {
981        "$abe = ab;"
982        "$pat = x[yY]z;"
983        "$ll  = 'a-z';"
984        "$llZ = [$ll];"
985        "$llY = [$ll$pat];"
986        "$emp = ;"
987
988        "$abe > ABE;"
989        "$pat > END;"
990        "$llZ > 1;"
991        "$llY > 2;"
992        "7$emp 8 > 9;"
993        "",
994
995        "ab xYzxyz stY78",
996        "ABE ENDEND 1129",
997    };
998    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
999
1000    for (int32_t i=0; i<DATA_length; i+=3) {
1001        logln("Pattern: " + prettify(DATA[i]));
1002        UParseError parseError;
1003        UErrorCode status = U_ZERO_ERROR;
1004        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1005        if (U_FAILURE(status)) {
1006            errln("FAIL: RBT constructor");
1007        } else {
1008            expect(*t, DATA[i+1], DATA[i+2]);
1009        }
1010        delete t;
1011    }
1012}
1013
1014/**
1015 * Confirm that the contextStart, contextLimit, start, and limit
1016 * behave correctly. J474.
1017 */
1018void TransliteratorTest::TestPositionHandling(void) {
1019    // Array of 3n items
1020    // Each item is <rules>, <input>, <expected output>
1021    const char* DATA[] = {
1022        "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023        "xtat txtb", // pos 0,9,0,9
1024        "xTTaSS TTxUUb",
1025
1026        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027        "xtat txtb", // pos 2,9,3,8
1028        "xtaSS TTxUUb",
1029
1030        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031        "xtat txtb", // pos 3,8,3,8
1032        "xtaTT TTxTTb",
1033    };
1034
1035    // Array of 4n positions -- these go with the DATA array
1036    // They are: contextStart, contextLimit, start, limit
1037    int32_t POS[] = {
1038        0, 9, 0, 9,
1039        2, 9, 3, 8,
1040        3, 8, 3, 8,
1041    };
1042
1043    int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1044    for (int32_t i=0; i<n; i++) {
1045        UErrorCode status = U_ZERO_ERROR;
1046        UParseError parseError;
1047        Transliterator *t = Transliterator::createFromRules("<ID>",
1048                                DATA[3*i], UTRANS_FORWARD, parseError, status);
1049        if (U_FAILURE(status)) {
1050            delete t;
1051            errln("FAIL: RBT constructor");
1052            return;
1053        }
1054        UTransPosition pos;
1055        pos.contextStart= POS[4*i];
1056        pos.contextLimit = POS[4*i+1];
1057        pos.start = POS[4*i+2];
1058        pos.limit = POS[4*i+3];
1059        UnicodeString rsource(DATA[3*i+1]);
1060        t->transliterate(rsource, pos, status);
1061        if (U_FAILURE(status)) {
1062            delete t;
1063            errln("FAIL: transliterate");
1064            return;
1065        }
1066        t->finishTransliteration(rsource, pos);
1067        expectAux(DATA[3*i],
1068                  DATA[3*i+1],
1069                  rsource,
1070                  DATA[3*i+2]);
1071        delete t;
1072    }
1073}
1074
1075/**
1076 * Test the Hiragana-Katakana transliterator.
1077 */
1078void TransliteratorTest::TestHiraganaKatakana(void) {
1079    UParseError parseError;
1080    UErrorCode status = U_ZERO_ERROR;
1081    Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1082    Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1083    if (hk == 0 || kh == 0) {
1084        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1085        delete hk;
1086        delete kh;
1087        return;
1088    }
1089
1090    // Array of 3n items
1091    // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092    const char* DATA[] = {
1093        "both",
1094        "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095        "\\u30A2\\u30F8\\u30F2\\u30B0",
1096
1097        "kh",
1098        "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099        "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100    };
1101    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1102
1103    for (int32_t i=0; i<DATA_length; i+=3) {
1104        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1105        UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1106        switch (*DATA[i]) {
1107        case 0x68: //'h': // Hiragana-Katakana
1108            expect(*hk, h, k);
1109            break;
1110        case 0x6B: //'k': // Katakana-Hiragana
1111            expect(*kh, k, h);
1112            break;
1113        case 0x62: //'b': // both
1114            expect(*hk, h, k);
1115            expect(*kh, k, h);
1116            break;
1117        }
1118    }
1119    delete hk;
1120    delete kh;
1121}
1122
1123/**
1124 * Test cloning / copy constructor of RBT.
1125 */
1126void TransliteratorTest::TestCopyJ476(void) {
1127    // The real test here is what happens when the destructors are
1128    // called.  So we let one object get destructed, and check to
1129    // see that its copy still works.
1130    Transliterator *t2 = 0;
1131    {
1132        UParseError parseError;
1133        UErrorCode status = U_ZERO_ERROR;
1134        Transliterator *t1 = Transliterator::createFromRules("t1",
1135            "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1136        if (U_FAILURE(status)) {
1137            errln("FAIL: RBT constructor");
1138            return;
1139        }
1140        t2 = t1->clone(); // Call copy constructor under the covers.
1141        expect(*t1, "abcfoofoo", "ABcbar");
1142        delete t1;
1143    }
1144    expect(*t2, "abcfoofoo", "ABcbar");
1145    delete t2;
1146}
1147
1148/**
1149 * Test inter-Indic transliterators.  These are composed.
1150 * ICU4C Jitterbug 483.
1151 */
1152void TransliteratorTest::TestInterIndic(void) {
1153    UnicodeString ID("Devanagari-Gujarati", "");
1154    UErrorCode status = U_ZERO_ERROR;
1155    UParseError parseError;
1156    Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1157    if (dg == 0) {
1158        dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1159        return;
1160    }
1161    UnicodeString id = dg->getID();
1162    if (id != ID) {
1163        errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1164    }
1165    UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166    UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167    expect(*dg, dev, guj);
1168    delete dg;
1169}
1170
1171/**
1172 * Test filter syntax in IDs. (J918)
1173 */
1174void TransliteratorTest::TestFilterIDs(void) {
1175    // Array of 3n strings:
1176    // <id>, <inverse id>, <input>, <expected output>
1177    const char* DATA[] = {
1178        "[aeiou]Any-Hex", // ID
1179        "[aeiou]Hex-Any", // expected inverse ID
1180        "quizzical",      // src
1181        "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182
1183        "[aeiou]Any-Hex;[^5]Hex-Any",
1184        "[^5]Any-Hex;[aeiou]Hex-Any",
1185        "quizzical",
1186        "q\\u0075izzical",
1187
1188        "[abc]Null",
1189        "[abc]Null",
1190        "xyz",
1191        "xyz",
1192    };
1193    enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1194
1195    for (int i=0; i<DATA_length; i+=4) {
1196        UnicodeString ID(DATA[i], "");
1197        UnicodeString uID(DATA[i+1], "");
1198        UnicodeString data2(DATA[i+2], "");
1199        UnicodeString data3(DATA[i+3], "");
1200        UParseError parseError;
1201        UErrorCode status = U_ZERO_ERROR;
1202        Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1203        if (t == 0) {
1204            errln("FAIL: createInstance(" + ID + ") returned NULL");
1205            return;
1206        }
1207        expect(*t, data2, data3);
1208
1209        // Check the ID
1210        if (ID != t->getID()) {
1211            errln("FAIL: createInstance(" + ID + ").getID() => " +
1212                  t->getID());
1213        }
1214
1215        // Check the inverse
1216        Transliterator *u = t->createInverse(status);
1217        if (u == 0) {
1218            errln("FAIL: " + ID + ".createInverse() returned NULL");
1219        } else if (u->getID() != uID) {
1220            errln("FAIL: " + ID + ".createInverse().getID() => " +
1221                  u->getID() + ", expected " + uID);
1222        }
1223
1224        delete t;
1225        delete u;
1226    }
1227}
1228
1229/**
1230 * Test the case mapping transliterators.
1231 */
1232void TransliteratorTest::TestCaseMap(void) {
1233    UParseError parseError;
1234    UErrorCode status = U_ZERO_ERROR;
1235    Transliterator* toUpper =
1236        Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1237    Transliterator* toLower =
1238        Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239    Transliterator* toTitle =
1240        Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241    if (toUpper==0 || toLower==0 || toTitle==0) {
1242        errln("FAIL: createInstance returned NULL");
1243        delete toUpper;
1244        delete toLower;
1245        delete toTitle;
1246        return;
1247    }
1248
1249    expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1250           "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251    expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252           "the quick brown foX jumped over the lazY dogs.");
1253    expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1254           "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1255
1256    delete toUpper;
1257    delete toLower;
1258    delete toTitle;
1259}
1260
1261/**
1262 * Test the name mapping transliterators.
1263 */
1264void TransliteratorTest::TestNameMap(void) {
1265    UParseError parseError;
1266    UErrorCode status = U_ZERO_ERROR;
1267    Transliterator* uni2name =
1268        Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1269    Transliterator* name2uni =
1270        Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1271    if (uni2name==0 || name2uni==0) {
1272        errln("FAIL: createInstance returned NULL");
1273        delete uni2name;
1274        delete name2uni;
1275        return;
1276    }
1277
1278    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279    expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1281    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282           CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1283
1284    delete uni2name;
1285    delete name2uni;
1286
1287    // round trip
1288    Transliterator* t =
1289        Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1290    if (t==0) {
1291        errln("FAIL: createInstance returned NULL");
1292        delete t;
1293        return;
1294    }
1295
1296    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297    UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1298    expect(*t, s, s);
1299    delete t;
1300}
1301
1302/**
1303 * Test liberalized ID syntax.  1006c
1304 */
1305void TransliteratorTest::TestLiberalizedID(void) {
1306    // Some test cases have an expected getID() value of NULL.  This
1307    // means I have disabled the test case for now.  This stuff is
1308    // still under development, and I haven't decided whether to make
1309    // getID() return canonical case yet.  It will all get rewritten
1310    // with the move to Source-Target/Variant IDs anyway. [aliu]
1311    const char* DATA[] = {
1312        "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313        "  Null  ", "Null", "whitespace",
1314        " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1315        "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316    };
1317    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1318    UParseError parseError;
1319    UErrorCode status= U_ZERO_ERROR;
1320    for (int32_t i=0; i<DATA_length; i+=3) {
1321        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1322        if (t == 0) {
1323            dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1324                  " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1325        } else {
1326            UnicodeString exp;
1327            if (DATA[i+1]) {
1328                exp = UnicodeString(DATA[i+1], "");
1329            }
1330            // Don't worry about getID() if the expected char*
1331            // is NULL -- see above.
1332            if (exp.length() == 0 || exp == t->getID()) {
1333                logln(UnicodeString("Ok: ") + DATA[i+2] +
1334                      " create ID \"" + DATA[i] + "\" => \"" +
1335                      exp + "\"");
1336            } else {
1337                errln(UnicodeString("FAIL: ") + DATA[i+2] +
1338                      " create ID \"" + DATA[i] + "\" => \"" +
1339                      t->getID() + "\", exp \"" + exp + "\"");
1340            }
1341            delete t;
1342        }
1343    }
1344}
1345
1346/* test for Jitterbug 912 */
1347void TransliteratorTest::TestCreateInstance(){
1348    const char* FORWARD = "F";
1349    const char* REVERSE = "R";
1350    const char* DATA[] = {
1351        // Column 1: id
1352        // Column 2: direction
1353        // Column 3: expected ID, or "" if expect failure
1354        "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1355
1356        // JB#2689: bad compound causes crash
1357        "InvalidSource-InvalidTarget", FORWARD, "",
1358        "InvalidSource-InvalidTarget", REVERSE, "",
1359        "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1360        "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1361        "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1362        "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1363
1364        NULL
1365    };
1366
1367    for (int32_t i=0; DATA[i]; i+=3) {
1368        UParseError err;
1369        UErrorCode ec = U_ZERO_ERROR;
1370        UnicodeString id(DATA[i]);
1371        UTransDirection dir = (DATA[i+1]==FORWARD)?
1372            UTRANS_FORWARD:UTRANS_REVERSE;
1373        UnicodeString expID(DATA[i+2]);
1374        Transliterator* t =
1375            Transliterator::createInstance(id,dir,err,ec);
1376        UnicodeString newID;
1377        if (t) {
1378            newID = t->getID();
1379        }
1380        UBool ok = (newID == expID);
1381        if (!t) {
1382            newID = u_errorName(ec);
1383        }
1384        if (ok) {
1385            logln((UnicodeString)"Ok: createInstance(" +
1386                  id + "," + DATA[i+1] + ") => " + newID);
1387        } else {
1388            dataerrln((UnicodeString)"FAIL: createInstance(" +
1389                  id + "," + DATA[i+1] + ") => " + newID +
1390                  ", expected " + expID);
1391        }
1392        delete t;
1393    }
1394}
1395
1396/**
1397 * Test the normalization transliterator.
1398 */
1399void TransliteratorTest::TestNormalizationTransliterator() {
1400    // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401    // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1402    const char* CANON[] = {
1403        // Input               Decomposed            Composed
1404        "cat",                "cat",                "cat"               ,
1405        "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1406
1407        "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1408        "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1409
1410        "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1411        "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1412        "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1413
1414        "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415        "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416
1417        "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1418        "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1419        "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1420
1421        "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1422        "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1423
1424        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1425        "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1426
1427        "Henry IV",           "Henry IV",           "Henry IV"          ,
1428        "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1429
1430        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1431        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1432        "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1433        "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1434        "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1435
1436        "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1437        0 // end
1438    };
1439
1440    const char* COMPAT[] = {
1441        // Input               Decomposed            Composed
1442        "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1443
1444        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1445        "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1446
1447        "Henry IV",           "Henry IV",           "Henry IV"          ,
1448        "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1449
1450        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1451        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1452
1453        "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1454        0 // end
1455    };
1456
1457    int32_t i;
1458    UParseError parseError;
1459    UErrorCode status = U_ZERO_ERROR;
1460    Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1461    Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1462    if (!NFD || !NFC) {
1463        dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1464        delete NFD;
1465        delete NFC;
1466        return;
1467    }
1468    for (i=0; CANON[i]; i+=3) {
1469        UnicodeString in = CharsToUnicodeString(CANON[i]);
1470        UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1471        UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1472        expect(*NFD, in, expd);
1473        expect(*NFC, in, expc);
1474    }
1475    delete NFD;
1476    delete NFC;
1477
1478    Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1479    Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1480    if (!NFKD || !NFKC) {
1481        errln("FAIL: createInstance failed");
1482        delete NFKD;
1483        delete NFKC;
1484        return;
1485    }
1486    for (i=0; COMPAT[i]; i+=3) {
1487        UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1488        UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1489        UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1490        expect(*NFKD, in, expkd);
1491        expect(*NFKC, in, expkc);
1492    }
1493    delete NFKD;
1494    delete NFKC;
1495
1496    UParseError pe;
1497    status = U_ZERO_ERROR;
1498    Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1499                                                       UTRANS_FORWARD,
1500                                                       pe, status);
1501    if (t == 0) {
1502        errln("FAIL: createInstance failed");
1503    }
1504    expect(*t, CharsToUnicodeString("\\u010dx"),
1505           CharsToUnicodeString("c\\u030C"));
1506    delete t;
1507}
1508
1509/**
1510 * Test compound RBT rules.
1511 */
1512void TransliteratorTest::TestCompoundRBT(void) {
1513    // Careful with spacing and ';' here:  Phrase this exactly
1514    // as toRules() is going to return it.  If toRules() changes
1515    // with regard to spacing or ';', then adjust this string.
1516    UnicodeString rule("::Hex-Any;\n"
1517                       "::Any-Lower;\n"
1518                       "a > '.A.';\n"
1519                       "b > '.B.';\n"
1520                       "::[^t]Any-Upper;", "");
1521    UParseError parseError;
1522    UErrorCode status = U_ZERO_ERROR;
1523    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1524    if (t == 0) {
1525        errln("FAIL: createFromRules failed");
1526        return;
1527    }
1528    expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529           "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530    UnicodeString r;
1531    t->toRules(r, TRUE);
1532    if (r == rule) {
1533        logln((UnicodeString)"OK: toRules() => " + r);
1534    } else {
1535        errln((UnicodeString)"FAIL: toRules() => " + r +
1536              ", expected " + rule);
1537    }
1538    delete t;
1539
1540    // Now test toRules
1541    t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1542    if (t == 0) {
1543        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1544        return;
1545    }
1546    UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547    t->toRules(r, TRUE);
1548    if (r != exp) {
1549        errln((UnicodeString)"FAIL: toRules() => " + r +
1550              ", expected " + exp);
1551    } else {
1552        logln((UnicodeString)"OK: toRules() => " + r);
1553    }
1554    delete t;
1555
1556    // Round trip the result of toRules
1557    t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1558    if (t == 0) {
1559        errln("FAIL: createFromRules #2 failed");
1560        return;
1561    } else {
1562        logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1563    }
1564
1565    // Test toRules again
1566    t->toRules(r, TRUE);
1567    if (r != exp) {
1568        errln((UnicodeString)"FAIL: toRules() => " + r +
1569              ", expected " + exp);
1570    } else {
1571        logln((UnicodeString)"OK: toRules() => " + r);
1572    }
1573
1574    delete t;
1575
1576    // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1577    // to what the regenerated ID will look like.
1578    UnicodeString id("Upper(Lower);(NFKC)", "");
1579    t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1580    if (t == 0) {
1581        errln("FAIL: createInstance #2 failed");
1582        return;
1583    }
1584    if (t->getID() == id) {
1585        logln((UnicodeString)"OK: created " + id);
1586    } else {
1587        errln((UnicodeString)"FAIL: createInstance(" + id +
1588              ").getID() => " + t->getID());
1589    }
1590
1591    Transliterator *u = t->createInverse(status);
1592    if (u == 0) {
1593        errln("FAIL: createInverse failed");
1594        delete t;
1595        return;
1596    }
1597    exp = "NFKC();Lower(Upper)";
1598    if (u->getID() == exp) {
1599        logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1600              u->getID());
1601    } else {
1602        errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1603              u->getID());
1604    }
1605    delete t;
1606    delete u;
1607}
1608
1609/**
 * Compound filter semantics were originally not implemented
1611 * correctly.  Originally, each component filter f(i) is replaced by
1612 * f'(i) = f(i) && g, where g is the filter for the compound
1613 * transliterator.
1614 *
1615 * From Mark:
1616 *
 * Suppose I have a transliterator X. Internally X is
1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619 *
1620 * The compound should convert all greek characters (through latin) to
1621 * cyrillic, then lowercase the result. The filter should say "don't
1622 * touch 'A' in the original". But because an intermediate result
1623 * happens to go through "A", the Greek Alpha gets hung up.
1624 */
1625void TransliteratorTest::TestCompoundFilter(void) {
1626    UParseError parseError;
1627    UErrorCode status = U_ZERO_ERROR;
1628    Transliterator *t = Transliterator::createInstance
1629        ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1630    if (t == 0) {
1631        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1632        return;
1633    }
1634    t->adoptFilter(new UnicodeSet("[^A]", status));
1635    if (U_FAILURE(status)) {
1636        errln("FAIL: UnicodeSet ct failed");
1637        delete t;
1638        return;
1639    }
1640
1641    // Only the 'A' at index 1 should remain unchanged
1642    expect(*t,
1643           CharsToUnicodeString("BA\\u039A\\u0391"),
1644           CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1645    delete t;
1646}
1647
1648void TransliteratorTest::TestRemove(void) {
1649    UParseError parseError;
1650    UErrorCode status = U_ZERO_ERROR;
1651    Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1652    if (t == 0) {
1653        errln("FAIL: createInstance failed");
1654        return;
1655    }
1656
1657    expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1658
1659    // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660    // duplicating the filter
1661    Transliterator* t2 = t->clone();
1662    expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1663
1664    delete t;
1665    delete t2;
1666}
1667
1668void TransliteratorTest::TestToRules(void) {
1669    const char* RBT = "rbt";
1670    const char* SET = "set";
1671    static const char* DATA[] = {
1672        RBT,
1673        "$a=\\u4E61; [$a] > A;",
1674        "[\\u4E61] > A;",
1675
1676        RBT,
1677        "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678        "[[:Zs:][:Zl:]]{a} > A;",
1679
1680        SET,
1681        "[[:Zs:][:Zl:]]",
1682        "[[:Zs:][:Zl:]]",
1683
1684        SET,
1685        "[:Ps:]",
1686        "[:Ps:]",
1687
1688        SET,
1689        "[:L:]",
1690        "[:L:]",
1691
1692        SET,
1693        "[[:L:]-[A]]",
1694        "[[:L:]-[A]]",
1695
1696        SET,
1697        "[~[:Lu:][:Ll:]]",
1698        "[~[:Lu:][:Ll:]]",
1699
1700        SET,
1701        "[~[a-z]]",
1702        "[~[a-z]]",
1703
1704        RBT,
1705        "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706        "[^[:Zs:]]{a} > A;",
1707
1708        RBT,
1709        "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710        "[[a-z]-[:Zs:]]{a} > A;",
1711
1712        RBT,
1713        "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714        "[[:Zs:]&[a-z]]{a} > A;",
1715
1716        RBT,
1717        "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718        "[x[:Zs:]]{a} > A;",
1719
1720        RBT,
1721        "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722        "$macron = \\u0304 ;"
1723        "$evowel = [aeiouyAEIOUY] ;"
1724        "$iotasub = \\u0345 ;"
1725        "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726        "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1727
1728        RBT,
1729        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731    };
1732    static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1733
1734    for (int32_t d=0; d < DATA_length; d+=3) {
1735        if (DATA[d] == RBT) {
1736            // Transliterator test
1737            UParseError parseError;
1738            UErrorCode status = U_ZERO_ERROR;
1739            Transliterator *t = Transliterator::createFromRules("ID",
1740                                                                UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1741            if (t == 0) {
1742                dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1743                return;
1744            }
1745            UnicodeString rules, escapedRules;
1746            t->toRules(rules, FALSE);
1747            t->toRules(escapedRules, TRUE);
1748            UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1749            UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1750            if (rules == expRules) {
1751                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752                      " => " + rules);
1753            } else {
1754                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                      " => " + rules + ", exp " + expRules);
1756            }
1757            if (escapedRules == expEscapedRules) {
1758                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1759                      " => " + escapedRules);
1760            } else {
1761                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762                      " => " + escapedRules + ", exp " + expEscapedRules);
1763            }
1764            delete t;
1765
1766        } else {
1767            // UnicodeSet test
1768            UErrorCode status = U_ZERO_ERROR;
1769            UnicodeString pat(DATA[d+1], -1, US_INV);
1770            UnicodeString expToPat(DATA[d+2], -1, US_INV);
1771            UnicodeSet set(pat, status);
1772            if (U_FAILURE(status)) {
1773                errln("FAIL: UnicodeSet ct failed");
1774                return;
1775            }
1776            // Adjust spacing etc. as necessary.
1777            UnicodeString toPat;
1778            set.toPattern(toPat);
1779            if (expToPat == toPat) {
1780                logln((UnicodeString)"Ok: " + pat +
1781                      " => " + toPat);
1782            } else {
1783                errln((UnicodeString)"FAIL: " + pat +
1784                      " => " + prettify(toPat, TRUE) +
1785                      ", exp " + prettify(pat, TRUE));
1786            }
1787        }
1788    }
1789}
1790
1791void TransliteratorTest::TestContext() {
1792    UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1793    expect("de > x; {d}e > y;",
1794           "de",
1795           "ye",
1796           &pos);
1797
1798    expect("ab{c} > z;",
1799           "xadabdabcy",
1800           "xadabdabzy");
1801}
1802
1803void TransliteratorTest::TestSupplemental() {
1804
1805    expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806                                "a > $a; $s > i;"),
1807           CharsToUnicodeString("ab\\U0001030Fx"),
1808           CharsToUnicodeString("\\U00010300bix"));
1809
1810    expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811                                "$b=[A-Z\\U00010400-\\U0001044D];"
1812                                "($a)($b) > $2 $1;"),
1813           CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814           CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815
1816    // k|ax\\U00010300xm
1817
1818    // k|a\\U00010400\\U00010300xm
1819    // ky|\\U00010400\\U00010300xm
1820    // ky\\U00010400|\\U00010300xm
1821
1822    // ky\\U00010400|\\U00010300\\U00010400m
1823    // ky\\U00010400y|\\U00010400m
1824    expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825                                "$a {x} > | @ \\U00010400;"
1826                                "{$a} [^\\u0000-\\uFFFF] > y;"),
1827           CharsToUnicodeString("kax\\U00010300xm"),
1828           CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1829
1830    expectT("Any-Name",
1831           CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832           UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833
1834    expectT("Any-Hex/Unicode",
1835           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836           UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837
1838    expectT("Any-Hex/C",
1839           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840           UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841
1842    expectT("Any-Hex/Perl",
1843           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844           UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845
1846    expectT("Any-Hex/Java",
1847           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848           UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849
1850    expectT("Any-Hex/XML",
1851           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852           "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1853
1854    expectT("Any-Hex/XML10",
1855           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856           "&#66352;&#1113856;&#917601;&#160;");
1857
1858    expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860           CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1861}
1862
1863void TransliteratorTest::TestQuantifier() {
1864
1865    // Make sure @ in a quantified anteContext works
1866    expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1867           "AAAAAb",
1868           "aaa(aac)");
1869
1870    // Make sure @ in a quantified postContext works
1871    expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1872           "baaaaa",
1873           "caa(aaa)");
1874
1875    // Make sure @ in a quantified postContext with seg ref works
1876    expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1877           "baaaaa",
1878           "baa(aaa)");
1879
1880    // Make sure @ past ante context doesn't enter ante context
1881    UTransPosition pos = {0, 5, 3, 5};
1882    expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1883           "xxxab",
1884           "xxx(ac)",
1885           &pos);
1886
1887    // Make sure @ past post context doesn't pass limit
1888    UTransPosition pos2 = {0, 4, 0, 2};
1889    expect("{b} a+ > c @@ |; x > y; a > A;",
1890           "baxx",
1891           "caxx",
1892           &pos2);
1893
1894    // Make sure @ past post context doesn't enter post context
1895    expect("{b} a+ > c @@ |; x > y; a > A;",
1896           "baxx",
1897           "cayy");
1898
1899    expect("(ab)? c > d;",
1900           "c abc ababc",
1901           "d d abd");
1902
1903    // NOTE: The (ab)+ when referenced just yields a single "ab",
1904    // not the full sequence of them.  This accords with perl behavior.
1905    expect("(ab)+ {x} > '(' $1 ')';",
1906           "x abx ababxy",
1907           "x ab(ab) abab(ab)y");
1908
1909    expect("b+ > x;",
1910           "ac abc abbc abbbc",
1911           "ac axc axc axc");
1912
1913    expect("[abc]+ > x;",
1914           "qac abrc abbcs abtbbc",
1915           "qx xrx xs xtx");
1916
1917    expect("q{(ab)+} > x;",
1918           "qa qab qaba qababc qaba",
1919           "qa qx qxa qxc qxa");
1920
1921    expect("q(ab)* > x;",
1922           "qa qab qaba qababc",
1923           "xa x xa xc");
1924
1925    // NOTE: The (ab)+ when referenced just yields a single "ab",
1926    // not the full sequence of them.  This accords with perl behavior.
1927    expect("q(ab)* > '(' $1 ')';",
1928           "qa qab qaba qababc",
1929           "()a (ab) (ab)a (ab)c");
1930
1931    // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932    // quoted string
1933    expect("'ab'+ > x;",
1934           "bb ab ababb",
1935           "bb x xb");
1936
1937    // $foo+ and $foo* -- the quantifier should apply to the entire
1938    // variable reference
1939    expect("$var = ab; $var+ > x;",
1940           "bb ab ababb",
1941           "bb x xb");
1942}
1943
1944class TestTrans : public Transliterator {
1945public:
1946    TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1947    }
1948    virtual Transliterator* clone(void) const {
1949        return new TestTrans(getID());
1950    }
1951    virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1952        UBool /*isIncremental*/) const
1953    {
1954        offsets.start = offsets.limit;
1955    }
1956    virtual UClassID getDynamicClassID() const;
1957    static UClassID U_EXPORT2 getStaticClassID();
1958};
1959UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1960
1961/**
1962 * Test Source-Target/Variant.
1963 */
1964void TransliteratorTest::TestSTV(void) {
1965    int32_t ns = Transliterator::countAvailableSources();
1966    if (ns < 0 || ns > 255) {
1967        errln((UnicodeString)"FAIL: Bad source count: " + ns);
1968        return;
1969    }
1970    int32_t i, j;
1971    for (i=0; i<ns; ++i) {
1972        UnicodeString source;
1973        Transliterator::getAvailableSource(i, source);
1974        logln((UnicodeString)"" + i + ": " + source);
1975        if (source.length() == 0) {
1976            errln("FAIL: empty source");
1977            continue;
1978        }
1979        int32_t nt = Transliterator::countAvailableTargets(source);
1980        if (nt < 0 || nt > 255) {
1981            errln((UnicodeString)"FAIL: Bad target count: " + nt);
1982            continue;
1983        }
1984        for (int32_t j=0; j<nt; ++j) {
1985            UnicodeString target;
1986            Transliterator::getAvailableTarget(j, source, target);
1987            logln((UnicodeString)" " + j + ": " + target);
1988            if (target.length() == 0) {
1989                errln("FAIL: empty target");
1990                continue;
1991            }
1992            int32_t nv = Transliterator::countAvailableVariants(source, target);
1993            if (nv < 0 || nv > 255) {
1994                errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1995                continue;
1996            }
1997            for (int32_t k=0; k<nv; ++k) {
1998                UnicodeString variant;
1999                Transliterator::getAvailableVariant(k, source, target, variant);
2000                if (variant.length() == 0) {
2001                    logln((UnicodeString)"  " + k + ": <empty>");
2002                } else {
2003                    logln((UnicodeString)"  " + k + ": " + variant);
2004                }
2005            }
2006        }
2007    }
2008
2009    // Test registration
2010    const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011    const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012    const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2013    for (i=0; i<3; ++i) {
2014        Transliterator *t = new TestTrans(IDS[i]);
2015        if (t == 0) {
2016            errln("FAIL: out of memory");
2017            return;
2018        }
2019        if (t->getID() != IDS[i]) {
2020            errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2021            delete t;
2022            return;
2023        }
2024        Transliterator::registerInstance(t);
2025        UErrorCode status = U_ZERO_ERROR;
2026        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2027        if (t == NULL) {
2028            errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2029                  IDS[i]);
2030        } else {
2031            logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2032                  IDS[i]);
2033            delete t;
2034        }
2035        Transliterator::unregister(IDS[i]);
2036        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2037        if (t != NULL) {
2038            errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2039                  IDS[i]);
2040            delete t;
2041        }
2042    }
2043
2044    // Make sure getAvailable API reflects removal
2045    int32_t n = Transliterator::countAvailableIDs();
2046    for (i=0; i<n; ++i) {
2047        UnicodeString id = Transliterator::getAvailableID(i);
2048        for (j=0; j<3; ++j) {
2049            if (id.caseCompare(FULL_IDS[j],0)==0) {
2050                errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2051            }
2052        }
2053    }
2054    n = Transliterator::countAvailableTargets("Any");
2055    for (i=0; i<n; ++i) {
2056        UnicodeString t;
2057        Transliterator::getAvailableTarget(i, "Any", t);
2058        if (t.caseCompare(IDS[0],0)==0) {
2059            errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2060        }
2061    }
2062    n = Transliterator::countAvailableSources();
2063    for (i=0; i<n; ++i) {
2064        UnicodeString s;
2065        Transliterator::getAvailableSource(i, s);
2066        for (j=0; j<3; ++j) {
2067            if (SOURCES[j] == NULL) continue;
2068            if (s.caseCompare(SOURCES[j],0)==0) {
2069                errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2070            }
2071        }
2072    }
2073}
2074
2075/**
2076 * Test inverse of Greek-Latin; Title()
2077 */
2078void TransliteratorTest::TestCompoundInverse(void) {
2079    UParseError parseError;
2080    UErrorCode status = U_ZERO_ERROR;
2081    Transliterator *t = Transliterator::createInstance
2082        ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2083    if (t == 0) {
2084        dataerrln("FAIL: createInstance - %s", u_errorName(status));
2085        return;
2086    }
2087    UnicodeString exp("(Title);Latin-Greek");
2088    if (t->getID() == exp) {
2089        logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2090              t->getID());
2091    } else {
2092        errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093              t->getID() + "\", expected \"" + exp + "\"");
2094    }
2095    delete t;
2096}
2097
2098/**
2099 * Test NFD chaining with RBT
2100 */
2101void TransliteratorTest::TestNFDChainRBT() {
2102    UParseError pe;
2103    UErrorCode ec = U_ZERO_ERROR;
2104    Transliterator* t = Transliterator::createFromRules(
2105                               "TEST", "::NFD; aa > Q; a > q;",
2106                               UTRANS_FORWARD, pe, ec);
2107    if (t == NULL || U_FAILURE(ec)) {
2108        dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2109        return;
2110    }
2111    expect(*t, "aa", "Q");
2112    delete t;
2113
2114    // TEMPORARY TESTS -- BEING DEBUGGED
2115//=-    UnicodeString s, s2;
2116//=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118//=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119//=-    expect(*t, s, s2);
2120//=-    delete t;
2121//=-
2122//=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123//=-    expect(*t, s2, s);
2124//=-    delete t;
2125//=-
2126//=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128//=-    expect(*t, s, s);
2129//=-    delete t;
2130
2131//    const char* source[] = {
2132//        /*
2133//        "\\u015Br\\u012Bmad",
2134//        "bhagavadg\\u012Bt\\u0101",
2135//        "adhy\\u0101ya",
2136//        "arjuna",
2137//        "vi\\u1E63\\u0101da",
2138//        "y\\u014Dga",
2139//        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140//        "uv\\u0101cr\\u0325",
2141//        */
2142//        "rmk\\u1E63\\u0113t",
2143//      //"dharmak\\u1E63\\u0113tr\\u0113",
2144//        /*
2145//        "kuruk\\u1E63\\u0113tr\\u0113",
2146//        "samav\\u0113t\\u0101",
2147//        "yuyutsava-\\u1E25",
2148//        "m\\u0101mak\\u0101-\\u1E25",
2149//     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150//        "kimakurvata",
2151//        "san\\u0304java",
2152//        */
2153//
2154//        0
2155//    };
2156//    const char* expected[] = {
2157//        /*
2158//        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159//        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160//        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161//        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162//        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163//        "\\u092f\\u094b\\u0917",
2164//        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165//        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2166//        */
2167//        "\\u0927",
2168//        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169//        /*
2170//        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171//        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172//        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173//        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174//    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175//        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176//        "\\u0938\\u0902\\u091c\\u0935",
2177//        */
2178//        0
2179//    };
2180//    UErrorCode status = U_ZERO_ERROR;
2181//    UParseError parseError;
2182//    UnicodeString message;
2183//    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184//    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185//    if(U_FAILURE(status)){
2186//        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2187//        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188//        delete latinToDevToLatin;
2189//        delete devToLatinToDev;
2190//        return;
2191//    }
2192//    UnicodeString gotResult;
2193//    for(int i= 0; source[i] != 0; i++){
2194//        gotResult = source[i];
2195//        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196//        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197//    }
2198//    delete latinToDevToLatin;
2199//    delete devToLatinToDev;
2200}
2201
2202/**
2203 * Inverse of "Null" should be "Null". (J21)
2204 */
2205void TransliteratorTest::TestNullInverse() {
2206    UParseError pe;
2207    UErrorCode ec = U_ZERO_ERROR;
2208    Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2209    if (t == 0 || U_FAILURE(ec)) {
2210        errln("FAIL: createInstance");
2211        return;
2212    }
2213    Transliterator *u = t->createInverse(ec);
2214    if (u == 0 || U_FAILURE(ec)) {
2215        errln("FAIL: createInverse");
2216        delete t;
2217        return;
2218    }
2219    if (u->getID() != "Null") {
2220        errln("FAIL: Inverse of Null should be Null");
2221    }
2222    delete t;
2223    delete u;
2224}
2225
2226/**
2227 * Check ID of inverse of alias. (J22)
2228 */
2229void TransliteratorTest::TestAliasInverseID() {
2230    UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231    UParseError pe;
2232    UErrorCode ec = U_ZERO_ERROR;
2233    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2234    if (t == 0 || U_FAILURE(ec)) {
2235        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2236        return;
2237    }
2238    Transliterator *u = t->createInverse(ec);
2239    if (u == 0 || U_FAILURE(ec)) {
2240        errln("FAIL: createInverse");
2241        delete t;
2242        return;
2243    }
2244    UnicodeString exp = "Hangul-Latin";
2245    UnicodeString got = u->getID();
2246    if (got != exp) {
2247        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2248              ", expected " + exp);
2249    }
2250    delete t;
2251    delete u;
2252}
2253
2254/**
2255 * Test IDs of inverses of compound transliterators. (J20)
2256 */
2257void TransliteratorTest::TestCompoundInverseID() {
2258    UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2259    UParseError pe;
2260    UErrorCode ec = U_ZERO_ERROR;
2261    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2262    if (t == 0 || U_FAILURE(ec)) {
2263        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2264        return;
2265    }
2266    Transliterator *u = t->createInverse(ec);
2267    if (u == 0 || U_FAILURE(ec)) {
2268        errln("FAIL: createInverse");
2269        delete t;
2270        return;
2271    }
2272    UnicodeString exp = "NFD(NFC);Jamo-Latin";
2273    UnicodeString got = u->getID();
2274    if (got != exp) {
2275        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2276              ", expected " + exp);
2277    }
2278    delete t;
2279    delete u;
2280}
2281
2282/**
2283 * Test undefined variable.
2284
2285 */
2286void TransliteratorTest::TestUndefinedVariable() {
2287    UnicodeString rule = "$initial } a <> \\u1161;";
2288    UParseError pe;
2289    UErrorCode ec = U_ZERO_ERROR;
2290    Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2291    delete t;
2292    if (U_FAILURE(ec)) {
2293        logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2294              u_errorName(ec));
2295        return;
2296    }
2297    errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2298          u_errorName(ec));
2299}
2300
2301/**
2302 * Test empty context.
2303 */
2304void TransliteratorTest::TestEmptyContext() {
2305    expect(" { a } > b;", "xay a ", "xby b ");
2306}
2307
2308/**
2309* Test compound filter ID syntax
2310*/
2311void TransliteratorTest::TestCompoundFilterID(void) {
2312    static const char* DATA[] = {
2313        // Col. 1 = ID or rule set (latter must start with #)
2314
2315        // = columns > 1 are null if expect col. 1 to be illegal =
2316
2317        // Col. 2 = direction, "F..." or "R..."
2318        // Col. 3 = source string
2319        // Col. 4 = exp result
2320
2321        "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2322        "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2323        "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324        "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325        "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326        "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327        NULL,
2328    };
2329
2330    for (int32_t i=0; DATA[i]; i+=4) {
2331        UnicodeString id = CharsToUnicodeString(DATA[i]);
2332        UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2333            UTRANS_REVERSE : UTRANS_FORWARD;
2334        UnicodeString source;
2335        UnicodeString exp;
2336        if (DATA[i+2] != NULL) {
2337            source = CharsToUnicodeString(DATA[i+2]);
2338            exp = CharsToUnicodeString(DATA[i+3]);
2339        }
2340        UBool expOk = (DATA[i+1] != NULL);
2341        Transliterator* t = NULL;
2342        UParseError pe;
2343        UErrorCode ec = U_ZERO_ERROR;
2344        if (id.charAt(0) == 0x23/*#*/) {
2345            t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2346        } else {
2347            t = Transliterator::createInstance(id, direction, pe, ec);
2348        }
2349        UBool ok = (t != NULL && U_SUCCESS(ec));
2350        UnicodeString transID;
2351        if (t!=0) {
2352            transID = t->getID();
2353        }
2354        else {
2355            transID = UnicodeString("NULL", "");
2356        }
2357        if (ok == expOk) {
2358            logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2359                  u_errorName(ec));
2360            if (source.length() != 0) {
2361                expect(*t, source, exp);
2362            }
2363            delete t;
2364        } else {
2365            dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2366                  u_errorName(ec));
2367        }
2368    }
2369}
2370
2371/**
2372 * Test new property set syntax
2373 */
2374void TransliteratorTest::TestPropertySet() {
2375    expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376    expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377           "[ a stitch ]\n[ in time ]\r[ saves 9]");
2378}
2379
2380/**
2381 * Test various failure points of the new 2.0 engine.
2382 */
2383void TransliteratorTest::TestNewEngine() {
2384    UParseError pe;
2385    UErrorCode ec = U_ZERO_ERROR;
2386    Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2387    if (t == 0 || U_FAILURE(ec)) {
2388        dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2389        return;
2390    }
2391    // Katakana should be untouched
2392    expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2393           CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2394
2395    delete t;
2396
2397#if 1
2398    // This test will only work if Transliterator.ROLLBACK is
2399    // true.  Otherwise, this test will fail, revealing a
2400    // limitation of global filters in incremental mode.
2401    Transliterator *a =
2402        Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2403    Transliterator *A =
2404        Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2405    if (U_FAILURE(ec)) {
2406        delete a;
2407        delete A;
2408        return;
2409    }
2410
2411    Transliterator* array[3];
2412    array[0] = a;
2413    array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2414    array[2] = A;
2415    if (U_FAILURE(ec)) {
2416        errln("FAIL: createInstance NFD");
2417        delete a;
2418        delete A;
2419        delete array[1];
2420        return;
2421    }
2422
2423    t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2424    if (U_FAILURE(ec)) {
2425        errln("FAIL: UnicodeSet constructor");
2426        delete a;
2427        delete A;
2428        delete array[1];
2429        delete t;
2430        return;
2431    }
2432
2433    expect(*t, "aAaA", "bAbA");
2434
2435    assertTrue("countElements", t->countElements() == 3);
2436    assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2437    assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2438    assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2439    assertSuccess("getElement", ec);
2440
2441    delete a;
2442    delete A;
2443    delete array[1];
2444    delete t;
2445#endif
2446
2447    expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2448           "a",
2449           "ax");
2450
2451    UnicodeString gr = CharsToUnicodeString(
2452        "$ddot = \\u0308 ;"
2453        "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454        "$rough = \\u0314 ;"
2455        "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2456        "\\u03b1 <> a ;"
2457        "$rough <> h ;");
2458
2459    expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2460}
2461
2462/**
2463 * Test quantified segment behavior.  We want:
2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465 */
2466void TransliteratorTest::TestQuantifiedSegment(void) {
2467    // The normal case
2468    expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469
2470    // The tricky case; the quantifier is around the segment
2471    expect("([abc])+ > x $1 x;", "cba", "xax");
2472
2473    // Tricky case in reverse direction
2474    expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475
2476    // Check post-context segment
2477    expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478
2479    // Test toRule/toPattern for non-quantified segment.
2480    // Careful with spacing here.
2481    UnicodeString r("([a-c]){q} > x $1 x;");
2482    UParseError pe;
2483    UErrorCode ec = U_ZERO_ERROR;
2484    Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2485    if (U_FAILURE(ec)) {
2486        errln("FAIL: createFromRules");
2487        delete t;
2488        return;
2489    }
2490    UnicodeString rr;
2491    t->toRules(rr, TRUE);
2492    if (r != rr) {
2493        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2494    } else {
2495        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496    }
2497    delete t;
2498
2499    // Test toRule/toPattern for quantified segment.
2500    // Careful with spacing here.
2501    r = "([a-c])+{q} > x $1 x;";
2502    t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2503    if (U_FAILURE(ec)) {
2504        errln("FAIL: createFromRules");
2505        delete t;
2506        return;
2507    }
2508    t->toRules(rr, TRUE);
2509    if (r != rr) {
2510        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2511    } else {
2512        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513    }
2514    delete t;
2515}
2516
2517//======================================================================
2518// Ram's tests
2519//======================================================================
2520void TransliteratorTest::TestDevanagariLatinRT(){
2521    const int MAX_LEN= 52;
2522    const char* const source[MAX_LEN] = {
2523        "bh\\u0101rata",
2524        "kra",
2525        "k\\u1E63a",
2526        "khra",
2527        "gra",
2528        "\\u1E45ra",
2529        "cra",
2530        "chra",
2531        "j\\u00F1a",
2532        "jhra",
2533        "\\u00F1ra",
2534        "\\u1E6Dya",
2535        "\\u1E6Dhra",
2536        "\\u1E0Dya",
2537      //"r\\u0323ya", // \u095c is not valid in Devanagari
2538        "\\u1E0Dhya",
2539        "\\u1E5Bhra",
2540        "\\u1E47ra",
2541        "tta",
2542        "thra",
2543        "dda",
2544        "dhra",
2545        "nna",
2546        "pra",
2547        "phra",
2548        "bra",
2549        "bhra",
2550        "mra",
2551        "\\u1E49ra",
2552      //"l\\u0331ra",
2553        "yra",
2554        "\\u1E8Fra",
2555      //"l-",
2556        "vra",
2557        "\\u015Bra",
2558        "\\u1E63ra",
2559        "sra",
2560        "hma",
2561        "\\u1E6D\\u1E6Da",
2562        "\\u1E6D\\u1E6Dha",
2563        "\\u1E6Dh\\u1E6Dha",
2564        "\\u1E0D\\u1E0Da",
2565        "\\u1E0D\\u1E0Dha",
2566        "\\u1E6Dya",
2567        "\\u1E6Dhya",
2568        "\\u1E0Dya",
2569        "\\u1E0Dhya",
2570        // Not roundtrippable --
2571        // \\u0939\\u094d\\u094d\\u092E  - hma
2572        // \\u0939\\u094d\\u092E         - hma
2573        // CharsToUnicodeString("hma"),
2574        "hya",
2575        "\\u015Br\\u0325",
2576        "\\u015Bca",
2577        "\\u0115",
2578        "san\\u0304j\\u012Bb s\\u0113nagupta",
2579        "\\u0101nand vaddir\\u0101ju",
2580        "\\u0101",
2581        "a"
2582    };
2583    const char* const expected[MAX_LEN] = {
2584        "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2585        "\\u0915\\u094D\\u0930",          /* kra         */
2586        "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2587        "\\u0916\\u094D\\u0930",          /* khra        */
2588        "\\u0917\\u094D\\u0930",          /* gra         */
2589        "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2590        "\\u091A\\u094D\\u0930",          /* cra         */
2591        "\\u091B\\u094D\\u0930",          /* chra        */
2592        "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2593        "\\u091D\\u094D\\u0930",          /* jhra        */
2594        "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2595        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2596        "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2597        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2598      //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2599        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2600        "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2601        "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2602        "\\u0924\\u094D\\u0924",          /* tta         */
2603        "\\u0925\\u094D\\u0930",          /* thra        */
2604        "\\u0926\\u094D\\u0926",          /* dda         */
2605        "\\u0927\\u094D\\u0930",          /* dhra        */
2606        "\\u0928\\u094D\\u0928",          /* nna         */
2607        "\\u092A\\u094D\\u0930",          /* pra         */
2608        "\\u092B\\u094D\\u0930",          /* phra        */
2609        "\\u092C\\u094D\\u0930",          /* bra         */
2610        "\\u092D\\u094D\\u0930",          /* bhra        */
2611        "\\u092E\\u094D\\u0930",          /* mra         */
2612        "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2613      //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2614        "\\u092F\\u094D\\u0930",          /* yra         */
2615        "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2616      //"l-",
2617        "\\u0935\\u094D\\u0930",          /* vra         */
2618        "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2619        "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2620        "\\u0938\\u094D\\u0930",          /* sra         */
2621        "\\u0939\\u094d\\u092E",          /* hma         */
2622        "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2623        "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2624        "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2625        "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2626        "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2627        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2628        "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2629        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2630        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2631     // "hma",                         /* hma         */
2632        "\\u0939\\u094D\\u092F",          /* hya         */
2633        "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2634        "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2635        "\\u090d",                        /* e\\u0306    */
2636        "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637        "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638        "\\u0906",
2639        "\\u0905",
2640    };
2641    UErrorCode status = U_ZERO_ERROR;
2642    UParseError parseError;
2643    UnicodeString message;
2644    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2645    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2646    if(U_FAILURE(status)){
2647        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2648        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2649        return;
2650    }
2651    UnicodeString gotResult;
2652    for(int i= 0; i<MAX_LEN; i++){
2653        gotResult = source[i];
2654        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2655        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2656    }
2657    delete latinToDev;
2658    delete devToLatin;
2659}
2660
2661void TransliteratorTest::TestTeluguLatinRT(){
2662    const int MAX_LEN=10;
2663    const char* const source[MAX_LEN] = {
2664        "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2665        "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2666        "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2667        "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2668        "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2669        "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2670        "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2671        "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2672        "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2673        "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2674    };
2675
2676    const char* const expected[MAX_LEN] = {
2677        "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678        "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679        "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682        "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683        "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684        "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685        "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686        "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687    };
2688
2689    UErrorCode status = U_ZERO_ERROR;
2690    UParseError parseError;
2691    UnicodeString message;
2692    Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2693    Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2694    if(U_FAILURE(status)){
2695        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2696        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2697        return;
2698    }
2699    UnicodeString gotResult;
2700    for(int i= 0; i<MAX_LEN; i++){
2701        gotResult = source[i];
2702        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2703        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2704    }
2705    delete latinToDev;
2706    delete devToLatin;
2707}
2708
2709void TransliteratorTest::TestSanskritLatinRT(){
2710    const int MAX_LEN =16;
2711    const char* const source[MAX_LEN] = {
2712        "rmk\\u1E63\\u0113t",
2713        "\\u015Br\\u012Bmad",
2714        "bhagavadg\\u012Bt\\u0101",
2715        "adhy\\u0101ya",
2716        "arjuna",
2717        "vi\\u1E63\\u0101da",
2718        "y\\u014Dga",
2719        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720        "uv\\u0101cr\\u0325",
2721        "dharmak\\u1E63\\u0113tr\\u0113",
2722        "kuruk\\u1E63\\u0113tr\\u0113",
2723        "samav\\u0113t\\u0101",
2724        "yuyutsava\\u1E25",
2725        "m\\u0101mak\\u0101\\u1E25",
2726    // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727        "kimakurvata",
2728        "san\\u0304java",
2729    };
2730    const char* const expected[MAX_LEN] = {
2731        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737        "\\u092f\\u094b\\u0917",
2738        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745    //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747        "\\u0938\\u0902\\u091c\\u0935",
2748    };
2749    UErrorCode status = U_ZERO_ERROR;
2750    UParseError parseError;
2751    UnicodeString message;
2752    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754    if(U_FAILURE(status)){
2755        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2756        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2757        return;
2758    }
2759    UnicodeString gotResult;
2760    for(int i= 0; i<MAX_LEN; i++){
2761        gotResult = source[i];
2762        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2763        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2764    }
2765    delete latinToDev;
2766    delete devToLatin;
2767}
2768
2769
2770void TransliteratorTest::TestCompoundLatinRT(){
2771    const char* const source[] = {
2772        "rmk\\u1E63\\u0113t",
2773        "\\u015Br\\u012Bmad",
2774        "bhagavadg\\u012Bt\\u0101",
2775        "adhy\\u0101ya",
2776        "arjuna",
2777        "vi\\u1E63\\u0101da",
2778        "y\\u014Dga",
2779        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780        "uv\\u0101cr\\u0325",
2781        "dharmak\\u1E63\\u0113tr\\u0113",
2782        "kuruk\\u1E63\\u0113tr\\u0113",
2783        "samav\\u0113t\\u0101",
2784        "yuyutsava\\u1E25",
2785        "m\\u0101mak\\u0101\\u1E25",
2786     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787        "kimakurvata",
2788        "san\\u0304java"
2789    };
2790    const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791    const char* const expected[MAX_LEN] = {
2792        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798        "\\u092f\\u094b\\u0917",
2799        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808        "\\u0938\\u0902\\u091c\\u0935"
2809    };
2810    if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2811        errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812        return;
2813    }
2814
2815    UErrorCode status = U_ZERO_ERROR;
2816    UParseError parseError;
2817    UnicodeString message;
2818    Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2819    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2820    Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2821    Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2822
2823    if(U_FAILURE(status)){
2824        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2825        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2826        return;
2827    }
2828    UnicodeString gotResult;
2829    for(int i= 0; i<MAX_LEN; i++){
2830        gotResult = source[i];
2831        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833        expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834
2835    }
2836    delete(latinToDevToLatin);
2837    delete(devToLatinToDev);
2838    delete(devToTelToDev);
2839    delete(latinToTelToLatin);
2840}
2841
2842/**
2843 * Test Gurmukhi-Devanagari Tippi and Bindi
2844 */
2845void TransliteratorTest::TestGurmukhiDevanagari(){
2846    // the rule says:
2847    // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2848    // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2849    UErrorCode status = U_ZERO_ERROR;
2850    UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2851    UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2852    UParseError parseError;
2853
2854    UnicodeSetIterator vIter(vowel);
2855    UnicodeSetIterator nvIter(non_vowel);
2856    Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2857    if(U_FAILURE(status)) {
2858      dataerrln("Error creating transliterator %s", u_errorName(status));
2859      delete trans;
2860      return;
2861    }
2862    UnicodeString src (" \\u0902", -1, US_INV);
2863    UnicodeString expected(" \\u0A02", -1, US_INV);
2864    src = src.unescape();
2865    expected= expected.unescape();
2866
2867    while(vIter.next()){
2868        src.setCharAt(0,(UChar) vIter.getCodepoint());
2869        expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2870        expect(*trans,src,expected);
2871    }
2872
2873    expected.setCharAt(1,0x0A70);
2874    while(nvIter.next()){
2875        //src.setCharAt(0,(char) nvIter.codepoint);
2876        src.setCharAt(0,(UChar)nvIter.getCodepoint());
2877        expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2878        expect(*trans,src,expected);
2879    }
2880    delete trans;
2881}
2882/**
2883 * Test instantiation from a locale.
2884 */
2885void TransliteratorTest::TestLocaleInstantiation(void) {
2886    UParseError pe;
2887    UErrorCode ec = U_ZERO_ERROR;
2888    Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2889    if (U_FAILURE(ec)) {
2890        dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2891        delete t;
2892        return;
2893    }
2894    expect(*t, CharsToUnicodeString("\\u0430"), "a");
2895    delete t;
2896
2897    t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2898    if (U_FAILURE(ec)) {
2899        errln("FAIL: createInstance(en-el)");
2900        delete t;
2901        return;
2902    }
2903    expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2904    delete t;
2905}
2906
2907/**
2908 * Test title case handling of accent (should ignore accents)
2909 */
2910void TransliteratorTest::TestTitleAccents(void) {
2911    UParseError pe;
2912    UErrorCode ec = U_ZERO_ERROR;
2913    Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2914    if (U_FAILURE(ec)) {
2915        errln("FAIL: createInstance(Title)");
2916        delete t;
2917        return;
2918    }
2919    expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2920    delete t;
2921}
2922
2923/**
2924 * Basic test of a locale resource based rule.
2925 */
2926void TransliteratorTest::TestLocaleResource() {
2927    const char* DATA[] = {
2928        // id                    from               to
2929        //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2930        "Latin-el",              "b",               "\\u03bc\\u03c0",
2931        "Latin-Greek",           "b",               "\\u03B2",
2932        "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2933        "el-Latin",              "\\u03B2",         "v",
2934        "Greek-Latin",           "\\u03B2",         "b",
2935    };
2936    const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2937    for (int32_t i=0; i<DATA_length; i+=3) {
2938        UParseError pe;
2939        UErrorCode ec = U_ZERO_ERROR;
2940        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2941        if (U_FAILURE(ec)) {
2942            dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2943            delete t;
2944            continue;
2945        }
2946        expect(*t, CharsToUnicodeString(DATA[i+1]),
2947               CharsToUnicodeString(DATA[i+2]));
2948        delete t;
2949    }
2950}
2951
2952/**
2953 * Make sure parse errors reference the right line.
2954 */
2955void TransliteratorTest::TestParseError() {
2956    static const char* rule =
2957        "a > b;\n"
2958        "# more stuff\n"
2959        "d << b;";
2960    UErrorCode ec = U_ZERO_ERROR;
2961    UParseError pe;
2962    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2963    delete t;
2964    if (U_FAILURE(ec)) {
2965        UnicodeString err(pe.preContext);
2966        err.append((UChar)124/*|*/).append(pe.postContext);
2967        if (err.indexOf("d << b") >= 0) {
2968            logln("Ok: " + err);
2969        } else {
2970            errln("FAIL: " + err);
2971        }
2972    }
2973    else {
2974        errln("FAIL: no syntax error");
2975    }
2976    static const char* maskingRule =
2977        "a>x;\n"
2978        "# more stuff\n"
2979        "ab>y;";
2980    ec = U_ZERO_ERROR;
2981    delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2982    if (ec != U_RULE_MASK_ERROR) {
2983        errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2984    }
2985    else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2986        errln("FAIL: did not get expected precontext");
2987    }
2988    else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2989        errln("FAIL: did not get expected postcontext");
2990    }
2991}
2992
2993/**
2994 * Make sure sets on output are disallowed.
2995 */
2996void TransliteratorTest::TestOutputSet() {
2997    UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2998    UErrorCode ec = U_ZERO_ERROR;
2999    UParseError pe;
3000    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3001    delete t;
3002    if (U_FAILURE(ec)) {
3003        UnicodeString err(pe.preContext);
3004        err.append((UChar)124/*|*/).append(pe.postContext);
3005        logln("Ok: " + err);
3006        return;
3007    }
3008    errln("FAIL: No syntax error");
3009}
3010
3011/**
3012 * Test the use variable range pragma, making sure that use of
3013 * variable range characters is detected and flagged as an error.
3014 */
3015void TransliteratorTest::TestVariableRange() {
3016    UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017    UErrorCode ec = U_ZERO_ERROR;
3018    UParseError pe;
3019    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3020    delete t;
3021    if (U_FAILURE(ec)) {
3022        UnicodeString err(pe.preContext);
3023        err.append((UChar)124/*|*/).append(pe.postContext);
3024        logln("Ok: " + err);
3025        return;
3026    }
3027    errln("FAIL: No syntax error");
3028}
3029
3030/**
3031 * Test invalid post context error handling
3032 */
3033void TransliteratorTest::TestInvalidPostContext() {
3034    UnicodeString rule = "a}b{c>d;";
3035    UErrorCode ec = U_ZERO_ERROR;
3036    UParseError pe;
3037    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3038    delete t;
3039    if (U_FAILURE(ec)) {
3040        UnicodeString err(pe.preContext);
3041        err.append((UChar)124/*|*/).append(pe.postContext);
3042        if (err.indexOf("a}b{c") >= 0) {
3043            logln("Ok: " + err);
3044        } else {
3045            errln("FAIL: " + err);
3046        }
3047        return;
3048    }
3049    errln("FAIL: No syntax error");
3050}
3051
3052/**
3053 * Test ID form variants
3054 */
3055void TransliteratorTest::TestIDForms() {
3056    const char* DATA[] = {
3057        "NFC", NULL, "NFD",
3058        "nfd", NULL, "NFC", // make sure case is ignored
3059        "Any-NFKD", NULL, "Any-NFKC",
3060        "Null", NULL, "Null",
3061        "-nfkc", "nfkc", "NFKD",
3062        "-nfkc/", "nfkc", "NFKD",
3063        "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3064        "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065        "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066        "Source-", NULL, NULL,
3067        "Source/Variant-", NULL, NULL,
3068        "Source-/Variant", NULL, NULL,
3069        "/Variant", NULL, NULL,
3070        "/Variant-", NULL, NULL,
3071        "-/Variant", NULL, NULL,
3072        "-/", NULL, NULL,
3073        "-", NULL, NULL,
3074        "/", NULL, NULL,
3075    };
3076    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3077
3078    for (int32_t i=0; i<DATA_length; i+=3) {
3079        const char* ID = DATA[i];
3080        const char* expID = DATA[i+1];
3081        const char* expInvID = DATA[i+2];
3082        UBool expValid = (expInvID != NULL);
3083        if (expID == NULL) {
3084            expID = ID;
3085        }
3086        UParseError pe;
3087        UErrorCode ec = U_ZERO_ERROR;
3088        Transliterator *t =
3089            Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3090        if (U_FAILURE(ec)) {
3091            if (!expValid) {
3092                logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3093            } else {
3094                dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3095            }
3096            delete t;
3097            continue;
3098        }
3099        Transliterator *u = t->createInverse(ec);
3100        if (U_FAILURE(ec)) {
3101            errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3102            delete t;
3103            delete u;
3104            continue;
3105        }
3106        if (t->getID() == expID &&
3107            u->getID() == expInvID) {
3108            logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3109        } else {
3110            errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3111                  t->getID() + " x getInverse() => " + u->getID() +
3112                  ", expected " + expInvID);
3113        }
3114        delete t;
3115        delete u;
3116    }
3117}
3118
3119static const UChar SPACE[]   = {32,0};
3120static const UChar NEWLINE[] = {10,0};
3121static const UChar RETURN[]  = {13,0};
3122static const UChar EMPTY[]   = {0};
3123
3124void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3125                                    const UnicodeString& testRulesForward) {
3126    UnicodeString rules2; t2.toRules(rules2, TRUE);
3127    //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128    rules2.findAndReplace(SPACE, EMPTY);
3129    rules2.findAndReplace(NEWLINE, EMPTY);
3130    rules2.findAndReplace(RETURN, EMPTY);
3131
3132    UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3133
3134    if (rules2 != testRules) {
3135        errln(label);
3136        logln((UnicodeString)"GENERATED RULES: " + rules2);
3137        logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3138    }
3139}
3140
3141/**
3142 * Mark's toRules test.
3143 */
3144void TransliteratorTest::TestToRulesMark() {
3145    const char* testRules =
3146        "::[[:Latin:][:Mark:]];"
3147        "::NFKD (NFC);"
3148        "::Lower (Lower);"
3149        "a <> \\u03B1;" // alpha
3150        "::NFKC (NFD);"
3151        "::Upper (Lower);"
3152        "::Lower ();"
3153        "::([[:Greek:][:Mark:]]);"
3154        ;
3155    const char* testRulesForward =
3156        "::[[:Latin:][:Mark:]];"
3157        "::NFKD(NFC);"
3158        "::Lower(Lower);"
3159        "a > \\u03B1;"
3160        "::NFKC(NFD);"
3161        "::Upper (Lower);"
3162        "::Lower ();"
3163        ;
3164    const char* testRulesBackward =
3165        "::[[:Greek:][:Mark:]];"
3166        "::Lower (Upper);"
3167        "::NFD(NFKC);"
3168        "\\u03B1 > a;"
3169        "::Lower(Lower);"
3170        "::NFC(NFKD);"
3171        ;
3172    UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3173    UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3174
3175    UParseError pe;
3176    UErrorCode ec = U_ZERO_ERROR;
3177    Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3178    Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3179
3180    if (U_FAILURE(ec)) {
3181        delete t2;
3182        delete t3;
3183        dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3184        return;
3185    }
3186
3187    expect(*t2, source, target);
3188    expect(*t3, target, source);
3189
3190    checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3191    checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3192
3193    delete t2;
3194    delete t3;
3195}
3196
3197/**
3198 * Test Escape and Unescape transliterators.
3199 */
3200void TransliteratorTest::TestEscape() {
3201    UParseError pe;
3202    UErrorCode ec;
3203    Transliterator *t;
3204
3205    ec = U_ZERO_ERROR;
3206    t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3207    if (U_FAILURE(ec)) {
3208        errln((UnicodeString)"FAIL: createInstance");
3209    } else {
3210        expect(*t,
3211               UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3212               "@12Q");
3213    }
3214    delete t;
3215
3216    ec = U_ZERO_ERROR;
3217    t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3218    if (U_FAILURE(ec)) {
3219        errln((UnicodeString)"FAIL: createInstance");
3220    } else {
3221        expect(*t,
3222               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223               UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3224    }
3225    delete t;
3226
3227    ec = U_ZERO_ERROR;
3228    t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3229    if (U_FAILURE(ec)) {
3230        errln((UnicodeString)"FAIL: createInstance");
3231    } else {
3232        expect(*t,
3233               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234               UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3235    }
3236    delete t;
3237
3238    ec = U_ZERO_ERROR;
3239    t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3240    if (U_FAILURE(ec)) {
3241        errln((UnicodeString)"FAIL: createInstance");
3242    } else {
3243        expect(*t,
3244               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245               UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3246    }
3247    delete t;
3248}
3249
3250
3251void TransliteratorTest::TestAnchorMasking(){
3252    UnicodeString rule ("^a > Q; a > q;");
3253    UErrorCode status= U_ZERO_ERROR;
3254    UParseError parseError;
3255
3256    Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3257    if(U_FAILURE(status)){
3258        errln(UnicodeString("FAIL: ") + "ID" +
3259              ".createFromRules() => bad rules" +
3260              /*", parse error " + parseError.code +*/
3261              ", line " + parseError.line +
3262              ", offset " + parseError.offset +
3263              ", context " + prettify(parseError.preContext, TRUE) +
3264              ", rules: " + prettify(rule, TRUE));
3265    }
3266    delete t;
3267}
3268
3269/**
3270 * Make sure display names of variants look reasonable.
3271 */
3272void TransliteratorTest::TestDisplayName() {
3273#if UCONFIG_NO_FORMATTING
3274    logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3275    return;
3276#else
3277    static const char* DATA[] = {
3278        // ID, forward name, reverse name
3279        // Update the text as necessary -- the important thing is
3280        // not the text itself, but how various cases are handled.
3281
3282        // Basic test
3283        "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3284
3285        // Variants
3286        "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3287
3288        // Target-only IDs
3289        "NFC", "Any to NFC", "Any to NFD",
3290    };
3291
3292    int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3293
3294    Locale US("en", "US");
3295
3296    for (int32_t i=0; i<DATA_length; i+=3) {
3297        UnicodeString name;
3298        Transliterator::getDisplayName(DATA[i], US, name);
3299        if (name != DATA[i+1]) {
3300            dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3301                  name + ", expected " + DATA[i+1]);
3302        } else {
3303            logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3304        }
3305        UErrorCode ec = U_ZERO_ERROR;
3306        UParseError pe;
3307        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3308        if (U_FAILURE(ec)) {
3309            delete t;
3310            dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3311            continue;
3312        }
3313        name = Transliterator::getDisplayName(t->getID(), US, name);
3314        if (name != DATA[i+2]) {
3315            dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3316                  name + ", expected " + DATA[i+2]);
3317        } else {
3318            logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3319        }
3320        delete t;
3321    }
3322#endif
3323}
3324
3325void TransliteratorTest::TestSpecialCases(void) {
3326    const UnicodeString registerRules[] = {
3327        "Any-Dev1", "x > X; y > Y;",
3328        "Any-Dev2", "XY > Z",
3329        "Greek-Latin/FAKE",
3330            CharsToUnicodeString
3331            ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3332        "" // END MARKER
3333    };
3334
3335    const UnicodeString testCases[] = {
3336        // NORMALIZATION
3337        // should add more test cases
3338        "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339        "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340        "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341        "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342
3343        // mp -> b BUG
3344        "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345        "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346
3347        // check for devanagari bug
3348        "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349
3350        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351        "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352                 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3353
3354        //TODO: enable this test once Titlecase works right
3355        /*
3356        "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357                 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358                 */
3359        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3360                 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3361        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3363
3364        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366
3367         // FORMS OF S
3368        "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369                               CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370        "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371                               CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372        "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373                        CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374        "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375                        CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376        // Tatiana bug
3377        // Upper: TAT\\u02B9\\u00C2NA
3378        // Lower: tat\\u02B9\\u00E2na
3379        // Title: Tat\\u02B9\\u00E2na
3380        "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381                 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382        "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383                 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384        "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385                 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3386
3387        "" // END MARKER
3388    };
3389
3390    UParseError pos;
3391    int32_t i;
3392    for (i = 0; registerRules[i].length()!=0; i+=2) {
3393        UErrorCode status = U_ZERO_ERROR;
3394
3395        Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3396            registerRules[i+1], UTRANS_FORWARD, pos, status);
3397        if (U_FAILURE(status)) {
3398            dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3399        } else {
3400            Transliterator::registerInstance(t);
3401        }
3402    }
3403    for (i = 0; testCases[i].length()!=0; i+=3) {
3404        UErrorCode ec = U_ZERO_ERROR;
3405        UParseError pe;
3406        const UnicodeString& name = testCases[i];
3407        Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3408        if (U_FAILURE(ec)) {
3409            dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3410            delete t;
3411            continue;
3412        }
3413        const UnicodeString& id = t->getID();
3414        const UnicodeString& source = testCases[i+1];
3415        UnicodeString target;
3416
3417        // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418
3419        if (testCases[i+2].length() > 0) {
3420            target = testCases[i+2];
3421        } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3422            Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3423        } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3424            Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3425        } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3426            Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3427        } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3428            Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3429        } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3430            target = source;
3431            target.toLower(Locale::getUS());
3432        } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3433            target = source;
3434            target.toUpper(Locale::getUS());
3435        }
3436        if (U_FAILURE(ec)) {
3437            errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3438            continue;
3439        }
3440
3441        expect(*t, source, target);
3442        delete t;
3443    }
3444    for (i = 0; registerRules[i].length()!=0; i+=2) {
3445        Transliterator::unregister(registerRules[i]);
3446    }
3447}
3448
3449char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3450    if (ch <= 0xFFFF) {
3451        sprintf(buffer, "\\u%04x", (int)ch);
3452    } else {
3453        sprintf(buffer, "\\U%08x", (int)ch);
3454    }
3455    return buffer;
3456}
3457
3458void TransliteratorTest::TestSurrogateCasing (void) {
3459    // check that casing handles surrogates
3460    // titlecase is currently defective
3461    char buffer[20];
3462    UChar buffer2[20];
3463    UChar32 dee;
3464    U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3465    UnicodeString DEE(u_totitle(dee));
3466    if (DEE != DESERET_DEE) {
3467        err("Fails titlecase of surrogates");
3468        err(Char32ToEscapedChars(dee, buffer));
3469        err(", ");
3470        errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3471    }
3472
3473    UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3474    UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3475    UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3476    UErrorCode status= U_ZERO_ERROR;
3477
3478    u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479    if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3480        errln("Fails: Can't uppercase surrogates.");
3481    }
3482
3483    status= U_ZERO_ERROR;
3484    u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3485    if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3486        errln("Fails: Can't lowercase surrogates.");
3487    }
3488}
3489
3490static void _trans(Transliterator& t, const UnicodeString& src,
3491                   UnicodeString& result) {
3492    result = src;
3493    t.transliterate(result);
3494}
3495
3496static void _trans(const UnicodeString& id, const UnicodeString& src,
3497                   UnicodeString& result, UErrorCode ec) {
3498    UParseError pe;
3499    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3500    if (U_SUCCESS(ec)) {
3501        _trans(*t, src, result);
3502    }
3503    delete t;
3504}
3505
3506static UnicodeString _findMatch(const UnicodeString& source,
3507                                       const UnicodeString* pairs) {
3508    UnicodeString empty;
3509    for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3510        if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3511            return pairs[i+1];
3512        }
3513    }
3514    return empty;
3515}
3516
3517// Check to see that incremental gets at least part way through a reasonable string.
3518
3519void TransliteratorTest::TestIncrementalProgress(void) {
3520    UErrorCode ec = U_ZERO_ERROR;
3521    UnicodeString latinTest = "The Quick Brown Fox.";
3522    UnicodeString devaTest;
3523    _trans("Latin-Devanagari", latinTest, devaTest, ec);
3524    UnicodeString kataTest;
3525    _trans("Latin-Katakana", latinTest, kataTest, ec);
3526    if (U_FAILURE(ec)) {
3527        errln("FAIL: Internal error");
3528        return;
3529    }
3530    const UnicodeString tests[] = {
3531        "Any", latinTest,
3532        "Latin", latinTest,
3533        "Halfwidth", latinTest,
3534        "Devanagari", devaTest,
3535        "Katakana", kataTest,
3536        "" // END MARKER
3537    };
3538
3539    UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540    int32_t i = 0, j=0, k=0;
3541    int32_t sources = Transliterator::countAvailableSources();
3542    for (i = 0; i < sources; i++) {
3543        UnicodeString source;
3544        Transliterator::getAvailableSource(i, source);
3545        UnicodeString test = _findMatch(source, tests);
3546        if (test.length() == 0) {
3547            logln((UnicodeString)"Skipping " + source + "-X");
3548            continue;
3549        }
3550        int32_t targets = Transliterator::countAvailableTargets(source);
3551        for (j = 0; j < targets; j++) {
3552            UnicodeString target;
3553            Transliterator::getAvailableTarget(j, source, target);
3554            int32_t variants = Transliterator::countAvailableVariants(source, target);
3555            for (k =0; k< variants; k++) {
3556                UnicodeString variant;
3557                UParseError err;
3558                UErrorCode status = U_ZERO_ERROR;
3559
3560                Transliterator::getAvailableVariant(k, source, target, variant);
3561                UnicodeString id = source + "-" + target + "/" + variant;
3562
3563                Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3564                if (U_FAILURE(status)) {
3565                    dataerrln((UnicodeString)"FAIL: Could not create " + id);
3566                    delete t;
3567                    continue;
3568                }
3569                status = U_ZERO_ERROR;
3570                CheckIncrementalAux(t, test);
3571
3572                UnicodeString rev;
3573                _trans(*t, test, rev);
3574                Transliterator *inv = t->createInverse(status);
3575                if (U_FAILURE(status)) {
3576#if UCONFIG_NO_BREAK_ITERATION
3577                    // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578                    if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3579#endif
3580                        errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3581
3582                    delete t;
3583                    delete inv;
3584                    continue;
3585                }
3586                CheckIncrementalAux(inv, rev);
3587                delete t;
3588                delete inv;
3589            }
3590        }
3591    }
3592}
3593
3594void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3595                                                      const UnicodeString& input) {
3596    UErrorCode ec = U_ZERO_ERROR;
3597    UTransPosition pos;
3598    UnicodeString test = input;
3599
3600    pos.contextStart = 0;
3601    pos.contextLimit = input.length();
3602    pos.start = 0;
3603    pos.limit = input.length();
3604
3605    t->transliterate(test, pos, ec);
3606    if (U_FAILURE(ec)) {
3607        errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3608        return;
3609    }
3610    UBool gotError = FALSE;
3611
3612    // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613
3614    if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615        errln((UnicodeString)"No Progress, " +
3616              t->getID() + ": " + formatInput(test, input, pos));
3617        gotError = TRUE;
3618    } else {
3619        logln((UnicodeString)"PASS Progress, " +
3620              t->getID() + ": " + formatInput(test, input, pos));
3621    }
3622    t->finishTransliteration(test, pos);
3623    if (pos.start != pos.limit) {
3624        errln((UnicodeString)"Incomplete, " +
3625              t->getID() + ": " + formatInput(test, input, pos));
3626        gotError = TRUE;
3627    }
3628}
3629
3630void TransliteratorTest::TestFunction() {
3631    // Careful with spacing and ';' here:  Phrase this exactly
3632    // as toRules() is going to return it.  If toRules() changes
3633    // with regard to spacing or ';', then adjust this string.
3634    UnicodeString rule =
3635        "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636
3637    UParseError pe;
3638    UErrorCode ec = U_ZERO_ERROR;
3639    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640    if (t == NULL) {
3641        dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642        return;
3643    }
3644
3645    UnicodeString r;
3646    t->toRules(r, TRUE);
3647    if (r == rule) {
3648        logln((UnicodeString)"OK: toRules() => " + r);
3649    } else {
3650        errln((UnicodeString)"FAIL: toRules() => " + r +
3651              ", expected " + rule);
3652    }
3653
3654    expect(*t, "The Quick Brown Fox",
3655           UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656
3657    delete t;
3658}
3659
3660void TransliteratorTest::TestInvalidBackRef(void) {
3661    UnicodeString rule =  ". > $1;";
3662    UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663    UParseError pe;
3664    UErrorCode ec = U_ZERO_ERROR;
3665    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666    Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667
3668    if (t != NULL) {
3669        errln("FAIL: createFromRules should have returned NULL");
3670        delete t;
3671    }
3672
3673    if (t2 != NULL) {
3674        errln("FAIL: createFromRules should have returned NULL");
3675        delete t2;
3676    }
3677
3678    if (U_SUCCESS(ec)) {
3679        errln("FAIL: Ok: . > $1; => no error");
3680    } else {
3681        logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682    }
3683}
3684
3685void TransliteratorTest::TestMulticharStringSet() {
3686    // Basic testing
3687    const char* rule =
3688        "       [{aa}]       > x;"
3689        "         a          > y;"
3690        "       [b{bc}]      > z;"
3691        "[{gd}] { e          > q;"
3692        "         e } [{fg}] > r;" ;
3693
3694    UParseError pe;
3695    UErrorCode ec = U_ZERO_ERROR;
3696    Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697    if (t == NULL || U_FAILURE(ec)) {
3698        delete t;
3699        errln("FAIL: createFromRules failed");
3700        return;
3701    }
3702
3703    expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704           "y x yz z d gd de gdq gdqfg ddrfg");
3705    delete t;
3706
3707    // Overlapped string test.  Make sure that when multiple
3708    // strings can match that the longest one is matched.
3709    rule =
3710        "    [a {ab} {abc}]    > x;"
3711        "           b          > y;"
3712        "           c          > z;"
3713        " q [t {st} {rst}] { e > p;" ;
3714
3715    t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716    if (t == NULL || U_FAILURE(ec)) {
3717        delete t;
3718        errln("FAIL: createFromRules failed");
3719        return;
3720    }
3721
3722    expect(*t, "a ab abc qte qste qrste",
3723           "x x x qtp qstp qrstp");
3724    delete t;
3725}
3726
3727// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728// BEGIN TestUserFunction support factory
3729
// Prototype transliterators, one per factory slot.  _TUFFactory clones
// the entry selected by the integer token; _TUFUnreg deletes it.
Transliterator* _TUFF[4];
// Heap-allocated copy of the ID each slot was registered under, kept so
// that _TUFUnreg can unregister the same ID.
UnicodeString* _TUFID[4];
3732
3733static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734                                   Transliterator::Token context) {
3735    return _TUFF[context.integer]->clone();
3736}
3737
3738static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739    _TUFF[n] = t;
3740    _TUFID[n] = new UnicodeString(ID);
3741    Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742}
3743
3744static void _TUFUnreg(int32_t n) {
3745    if (_TUFF[n] != NULL) {
3746        Transliterator::unregister(*_TUFID[n]);
3747        delete _TUFF[n];
3748        delete _TUFID[n];
3749    }
3750}
3751
3752// END TestUserFunction support factory
3753// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754
3755/**
3756 * Test that user-registered transliterators can be used under function
3757 * syntax.
3758 */
3759void TransliteratorTest::TestUserFunction() {
3760
3761    Transliterator* t;
3762    UParseError pe;
3763    UErrorCode ec = U_ZERO_ERROR;
3764
3765    // Setup our factory
3766    int32_t i;
3767    for (i=0; i<4; ++i) {
3768        _TUFF[i] = NULL;
3769    }
3770
3771    // There's no need to register inverses if we don't use them
3772    t = Transliterator::createFromRules("gif",
3773                                        UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774                                        UTRANS_FORWARD, pe, ec);
3775    if (t == NULL || U_FAILURE(ec)) {
3776        dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777        return;
3778    }
3779    _TUFReg("Any-gif", t, 0);
3780
3781    t = Transliterator::createFromRules("RemoveCurly",
3782                                        UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783                                        UTRANS_FORWARD, pe, ec);
3784    if (t == NULL || U_FAILURE(ec)) {
3785        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786        goto FAIL;
3787    }
3788    expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789    _TUFReg("Any-RemoveCurly", t, 1);
3790
3791    logln("Trying &hex");
3792    t = Transliterator::createFromRules("hex2",
3793                                        "(.) > &hex($1);",
3794                                        UTRANS_FORWARD, pe, ec);
3795    if (t == NULL || U_FAILURE(ec)) {
3796        errln("FAIL: createFromRules");
3797        goto FAIL;
3798    }
3799    logln("Registering");
3800    _TUFReg("Any-hex2", t, 2);
3801    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802    if (t == NULL || U_FAILURE(ec)) {
3803        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804        goto FAIL;
3805    }
3806    expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807    delete t;
3808
3809    logln("Trying &gif");
3810    t = Transliterator::createFromRules("gif2",
3811                                        "(.) > &Gif(&Hex2($1));",
3812                                        UTRANS_FORWARD, pe, ec);
3813    if (t == NULL || U_FAILURE(ec)) {
3814        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815        goto FAIL;
3816    }
3817    logln("Registering");
3818    _TUFReg("Any-gif2", t, 3);
3819    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820    if (t == NULL || U_FAILURE(ec)) {
3821        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822        goto FAIL;
3823    }
3824    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826    delete t;
3827
3828    // Test that filters are allowed after &
3829    t = Transliterator::createFromRules("test",
3830                                        "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831                                        UTRANS_FORWARD, pe, ec);
3832    if (t == NULL || U_FAILURE(ec)) {
3833        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834        goto FAIL;
3835    }
3836    expect(*t, "abc",
3837           UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838    delete t;
3839
3840 FAIL:
3841    for (i=0; i<4; ++i) {
3842        _TUFUnreg(i);
3843    }
3844}
3845
3846/**
3847 * Test the Any-X transliterators.
3848 */
3849void TransliteratorTest::TestAnyX(void) {
3850    UParseError parseError;
3851    UErrorCode status = U_ZERO_ERROR;
3852    Transliterator* anyLatin =
3853        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854    if (anyLatin==0) {
3855        dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856        delete anyLatin;
3857        return;
3858    }
3859
3860    expect(*anyLatin,
3861           CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862           CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863
3864    delete anyLatin;
3865}
3866
3867/**
3868 * Test Any-X transliterators with sample letters from all scripts.
3869 */
3870void TransliteratorTest::TestAny(void) {
3871    UErrorCode status = U_ZERO_ERROR;
3872    // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873    //       function call parameters going on in this test.
3874    UnicodeSet alphabetic("[:alphabetic:]", status);
3875    if (U_FAILURE(status)) {
3876        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877        return;
3878    }
3879    alphabetic.freeze();
3880
3881    UnicodeString testString;
3882    for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883        const char *scriptName = uscript_getShortName((UScriptCode)i);
3884        if (scriptName == NULL) {
3885            errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886            return;
3887        }
3888
3889        UnicodeSet sample;
3890        sample.applyPropertyAlias("script", scriptName, status);
3891        if (U_FAILURE(status)) {
3892            errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893            return;
3894        }
3895        sample.retainAll(alphabetic);
3896        for (int32_t count=0; count<5; count++) {
3897            UChar32 c = sample.charAt(count);
3898            if (c == -1) {
3899                break;
3900            }
3901            testString.append(c);
3902        }
3903    }
3904
3905    UParseError parseError;
3906    Transliterator* anyLatin =
3907        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908    if (U_FAILURE(status)) {
3909        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910        return;
3911    }
3912
3913    logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914    anyLatin->transliterate(testString);
3915    logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916    delete anyLatin;
3917}
3918
3919
3920/**
3921 * Test the source and target set API.  These are only implemented
3922 * for RBT and CompoundTransliterator at this time.
3923 */
3924void TransliteratorTest::TestSourceTargetSet() {
3925    UErrorCode ec = U_ZERO_ERROR;
3926
3927    // Rules
3928    const char* r =
3929        "a > b; "
3930        "r [x{lu}] > q;";
3931
3932    // Expected source
3933    UnicodeSet expSrc("[arx{lu}]", ec);
3934
3935    // Expected target
3936    UnicodeSet expTrg("[bq]", ec);
3937
3938    UParseError pe;
3939    Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940
3941    if (U_FAILURE(ec)) {
3942        delete t;
3943        errln("FAIL: Couldn't set up test");
3944        return;
3945    }
3946
3947    UnicodeSet src; t->getSourceSet(src);
3948    UnicodeSet trg; t->getTargetSet(trg);
3949
3950    if (src == expSrc && trg == expTrg) {
3951        UnicodeString a, b;
3952        logln((UnicodeString)"Ok: " +
3953              r + " => source = " + src.toPattern(a, TRUE) +
3954              ", target = " + trg.toPattern(b, TRUE));
3955    } else {
3956        UnicodeString a, b, c, d;
3957        errln((UnicodeString)"FAIL: " +
3958              r + " => source = " + src.toPattern(a, TRUE) +
3959              ", expected " + expSrc.toPattern(b, TRUE) +
3960              "; target = " + trg.toPattern(c, TRUE) +
3961              ", expected " + expTrg.toPattern(d, TRUE));
3962    }
3963
3964    delete t;
3965}
3966
3967/**
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969 */
3970void TransliteratorTest::TestPatternWhiteSpace() {
3971    // Rules
3972    const char* r = "a > \\u200E b;";
3973
3974    UErrorCode ec = U_ZERO_ERROR;
3975    UParseError pe;
3976    Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977
3978    if (U_FAILURE(ec)) {
3979        errln("FAIL: Couldn't set up test");
3980    } else {
3981        expect(*t, "a", "b");
3982    }
3983    delete t;
3984
3985    // UnicodeSet
3986    ec = U_ZERO_ERROR;
3987    UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988
3989    if (U_FAILURE(ec)) {
3990        errln("FAIL: Couldn't set up test");
3991    } else {
3992        if (set.contains(0x200E)) {
3993            errln("FAIL: U+200E not being ignored by UnicodeSet");
3994        }
3995    }
3996}
3997//======================================================================
3998// this method is in TestUScript.java
3999//======================================================================
4000void TransliteratorTest::TestAllCodepoints(){
4001    UScriptCode code= USCRIPT_INVALID_CODE;
4002    char id[256]={'\0'};
4003    char abbr[256]={'\0'};
4004    char newId[256]={'\0'};
4005    char newAbbrId[256]={'\0'};
4006    char oldId[256]={'\0'};
4007    char oldAbbrId[256]={'\0'};
4008
4009    UErrorCode status =U_ZERO_ERROR;
4010    UParseError pe;
4011
4012    for(uint32_t i = 0; i<=0x10ffff; i++){
4013        code =  uscript_getScript(i,&status);
4014        if(code == USCRIPT_INVALID_CODE){
4015            dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016        }
4017        const char* myId = uscript_getName(code);
4018        if(!myId) {
4019          dataerrln("Valid script code returned NULL name. Check your data!");
4020          return;
4021        }
4022        uprv_strcpy(id,myId);
4023        uprv_strcpy(abbr,uscript_getShortName(code));
4024
4025        uprv_strcpy(newId,"[:");
4026        uprv_strcat(newId,id);
4027        uprv_strcat(newId,":];NFD");
4028
4029        uprv_strcpy(newAbbrId,"[:");
4030        uprv_strcat(newAbbrId,abbr);
4031        uprv_strcat(newAbbrId,":];NFD");
4032
4033        if(uprv_strcmp(newId,oldId)!=0){
4034            Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035            if(t==NULL || U_FAILURE(status)){
4036                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037            }
4038            delete t;
4039        }
4040        if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041            Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042            if(t==NULL || U_FAILURE(status)){
4043                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044            }
4045            delete t;
4046        }
4047        uprv_strcpy(oldId,newId);
4048        uprv_strcpy(oldAbbrId, newAbbrId);
4049
4050    }
4051
4052}
4053
/*
 * TEST_TRANSLIT_ID(id, cls): create a forward transliterator for `id`
 * and check that its dynamic class ID equals cls::getStaticClassID().
 * A creation failure is reported with dataerrln; a class ID mismatch
 * with errln.  `id` is passed both to createInstance() and as a %s
 * argument, so it has to be a C string.  The transliterator is deleted
 * before the macro's block ends.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4067
/*
 * TEST_TRANSLIT_RULE(rule, cls): like TEST_TRANSLIT_ID, but the
 * transliterator is built from the rule text with createFromRules()
 * under the ID "_".  `rule` must be a string literal, because the
 * failure message is formed by literal concatenation with it.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082
/**
 * For each listed ID (and one rule string), create the transliterator
 * and check that the object's dynamic class ID matches the static class
 * ID of the named implementation class.  The checks themselves live in
 * the TEST_TRANSLIT_ID and TEST_TRANSLIT_RULE macros.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Rule-based case: built from rules rather than looked up by ID.
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4098
4099void TransliteratorTest::TestAlternateSyntax() {
4100    // U+2206 == &
4101    // U+2190 == <
4102    // U+2192 == >
4103    // U+2194 == <>
4104    expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105           "abc",
4106           "xbz");
4107    expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108           CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109           UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110}
4111
/*
 * Rule sets referenced by index from BEGIN_END_TEST_CASES (and, for entry
 * [17], instantiated directly in the reverse direction by the tests).
 *
 * Each entry is one rule string built from adjacent string literals.  The
 * bracketed comment before an entry gives its array index.  Rule sets that
 * used ::BEGIN/::END are commented out and replaced by an empty-string
 * placeholder so that the indexes of the remaining entries do not shift.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, placeholders included.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4286
4287/*
4288(This entire test is commented out below and will need some heavy revision when we re-add
4289the ::BEGIN/::END stuff)
4290static const char* BOGUS_BEGIN_END_RULES[] = {
4291    // [7]
4292    "::BEGIN;"
4293    "abc > xy;"
4294    "::BEGIN;"
4295    "aba > z;"
4296    "::END;"
4297    "::END;",
4298
4299    // [8]
4300    "abc > xy;"
4301    " aba > z;"
4302    "::END;",
4303
4304    // [9]
4305    "::BEGIN;"
4306    "::Upper;"
4307    "::END;"
4308};
4309static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4310*/
4311
/*
 * Flat table of test cases consumed three slots at a time by TestBeginEnd()
 * and TestBeginEndToRules(): slot i is the rule string, slot i+1 the input
 * text and slot i+2 the expected output.  Cases whose rule set is currently
 * a placeholder in BEGIN_END_RULES are commented out.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of slots (not test cases) in BEGIN_END_TEST_CASES; callers step through it by 3.
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4340
4341void TransliteratorTest::TestBeginEnd() {
4342    // run through the list of test cases above
4343    int32_t i = 0;
4344    for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4345        expect((UnicodeString)"Test case #" + (i / 3),
4346               UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4347               UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4348               UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4349    }
4350
4351    // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4352    UParseError parseError;
4353    UErrorCode status = U_ZERO_ERROR;
4354    Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4355            UTRANS_REVERSE, parseError, status);
4356    if (reversed == 0 || U_FAILURE(status)) {
4357        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4358    } else {
4359        expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4360    }
4361    delete reversed;
4362
4363    // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4364    // that all of them cause errors
4365/*
4366(commented out until we have the real ::BEGIN/::END stuff in place
4367    for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4368        UParseError parseError;
4369        UErrorCode status = U_ZERO_ERROR;
4370        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4371                UTRANS_FORWARD, parseError, status);
4372        if (!U_FAILURE(status)) {
4373            delete t;
4374            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4375        }
4376    }
4377*/
4378}
4379
4380void TransliteratorTest::TestBeginEndToRules() {
4381    // run through the same list of test cases we used above, but this time, instead of just
4382    // instantiating a Transliterator from the rules and running the test against it, we instantiate
4383    // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4384    // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4385    // to (i.e., does the same thing as) the original rule set
4386    for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4387        UParseError parseError;
4388        UErrorCode status = U_ZERO_ERROR;
4389        Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4390                UTRANS_FORWARD, parseError, status);
4391        if (U_FAILURE(status)) {
4392            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4393        } else {
4394            UnicodeString rules;
4395            t->toRules(rules, TRUE);
4396            Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4397                    UTRANS_FORWARD, parseError, status);
4398            if (U_FAILURE(status)) {
4399                reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4400                        parseError, status);
4401                delete t;
4402            } else {
4403                expect(*t2,
4404                       UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4405                       UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4406                delete t;
4407                delete t2;
4408            }
4409        }
4410    }
4411
4412    // do the same thing for the reversible test case
4413    UParseError parseError;
4414    UErrorCode status = U_ZERO_ERROR;
4415    Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4416            UTRANS_REVERSE, parseError, status);
4417    if (U_FAILURE(status)) {
4418        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4419    } else {
4420        UnicodeString rules;
4421        reversed->toRules(rules, FALSE);
4422        Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4423                parseError, status);
4424        if (U_FAILURE(status)) {
4425            reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4426                    parseError, status);
4427            delete reversed;
4428        } else {
4429            expect(*reversed2,
4430                   UnicodeString("xy XY XYZ yz YZ"),
4431                   UnicodeString("xy abc xaba yz aba"));
4432            delete reversed;
4433            delete reversed2;
4434        }
4435    }
4436}
4437
4438void TransliteratorTest::TestRegisterAlias() {
4439    UnicodeString longID("Lower;[aeiou]Upper");
4440    UnicodeString shortID("Any-CapVowels");
4441    UnicodeString reallyShortID("CapVowels");
4442
4443    Transliterator::registerAlias(shortID, longID);
4444
4445    UErrorCode err = U_ZERO_ERROR;
4446    Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4447    if (U_FAILURE(err)) {
4448        errln("Failed to instantiate transliterator with long ID");
4449        Transliterator::unregister(shortID);
4450        return;
4451    }
4452    Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4453    if (U_FAILURE(err)) {
4454        errln("Failed to instantiate transliterator with short ID");
4455        delete t1;
4456        Transliterator::unregister(shortID);
4457        return;
4458    }
4459
4460    if (t1->getID() != longID)
4461        errln("Transliterator instantiated with long ID doesn't have long ID");
4462    if (t2->getID() != reallyShortID)
4463        errln("Transliterator instantiated with short ID doesn't have short ID");
4464
4465    UnicodeString rules1;
4466    UnicodeString rules2;
4467
4468    t1->toRules(rules1, TRUE);
4469    t2->toRules(rules2, TRUE);
4470    if (rules1 != rules2)
4471        errln("Alias transliterators aren't the same");
4472
4473    delete t1;
4474    delete t2;
4475    Transliterator::unregister(shortID);
4476
4477    t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4478    if (U_SUCCESS(err)) {
4479        errln("Instantiation with short ID succeeded after short ID was unregistered");
4480        delete t1;
4481    }
4482
4483    // try the same thing again, but this time with something other than
4484    // an instance of CompoundTransliterator
4485    UnicodeString realID("Latin-Greek");
4486    UnicodeString fakeID("Latin-dlgkjdflkjdl");
4487    Transliterator::registerAlias(fakeID, realID);
4488
4489    err = U_ZERO_ERROR;
4490    t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4491    if (U_FAILURE(err)) {
4492        dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4493        Transliterator::unregister(realID);
4494        return;
4495    }
4496    t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4497    if (U_FAILURE(err)) {
4498        errln("Failed to instantiate transliterator with fake ID");
4499        delete t1;
4500        Transliterator::unregister(realID);
4501        return;
4502    }
4503
4504    t1->toRules(rules1, TRUE);
4505    t2->toRules(rules2, TRUE);
4506    if (rules1 != rules2)
4507        errln("Alias transliterators aren't the same");
4508
4509    delete t1;
4510    delete t2;
4511    Transliterator::unregister(fakeID);
4512}
4513
4514void TransliteratorTest::TestRuleStripping() {
4515    /*
4516#
4517\uE001>\u0C01; # SIGN
4518    */
4519    static const UChar rule[] = {
4520        0x0023,0x0020,0x000D,0x000A,
4521        0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4522    };
4523    static const UChar expectedRule[] = {
4524        0xE001,0x003E,0x0C01,0x003B,0
4525    };
4526    UChar result[sizeof(rule)/sizeof(rule[0])];
4527    UErrorCode status = U_ZERO_ERROR;
4528    int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4529    if (len != u_strlen(expectedRule)) {
4530        errln("utrans_stripRules return len = %d", len);
4531    }
4532    if (u_strncmp(expectedRule, result, len) != 0) {
4533        errln("utrans_stripRules did not return expected string");
4534    }
4535}
4536
4537/**
4538 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4539 */
4540void TransliteratorTest::TestHalfwidthFullwidth(void) {
4541    UParseError parseError;
4542    UErrorCode status = U_ZERO_ERROR;
4543    Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4544    Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4545    if (hf == 0 || fh == 0) {
4546        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4547        delete hf;
4548        delete fh;
4549        return;
4550    }
4551
4552    // Array of 2n items
4553    // Each item is
4554    //   "hf"|"fh"|"both",
4555    //   <Halfwidth>,
4556    //   <Fullwidth>
4557    const char* DATA[] = {
4558        "both",
4559        "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4560        "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4561    };
4562    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4563
4564    for (int32_t i=0; i<DATA_length; i+=3) {
4565        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4566        UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4567        switch (*DATA[i]) {
4568        case 0x68: //'h': // Halfwidth-Fullwidth only
4569            expect(*hf, h, f);
4570            break;
4571        case 0x66: //'f': // Fullwidth-Halfwidth only
4572            expect(*fh, f, h);
4573            break;
4574        case 0x62: //'b': // both directions
4575            expect(*hf, h, f);
4576            expect(*fh, f, h);
4577            break;
4578        }
4579    }
4580    delete hf;
4581    delete fh;
4582}
4583
4584
4585    /**
4586     *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4587     *              TODO: confirm that the expected results are correct.
4588     *              For now, test just confirms that C++ and Java give identical results.
4589     */
4590void TransliteratorTest::TestThai(void) {
4591#if !UCONFIG_NO_BREAK_ITERATION
4592    UParseError parseError;
4593    UErrorCode status = U_ZERO_ERROR;
4594    Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4595    if (tr == 0) {
4596        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4597        return;
4598    }
4599    if (U_FAILURE(status)) {
4600        errln("FAIL: createInstance failed with %s", u_errorName(status));
4601        return;
4602    }
4603    const char *thaiText =
4604        "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4605        "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4606        "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4607        "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4608        "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4609        "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4610        "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4611        "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4612        "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4613        "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4614        "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4615        "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4616        "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4617        "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4618        "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4619        "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4620        "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4621        "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4622        "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4623        "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4624        "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4625        "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4626        "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4627        "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4628        " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4629        "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4630        "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4631        " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4632        "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4633        "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4634
4635    const char *latinText =
4636        "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4637        "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4638        "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4639        "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4640        "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4641        " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4642        "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4643        "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4644        "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4645        "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4646        "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4647        "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4648        " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4649        "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4650        " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4651        "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4652        "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4653        "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4654
4655
4656    UnicodeString  xlitText(thaiText);
4657    xlitText = xlitText.unescape();
4658    tr->transliterate(xlitText);
4659
4660    UnicodeString expectedText(latinText);
4661    expectedText = expectedText.unescape();
4662    expect(*tr, xlitText, expectedText);
4663
4664    delete tr;
4665#endif
4666}
4667
4668
4669//======================================================================
4670// Support methods
4671//======================================================================
4672void TransliteratorTest::expectT(const UnicodeString& id,
4673                                 const UnicodeString& source,
4674                                 const UnicodeString& expectedResult) {
4675    UErrorCode ec = U_ZERO_ERROR;
4676    UParseError pe;
4677    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4678    if (U_FAILURE(ec)) {
4679        errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4680        delete t;
4681        return;
4682    }
4683    expect(*t, source, expectedResult);
4684    delete t;
4685}
4686
4687void TransliteratorTest::reportParseError(const UnicodeString& message,
4688                                          const UParseError& parseError,
4689                                          const UErrorCode& status) {
4690    dataerrln(message +
4691          /*", parse error " + parseError.code +*/
4692          ", line " + parseError.line +
4693          ", offset " + parseError.offset +
4694          ", pre-context " + prettify(parseError.preContext, TRUE) +
4695          ", post-context " + prettify(parseError.postContext,TRUE) +
4696          ", Error: " + u_errorName(status));
4697}
4698
/**
 * Convenience overload: builds a transliterator from `rules` under the
 * placeholder ID "<ID>" and checks that it turns `source` into
 * `expectedResult`.  All arguments are forwarded to the overload that takes
 * an explicit ID.
 */
void TransliteratorTest::expect(const UnicodeString& rules,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
    expect("<ID>", rules, source, expectedResult, pos);
}
4705
4706void TransliteratorTest::expect(const UnicodeString& id,
4707                                const UnicodeString& rules,
4708                                const UnicodeString& source,
4709                                const UnicodeString& expectedResult,
4710                                UTransPosition *pos) {
4711    UErrorCode status = U_ZERO_ERROR;
4712    UParseError parseError;
4713    Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4714    if (U_FAILURE(status)) {
4715        reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4716    } else {
4717        expect(*t, source, expectedResult, pos);
4718    }
4719    delete t;
4720}
4721
/**
 * Two-way check: `t` must turn `source` into `expectedResult`, and
 * `reverseTransliterator` must turn `expectedResult` back into `source`.
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                const Transliterator& reverseTransliterator) {
    expect(t, source, expectedResult);
    expect(reverseTransliterator, expectedResult, source);
}
4729
4730void TransliteratorTest::expect(const Transliterator& t,
4731                                const UnicodeString& source,
4732                                const UnicodeString& expectedResult,
4733                                UTransPosition *pos) {
4734    if (pos == 0) {
4735        UnicodeString result(source);
4736        t.transliterate(result);
4737        expectAux(t.getID() + ":String", source, result, expectedResult);
4738    }
4739    UTransPosition index={0, 0, 0, 0};
4740    if (pos != 0) {
4741        index = *pos;
4742    }
4743
4744    UnicodeString rsource(source);
4745    if (pos == 0) {
4746        t.transliterate(rsource);
4747    } else {
4748        // Do it all at once -- below we do it incrementally
4749        t.finishTransliteration(rsource, *pos);
4750    }
4751    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4752
4753    // Test keyboard (incremental) transliteration -- this result
4754    // must be the same after we finalize (see below).
4755    UnicodeString log;
4756    rsource.remove();
4757    if (pos != 0) {
4758        rsource = source;
4759        formatInput(log, rsource, index);
4760        log.append(" -> ");
4761        UErrorCode status = U_ZERO_ERROR;
4762        t.transliterate(rsource, index, status);
4763        formatInput(log, rsource, index);
4764    } else {
4765        for (int32_t i=0; i<source.length(); ++i) {
4766            if (i != 0) {
4767                log.append(" + ");
4768            }
4769            log.append(source.charAt(i)).append(" -> ");
4770            UErrorCode status = U_ZERO_ERROR;
4771            t.transliterate(rsource, index, source.charAt(i), status);
4772            formatInput(log, rsource, index);
4773        }
4774    }
4775
4776    // As a final step in keyboard transliteration, we must call
4777    // transliterate to finish off any pending partial matches that
4778    // were waiting for more input.
4779    t.finishTransliteration(rsource, index);
4780    log.append(" => ").append(rsource);
4781
4782    expectAux(t.getID() + ":Keyboard", log,
4783              rsource == expectedResult,
4784              expectedResult);
4785}
4786
4787
4788/**
4789 * @param appendTo result is appended to this param.
4790 * @param input the string being transliterated
4791 * @param pos the index struct
4792 */
4793UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4794                                               const UnicodeString& input,
4795                                               const UTransPosition& pos) {
4796    // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4797    // the {} indicate the context start and limit, and the ||
4798    // indicate the start and limit.
4799    if (0 <= pos.contextStart &&
4800        pos.contextStart <= pos.start &&
4801        pos.start <= pos.limit &&
4802        pos.limit <= pos.contextLimit &&
4803        pos.contextLimit <= input.length()) {
4804
4805        UnicodeString a, b, c, d, e;
4806        input.extractBetween(0, pos.contextStart, a);
4807        input.extractBetween(pos.contextStart, pos.start, b);
4808        input.extractBetween(pos.start, pos.limit, c);
4809        input.extractBetween(pos.limit, pos.contextLimit, d);
4810        input.extractBetween(pos.contextLimit, input.length(), e);
4811        appendTo.append(a).append((UChar)123/*{*/).append(b).
4812            append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4813            append((UChar)125/*}*/).append(e);
4814    } else {
4815        appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4816                        pos.contextStart + ", s=" + pos.start + ", l=" +
4817                        pos.limit + ", cl=" + pos.contextLimit + "} on " +
4818                        input);
4819    }
4820    return appendTo;
4821}
4822
/**
 * Compares `result` with `expectedResult` and forwards to the pass/fail
 * overload, using "source -> result" as the summary text.
 */
void TransliteratorTest::expectAux(const UnicodeString& tag,
                                   const UnicodeString& source,
                                   const UnicodeString& result,
                                   const UnicodeString& expectedResult) {
    expectAux(tag, source + " -> " + result,
              result == expectedResult,
              expectedResult);
}
4831
4832void TransliteratorTest::expectAux(const UnicodeString& tag,
4833                                   const UnicodeString& summary, UBool pass,
4834                                   const UnicodeString& expectedResult) {
4835    if (pass) {
4836        logln(UnicodeString("(")+tag+") " + prettify(summary));
4837    } else {
4838        dataerrln(UnicodeString("FAIL: (")+tag+") "
4839              + prettify(summary)
4840              + ", expected " + prettify(expectedResult));
4841    }
4842}
4843
4844#endif /* #if !UCONFIG_NO_TRANSLITERATION */
4845