1/*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/**
25 * @test
26 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
27 *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
28 *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
29 *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
30 * @library /java/text/testlib
31 * @summary Regression tests for Collation and associated classes
32 * @modules jdk.localedata
33 */
34/*
35(C) Copyright Taligent, Inc. 1996 - All Rights Reserved
36(C) Copyright IBM Corp. 1996 - All Rights Reserved
37
38  The original version of this source code and documentation is copyrighted and
39owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
40provided under terms of a License Agreement between Taligent and Sun. This
41technology is protected by multiple US and International patents. This notice and
42attribution to Taligent may not be removed.
43  Taligent is a registered trademark of Taligent, Inc.
44*/
45
46import java.text.*;
47import java.util.Locale;
48import java.util.Vector;
49
50
51public class Regression extends CollatorTest {
52
53    public static void main(String[] args) throws Exception {
54        new Regression().run(args);
55    }
56
57    // CollationElementIterator.reset() doesn't work
58    //
59    public void Test4048446() {
60        CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
61        CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
62
63        while ( i1.next() != CollationElementIterator.NULLORDER ) {
64        }
65        i1.reset();
66
67        assertEqual(i1, i2);
68    }
69
70
71    // Collator -> rules -> Collator round-trip broken for expanding characters
72    //
73    public void Test4051866() throws ParseException {
74        // Build a collator containing expanding characters
75        RuleBasedCollator c1 = new RuleBasedCollator("< o "
76                                                    +"& oe ,o\u3080"
77                                                    +"& oe ,\u1530 ,O"
78                                                    +"& OE ,O\u3080"
79                                                    +"& OE ,\u1520"
80                                                    +"< p ,P");
81
82        // Build another using the rules from  the first
83        RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
84
85        // Make sure they're the same
86        if (!c1.getRules().equals(c2.getRules())) {
87            errln("Rules are not equal");
88        }
89    }
90
91    // Collator thinks "black-bird" == "black"
92    //
93    public void Test4053636() {
94        if (en_us.equals("black-bird","black")) {
95            errln("black-bird == black");
96        }
97    }
98
99
100    // CollationElementIterator will not work correctly if the associated
101    // Collator object's mode is changed
102    //
103    public void Test4054238() {
104        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
105
106        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
107        CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
108
109        c.setDecomposition(Collator.NO_DECOMPOSITION);
110        CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
111
112        // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
113        // collator itself is in that mode
114        assertEqual(i1, i2);
115    }
116
117    // Collator.IDENTICAL documented but not implemented
118    //
119    public void Test4054734() {
120        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
121        try {
122            c.setStrength(Collator.IDENTICAL);
123        }
124        catch (Exception e) {
125            errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
126        }
127
128        String[] decomp = {
129            "\u0001",   "<",    "\u0002",
130            "\u0001",   "=",    "\u0001",
131            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
132            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
133        };
134        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
135        compareArray(c, decomp);
136
137        String[] nodecomp = {
138            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
139        };
140        c.setDecomposition(Collator.NO_DECOMPOSITION);
141        compareArray(c, nodecomp);
142    }
143
144    // Full Decomposition mode not implemented
145    //
146    public void Test4054736() {
147        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
148        c.setDecomposition(Collator.FULL_DECOMPOSITION);
149
150        String[] tests = {
151            "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
152        };
153
154        compareArray(c, tests);
155    }
156
157    // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
158    //
159    public void Test4058613() {
160        // Creating a default collator doesn't work when Korean is the default
161        // locale
162
163        Locale oldDefault = Locale.getDefault();
164
165        Locale.setDefault( Locale.KOREAN );
166        try {
167            Collator c = Collator.getInstance();
168
169            // Since the fix to this bug was to turn of decomposition for Korean collators,
170            // ensure that's what we got
171            if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
172              errln("Decomposition is not set to NO_DECOMPOSITION");
173            }
174        }
175        finally {
176            Locale.setDefault(oldDefault);
177        }
178    }
179
180    // RuleBasedCollator.getRules does not return the exact pattern as input
181    // for expanding character sequences
182    //
183    public void Test4059820() {
184        RuleBasedCollator c = null;
185        try {
186            c = new RuleBasedCollator("< a < b , c/a < d < z");
187        } catch (ParseException e) {
188            errln("Exception building collator: " + e.toString());
189            return;
190        }
191        if ( c.getRules().indexOf("c/a") == -1) {
192            errln("returned rules do not contain 'c/a'");
193        }
194    }
195
196    // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
197    //
198    public void Test4060154() {
199        RuleBasedCollator c = null;
200        try {
201            c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
202                                      + " & H < \u0131, \u0130, i, I" );
203        } catch (ParseException e) {
204            errln("Exception building collator: " + e.toString());
205            return;
206        }
207        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
208
209        String[] tertiary = {
210            "A",        "<",    "B",
211            "H",        "<",    "\u0131",
212            "H",        "<",    "I",
213            "\u0131",   "<",    "\u0130",
214            "\u0130",   "<",    "i",
215            "\u0130",   ">",    "H",
216        };
217        c.setStrength(Collator.TERTIARY);
218        compareArray(c, tertiary);
219
220        String[] secondary = {
221            "H",        "<",    "I",
222            "\u0131",   "=",    "\u0130",
223        };
224        c.setStrength(Collator.PRIMARY);
225        compareArray(c, secondary);
226    };
227
228    // Secondary/Tertiary comparison incorrect in French Secondary
229    //
230    public void Test4062418() throws ParseException {
231        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
232        c.setStrength(Collator.SECONDARY);
233
234        String[] tests = {
235                "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
236        };
237
238        compareArray(c, tests);
239    }
240
241    // Collator.compare() method broken if either string contains spaces
242    //
243    public void Test4065540() {
244        if (en_us.compare("abcd e", "abcd f") == 0) {
245            errln("'abcd e' == 'abcd f'");
246        }
247    }
248
249    // Unicode characters need to be recursively decomposed to get the
250    // correct result. For example,
251    // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
252    //
253    public void Test4066189() {
254        String test1 = "\u1EB1";
255        String test2 = "a\u0306\u0300";
256
257        RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
258        c1.setDecomposition(Collator.FULL_DECOMPOSITION);
259        CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
260
261        RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
262        c2.setDecomposition(Collator.NO_DECOMPOSITION);
263        CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
264
265        assertEqual(i1, i2);
266    }
267
268    // French secondary collation checking at the end of compare iteration fails
269    //
270    public void Test4066696() {
271        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
272        c.setStrength(Collator.SECONDARY);
273
274        String[] tests = {
275            "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
276        };
277
278        compareArray(c, tests);
279    }
280
281
282    // Bad canonicalization of same-class combining characters
283    //
284    public void Test4076676() {
285        // These combining characters are all in the same class, so they should not
286        // be reordered, and they should compare as unequal.
287        String s1 = "A\u0301\u0302\u0300";
288        String s2 = "A\u0302\u0300\u0301";
289
290        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
291        c.setStrength(Collator.TERTIARY);
292
293        if (c.compare(s1,s2) == 0) {
294            errln("Same-class combining chars were reordered");
295        }
296    }
297
298
299    // RuleBasedCollator.equals(null) throws NullPointerException
300    //
301    public void Test4079231() {
302        try {
303            if (en_us.equals(null)) {
304                errln("en_us.equals(null) returned true");
305            }
306        }
307        catch (Exception e) {
308            errln("en_us.equals(null) threw " + e.toString());
309        }
310    }
311
312    // RuleBasedCollator breaks on "< a < bb" rule
313    //
314    public void Test4078588() throws ParseException {
315        RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
316
317        int result = rbc.compare("a","bb");
318
319        if (result != -1) {
320            errln("Compare(a,bb) returned " + result + "; expected -1");
321        }
322    }
323
324    // Combining characters in different classes not reordered properly.
325    //
326    public void Test4081866() throws ParseException {
327        // These combining characters are all in different classes,
328        // so they should be reordered and the strings should compare as equal.
329        String s1 = "A\u0300\u0316\u0327\u0315";
330        String s2 = "A\u0327\u0316\u0315\u0300";
331
332        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
333        c.setStrength(Collator.TERTIARY);
334
335        // Now that the default collators are set to NO_DECOMPOSITION
336        // (as a result of fixing bug 4114077), we must set it explicitly
337        // when we're testing reordering behavior.  -- lwerner, 5/5/98
338        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
339
340        if (c.compare(s1,s2) != 0) {
341            errln("Combining chars were not reordered");
342        }
343    }
344
345    // string comparison errors in Scandinavian collators
346    //
347    public void Test4087241() {
348        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
349                                                        new Locale("da", "DK"));
350        c.setStrength(Collator.SECONDARY);
351
352        String[] tests = {
353            "\u007a",   "<",    "\u00e6",       // z        < ae
354            "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
355            "Y",        "<",    "u\u0308",      // Y        < u-umlaut
356        };
357
358        compareArray(c, tests);
359    }
360
361    // CollationKey takes ignorable strings into account when it shouldn't
362    //
363    public void Test4087243() {
364        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
365        c.setStrength(Collator.TERTIARY);
366
367        String[] tests = {
368            "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
369        };
370
371        compareArray(c, tests);
372    }
373
374    // Mu/micro conflict
375    // Micro symbol and greek lowercase letter Mu should sort identically
376    //
377    public void Test4092260() {
378        Collator c = Collator.getInstance(new Locale("el", ""));
379
380        // will only be equal when FULL_DECOMPOSITION is used
381        c.setDecomposition(Collator.FULL_DECOMPOSITION);
382
383        String[] tests = {
384            "\u00B5",      "=",    "\u03BC",
385        };
386
387        compareArray(c, tests);
388    }
389
390    void Test4095316() {
391        Collator c = Collator.getInstance(new Locale("el", "GR"));
392        c.setStrength(Collator.TERTIARY);
393        // javadocs for RuleBasedCollator clearly specify that characters containing compatability
394        // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
395        c.setDecomposition(Collator.FULL_DECOMPOSITION);
396
397        String[] tests = {
398            "\u03D4",      "=",    "\u03AB",
399        };
400
401        compareArray(c, tests);
402    }
403
404    public void Test4101940() {
405        try {
406            RuleBasedCollator c = new RuleBasedCollator("< a < b");
407            CollationElementIterator i = c.getCollationElementIterator("");
408            i.reset();
409
410            if (i.next() != i.NULLORDER) {
411                errln("next did not return NULLORDER");
412            }
413        }
414        catch (Exception e) {
415            errln("Caught " + e );
416        }
417    }
418
419    // Collator.compare not handling spaces properly
420    //
421    public void Test4103436() {
422        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
423        c.setStrength(Collator.TERTIARY);
424
425        String[] tests = {
426            "file",      "<",    "file access",
427            "file",      "<",    "fileaccess",
428        };
429
430        compareArray(c, tests);
431    }
432
433    // Collation not Unicode conformant with Hangul syllables
434    //
435    public void Test4114076() {
436        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
437        c.setStrength(Collator.TERTIARY);
438
439        //
440        // With Canonical decomposition, Hangul syllables should get decomposed
441        // into Jamo, but Jamo characters should not be decomposed into
442        // conjoining Jamo
443        //
444        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
445        String[] test1 = {
446            "\ud4db",   "=",    "\u1111\u1171\u11b6",
447        };
448        compareArray(c, test1);
449
450        // Full decomposition result should be the same as canonical decomposition
451        // for all hangul.
452        c.setDecomposition(Collator.FULL_DECOMPOSITION);
453        compareArray(c, test1);
454
455    }
456
457
458    // Collator.getCollationKey was hanging on certain character sequences
459    //
460    public void Test4124632() throws Exception {
461        Collator coll = Collator.getInstance(Locale.JAPAN);
462
463        try {
464            coll.getCollationKey("A\u0308bc");
465        } catch (OutOfMemoryError e) {
466            errln("Ran out of memory -- probably an infinite loop");
467        }
468    }
469
470    // sort order of french words with multiple accents has errors
471    //
472    public void Test4132736() {
473        Collator c = Collator.getInstance(Locale.FRANCE);
474
475        String[] test1 = {
476            "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
477            "e\u0300\u0301",    ">",    "e\u0301\u0300",
478        };
479        compareArray(c, test1);
480    }
481
482    // The sorting using java.text.CollationKey is not in the exact order
483    //
484    public void Test4133509() {
485        String[] test1 = {
486            "Exception",    "<",    "ExceptionInInitializerError",
487            "Graphics",     "<",    "GraphicsEnvironment",
488            "String",       "<",    "StringBuffer",
489        };
490        compareArray(en_us, test1);
491    }
492
493    // Collation with decomposition off doesn't work for Europe
494    //
495    public void Test4114077() {
496        // Ensure that we get the same results with decomposition off
497        // as we do with it on....
498
499        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
500        c.setStrength(Collator.TERTIARY);
501
502        String[] test1 = {
503            "\u00C0",        "=", "A\u0300",        // Should be equivalent
504            "p\u00eache",         ">", "p\u00e9ch\u00e9",
505            "\u0204",        "=", "E\u030F",
506            "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
507                                                    //   -> a, ring, acute
508            "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
509        };
510        c.setDecomposition(Collator.NO_DECOMPOSITION);
511        compareArray(c, test1);
512
513        String[] test2 = {
514            "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
515        };
516        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
517        compareArray(c, test2);
518    }
519
520    // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
521    //
522    public void Test4141640() {
523        //
524        // Rather than just creating a Swedish collator, we might as well
525        // try to instantiate one for every locale available on the system
526        // in order to prevent this sort of bug from cropping up in the future
527        //
528        Locale[] locales = Collator.getAvailableLocales();
529
530        for (int i = 0; i < locales.length; i++) {
531            try {
532                Collator c = Collator.getInstance(locales[i]);
533            } catch (Exception e) {
534                errln("Caught " + e + " creating collator for " + locales[i]);
535            }
536        }
537    }
538
539    // getCollationKey throws exception for spanish text
540    // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
541    //
542    public void Test4139572() {
543        //
544        // Code pasted straight from the bug report
545        //
546        // create spanish locale and collator
547        Locale l = new Locale("es", "es");
548        Collator col = Collator.getInstance(l);
549
550        // this spanish phrase kills it!
551        col.getCollationKey("Nombre De Objeto");
552    }
553
554    // RuleBasedCollator doesn't use getCollationElementIterator internally
555    //
556    public void Test4146160() throws ParseException {
557        //
558        // Use a custom collator class whose getCollationElementIterator
559        // methods increment a count....
560        //
561        My4146160Collator.count = 0;
562        new My4146160Collator().getCollationKey("1");
563        if (My4146160Collator.count < 1) {
564            errln("getCollationElementIterator not called");
565        }
566
567        My4146160Collator.count = 0;
568        new My4146160Collator().compare("1", "2");
569        if (My4146160Collator.count < 1) {
570            errln("getCollationElementIterator not called");
571        }
572    }
573
574    static class My4146160Collator extends RuleBasedCollator {
575        public My4146160Collator() throws ParseException {
576            super(Regression.en_us.getRules());
577        }
578
579        public CollationElementIterator getCollationElementIterator(
580                                            String text) {
581            count++;
582            return super.getCollationElementIterator(text);
583        }
584        public CollationElementIterator getCollationElementIterator(
585                                            CharacterIterator text) {
586            count++;
587            return super.getCollationElementIterator(text);
588        }
589
590        public static int count = 0;
591    };
592
593    // CollationElementIterator.previous broken for expanding char sequences
594    //
595    public void Test4179686() throws ParseException {
596
597        // Create a collator with a few expanding character sequences in it....
598        RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
599                                                    + " & ae ; \u00e4 & AE ; \u00c4"
600                                                    + " & oe ; \u00f6 & OE ; \u00d6"
601                                                    + " & ue ; \u00fc & UE ; \u00dc");
602
603        String text = "T\u00f6ne"; // o-umlaut
604
605        CollationElementIterator iter = coll.getCollationElementIterator(text);
606        Vector elements = new Vector();
607        int elem;
608
609        // Iterate forward and collect all of the elements into a Vector
610        while ((elem = iter.next()) != iter.NULLORDER) {
611            elements.addElement(new Integer(elem));
612        }
613
614        // Now iterate backward and make sure they're the same
615        int index = elements.size() - 1;
616        while ((elem = iter.previous()) != iter.NULLORDER) {
617            int expect = ((Integer)elements.elementAt(index)).intValue();
618
619            if (elem != expect) {
620                errln("Mismatch at index " + index
621                      + ": got " + Integer.toString(elem,16)
622                      + ", expected " + Integer.toString(expect,16));
623            }
624            index--;
625        }
626    }
627
628    public void Test4244884() throws ParseException {
629        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
630        coll = new RuleBasedCollator(coll.getRules()
631                + " & C < ch , cH , Ch , CH < cat < crunchy");
632
633        String[] testStrings = new String[] {
634            "car",
635            "cave",
636            "clamp",
637            "cramp",
638            "czar",
639            "church",
640            "catalogue",
641            "crunchy",
642            "dog"
643        };
644
645        for (int i = 1; i < testStrings.length; i++) {
646            if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
647                errln("error: \"" + testStrings[i - 1]
648                    + "\" is greater than or equal to \"" + testStrings[i]
649                    + "\".");
650            }
651        }
652    }
653
    /**
     * Exercises {@code CollationElementIterator.setOffset()} on a text that
     * contains the contractions "ch", "cat" and "crunchy" defined by the
     * extended rules below. It has three parts: (1) offsets 0, 4 and 5 of the
     * text must yield the same primary order, (2) offsets 14 through 19 must
     * all yield the same primary order, and (3) walking the text with
     * {@code next()} must split it into the same substrings as walking it
     * with {@code setOffset()} followed by {@code next()}.
     *
     * @throws ParseException if the extended rule string is rejected
     */
    public void Test4179216() throws ParseException {
        // you can position a CollationElementIterator in the middle of
        // a contracting character sequence, yielding a bogus collation
        // element
        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
        coll = new RuleBasedCollator(coll.getRules()
                + " & C < ch , cH , Ch , CH < cat < crunchy");
        String testText = "church church catcatcher runcrunchynchy";
        CollationElementIterator iter = coll.getCollationElementIterator(
                testText);

        // test that the "ch" combination works properly
        // (offset 4 is the 'c' and offset 5 the 'h' of the "ch" ending the
        // first "church"; offset 0 is the "ch" that starts the text)
        iter.setOffset(4);
        int elt4 = CollationElementIterator.primaryOrder(iter.next());

        iter.reset();
        int elt0 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(5);
        int elt5 = CollationElementIterator.primaryOrder(iter.next());

        if (elt4 != elt0 || elt5 != elt0)
            errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
                    + elt4 + "), and 5 (" + elt5 + ") don't match.");

        // test that the "cat" combination works properly
        // (offsets 14..19 cover the six characters of "catcat" in the text)
        iter.setOffset(14);
        int elt14 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(15);
        int elt15 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(16);
        int elt16 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(17);
        int elt17 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(18);
        int elt18 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(19);
        int elt19 = CollationElementIterator.primaryOrder(iter.next());

        if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
                || elt14 != elt18 || elt14 != elt19)
            errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
            + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
            + ", elt18 = " + elt18 + ", elt19 = " + elt19);

        // now generate a complete list of the collation elements,
        // first using next() and then using setOffset(), and
        // make sure both interfaces return the same set of elements
        iter.reset();

        // First pass: only count the elements so the arrays can be sized.
        int elt = iter.next();
        int count = 0;
        while (elt != CollationElementIterator.NULLORDER) {
            ++count;
            elt = iter.next();
        }

        String[] nextElements = new String[count];
        String[] setOffsetElements = new String[count];
        int lastPos = 0;

        // Second pass: record the substring of testText consumed by each
        // next() call, using getOffset() before and after the call.
        iter.reset();
        elt = iter.next();
        count = 0;
        while (elt != CollationElementIterator.NULLORDER) {
            nextElements[count++] = testText.substring(lastPos, iter.getOffset());
            lastPos = iter.getOffset();
            elt = iter.next();
        }
        // Third pass: same walk, but reposition explicitly with setOffset()
        // before every next(); the loop advances via the iterator's own offset.
        count = 0;
        for (int i = 0; i < testText.length(); ) {
            iter.setOffset(i);
            lastPos = iter.getOffset();
            elt = iter.next();
            setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
            i = iter.getOffset();
        }
        // Compare the two walks substring by substring.
        for (int i = 0; i < nextElements.length; i++) {
            if (nextElements[i].equals(setOffsetElements[i])) {
                logln(nextElements[i]);
            } else {
                errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
                    + setOffsetElements[i]);
            }
        }
    }
745
746    public void Test4216006() throws Exception {
747        // rule parser barfs on "<\u00e0=a\u0300", and on other cases
748        // where the same token (after normalization) appears twice in a row
749        boolean caughtException = false;
750        try {
751            RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
752        }
753        catch (ParseException e) {
754            caughtException = true;
755        }
756        if (!caughtException) {
757            throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
758        }
759
760        RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
761        collator.setDecomposition(Collator.FULL_DECOMPOSITION);
762        collator.setStrength(Collator.IDENTICAL);
763
764        String[] tests = {
765            "a\u0300", "=", "\u00e0",
766            "\u00e0",  "=", "a\u0300"
767        };
768
769        compareArray(collator, tests);
770    }
771
772    public void Test4171974() {
773        // test French accent ordering more thoroughly
774        String[] frenchList = {
775            "\u0075\u0075",     // u u
776            "\u00fc\u0075",     // u-umlaut u
777            "\u01d6\u0075",     // u-umlaut-macron u
778            "\u016b\u0075",     // u-macron u
779            "\u1e7b\u0075",     // u-macron-umlaut u
780            "\u0075\u00fc",     // u u-umlaut
781            "\u00fc\u00fc",     // u-umlaut u-umlaut
782            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
783            "\u016b\u00fc",     // u-macron u-umlaut
784            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
785            "\u0075\u01d6",     // u u-umlaut-macron
786            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
787            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
788            "\u016b\u01d6",     // u-macron u-umlaut-macron
789            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
790            "\u0075\u016b",     // u u-macron
791            "\u00fc\u016b",     // u-umlaut u-macron
792            "\u01d6\u016b",     // u-umlaut-macron u-macron
793            "\u016b\u016b",     // u-macron u-macron
794            "\u1e7b\u016b",     // u-macron-umlaut u-macron
795            "\u0075\u1e7b",     // u u-macron-umlaut
796            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
797            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
798            "\u016b\u1e7b",     // u-macron u-macron-umlaut
799            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
800        };
801        Collator french = Collator.getInstance(Locale.FRENCH);
802
803        logln("Testing French order...");
804        checkListOrder(frenchList, french);
805
806        logln("Testing French order without decomposition...");
807        french.setDecomposition(Collator.NO_DECOMPOSITION);
808        checkListOrder(frenchList, french);
809
810        String[] englishList = {
811            "\u0075\u0075",     // u u
812            "\u0075\u00fc",     // u u-umlaut
813            "\u0075\u01d6",     // u u-umlaut-macron
814            "\u0075\u016b",     // u u-macron
815            "\u0075\u1e7b",     // u u-macron-umlaut
816            "\u00fc\u0075",     // u-umlaut u
817            "\u00fc\u00fc",     // u-umlaut u-umlaut
818            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
819            "\u00fc\u016b",     // u-umlaut u-macron
820            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
821            "\u01d6\u0075",     // u-umlaut-macron u
822            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
823            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
824            "\u01d6\u016b",     // u-umlaut-macron u-macron
825            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
826            "\u016b\u0075",     // u-macron u
827            "\u016b\u00fc",     // u-macron u-umlaut
828            "\u016b\u01d6",     // u-macron u-umlaut-macron
829            "\u016b\u016b",     // u-macron u-macron
830            "\u016b\u1e7b",     // u-macron u-macron-umlaut
831            "\u1e7b\u0075",     // u-macron-umlaut u
832            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
833            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
834            "\u1e7b\u016b",     // u-macron-umlaut u-macron
835            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
836        };
837        Collator english = Collator.getInstance(Locale.ENGLISH);
838
839        logln("Testing English order...");
840        checkListOrder(englishList, english);
841
842        logln("Testing English order without decomposition...");
843        english.setDecomposition(Collator.NO_DECOMPOSITION);
844        checkListOrder(englishList, english);
845    }
846
847    private void checkListOrder(String[] sortedList, Collator c) {
848        // this function uses the specified Collator to make sure the
849        // passed-in list is already sorted into ascending order
850        for (int i = 0; i < sortedList.length - 1; i++) {
851            if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
852                errln("List out of order at element #" + i + ": "
853                        + prettify(sortedList[i]) + " >= "
854                        + prettify(sortedList[i + 1]));
855            }
856        }
857    }
858
    // CollationElementIterator set doesn't work properly with next/prev
860    public void Test4663220() {
861        RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
862        CharacterIterator stringIter = new StringCharacterIterator("fox");
863        CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
864
865        int[] elements_next = new int[3];
866        logln("calling next:");
867        for (int i = 0; i < 3; ++i) {
868            logln("[" + i + "] " + (elements_next[i] = iter.next()));
869        }
870
871        int[] elements_fwd = new int[3];
872        logln("calling set/next:");
873        for (int i = 0; i < 3; ++i) {
874            iter.setOffset(i);
875            logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
876        }
877
878        for (int i = 0; i < 3; ++i) {
879            if (elements_next[i] != elements_fwd[i]) {
880                errln("mismatch at position " + i +
881                      ": " + elements_next[i] +
882                      " != " + elements_fwd[i]);
883            }
884        }
885    }
886
887    //------------------------------------------------------------------------
888    // Internal utilities
889    //
890    private void compareArray(Collator c, String[] tests) {
891        for (int i = 0; i < tests.length; i += 3) {
892
893            int expect = 0;
894            if (tests[i+1].equals("<")) {
895                expect = -1;
896            } else if (tests[i+1].equals(">")) {
897                expect = 1;
898            } else if (tests[i+1].equals("=")) {
899                expect = 0;
900            } else {
901                expect = Integer.decode(tests[i+1]).intValue();
902            }
903
904            int result = c.compare(tests[i], tests[i+2]);
905            if (sign(result) != sign(expect))
906            {
907                errln( i/3 + ": compare(" + prettify(tests[i])
908                                    + " , " + prettify(tests[i+2])
909                                    + ") got " + result + "; expected " + expect);
910            }
911            else
912            {
913                // Collator.compare worked OK; now try the collation keys
914                CollationKey k1 = c.getCollationKey(tests[i]);
915                CollationKey k2 = c.getCollationKey(tests[i+2]);
916
917                result = k1.compareTo(k2);
918                if (sign(result) != sign(expect)) {
919                    errln( i/3 + ": key(" + prettify(tests[i])
920                                        + ").compareTo(key(" + prettify(tests[i+2])
921                                        + ")) got " + result + "; expected " + expect);
922
923                    errln("  " + prettify(k1) + " vs. " + prettify(k2));
924                }
925            }
926        }
927    }
928
929    private static final int sign(int i) {
930        if (i < 0) return -1;
931        if (i > 0) return 1;
932        return 0;
933    }
934
935
    // Collator for Locale.US, obtained once during class initialization and
    // cast to RuleBasedCollator.
    static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);

    // Sample strings held as instance fields.
    // test1 and test2 are the same English sentence except for the leading
    // word ("XFILE" vs. "Xf ile") and the pairs highest/lowest and most/least.
    // test3 contains the non-ASCII letters \u00FC (u-umlaut), \u00F6
    // (o-umlaut) and \u00DF (sharp s).
    // NOTE(review): no use of these three fields is visible in this part of
    // the file; confirm they are still referenced before relying on them.
    String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
941}
942