Regression.java revision 14630:29af931514f5
1/*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/**
25 * @test
26 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
27 *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
28 *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
29 *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
30 * @library /java/text/testlib
31 * @summary Regression tests for Collation and associated classes
32 */
33/*
34(C) Copyright Taligent, Inc. 1996 - All Rights Reserved
35(C) Copyright IBM Corp. 1996 - All Rights Reserved
36
37  The original version of this source code and documentation is copyrighted and
38owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
39provided under terms of a License Agreement between Taligent and Sun. This
40technology is protected by multiple US and International patents. This notice and
41attribution to Taligent may not be removed.
42  Taligent is a registered trademark of Taligent, Inc.
43*/
44
45import java.text.*;
46import java.util.Locale;
47import java.util.Vector;
48
49
50public class Regression extends CollatorTest {
51
52    public static void main(String[] args) throws Exception {
53        new Regression().run(args);
54    }
55
56    // CollationElementIterator.reset() doesn't work
57    //
58    public void Test4048446() {
59        CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
60        CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
61
62        while ( i1.next() != CollationElementIterator.NULLORDER ) {
63        }
64        i1.reset();
65
66        assertEqual(i1, i2);
67    }
68
69
70    // Collator -> rules -> Collator round-trip broken for expanding characters
71    //
72    public void Test4051866() throws ParseException {
73        // Build a collator containing expanding characters
74        RuleBasedCollator c1 = new RuleBasedCollator("< o "
75                                                    +"& oe ,o\u3080"
76                                                    +"& oe ,\u1530 ,O"
77                                                    +"& OE ,O\u3080"
78                                                    +"& OE ,\u1520"
79                                                    +"< p ,P");
80
81        // Build another using the rules from  the first
82        RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
83
84        // Make sure they're the same
85        if (!c1.getRules().equals(c2.getRules())) {
86            errln("Rules are not equal");
87        }
88    }
89
90    // Collator thinks "black-bird" == "black"
91    //
92    public void Test4053636() {
93        if (en_us.equals("black-bird","black")) {
94            errln("black-bird == black");
95        }
96    }
97
98
99    // CollationElementIterator will not work correctly if the associated
100    // Collator object's mode is changed
101    //
102    public void Test4054238() {
103        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
104
105        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
106        CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
107
108        c.setDecomposition(Collator.NO_DECOMPOSITION);
109        CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
110
111        // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
112        // collator itself is in that mode
113        assertEqual(i1, i2);
114    }
115
116    // Collator.IDENTICAL documented but not implemented
117    //
118    public void Test4054734() {
119        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
120        try {
121            c.setStrength(Collator.IDENTICAL);
122        }
123        catch (Exception e) {
124            errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
125        }
126
127        String[] decomp = {
128            "\u0001",   "<",    "\u0002",
129            "\u0001",   "=",    "\u0001",
130            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
131            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
132        };
133        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
134        compareArray(c, decomp);
135
136        String[] nodecomp = {
137            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
138        };
139        c.setDecomposition(Collator.NO_DECOMPOSITION);
140        compareArray(c, nodecomp);
141    }
142
143    // Full Decomposition mode not implemented
144    //
145    public void Test4054736() {
146        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
147        c.setDecomposition(Collator.FULL_DECOMPOSITION);
148
149        String[] tests = {
150            "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
151        };
152
153        compareArray(c, tests);
154    }
155
156    // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
157    //
158    public void Test4058613() {
159        // Creating a default collator doesn't work when Korean is the default
160        // locale
161
162        Locale oldDefault = Locale.getDefault();
163
164        Locale.setDefault( Locale.KOREAN );
165        try {
166            Collator c = Collator.getInstance();
167
168            // Since the fix to this bug was to turn of decomposition for Korean collators,
169            // ensure that's what we got
170            if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
171              errln("Decomposition is not set to NO_DECOMPOSITION");
172            }
173        }
174        finally {
175            Locale.setDefault(oldDefault);
176        }
177    }
178
179    // RuleBasedCollator.getRules does not return the exact pattern as input
180    // for expanding character sequences
181    //
182    public void Test4059820() {
183        RuleBasedCollator c = null;
184        try {
185            c = new RuleBasedCollator("< a < b , c/a < d < z");
186        } catch (ParseException e) {
187            errln("Exception building collator: " + e.toString());
188            return;
189        }
190        if ( c.getRules().indexOf("c/a") == -1) {
191            errln("returned rules do not contain 'c/a'");
192        }
193    }
194
195    // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
196    //
197    public void Test4060154() {
198        RuleBasedCollator c = null;
199        try {
200            c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
201                                      + " & H < \u0131, \u0130, i, I" );
202        } catch (ParseException e) {
203            errln("Exception building collator: " + e.toString());
204            return;
205        }
206        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
207
208        String[] tertiary = {
209            "A",        "<",    "B",
210            "H",        "<",    "\u0131",
211            "H",        "<",    "I",
212            "\u0131",   "<",    "\u0130",
213            "\u0130",   "<",    "i",
214            "\u0130",   ">",    "H",
215        };
216        c.setStrength(Collator.TERTIARY);
217        compareArray(c, tertiary);
218
219        String[] secondary = {
220            "H",        "<",    "I",
221            "\u0131",   "=",    "\u0130",
222        };
223        c.setStrength(Collator.PRIMARY);
224        compareArray(c, secondary);
225    };
226
227    // Secondary/Tertiary comparison incorrect in French Secondary
228    //
229    public void Test4062418() throws ParseException {
230        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
231        c.setStrength(Collator.SECONDARY);
232
233        String[] tests = {
234                "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
235        };
236
237        compareArray(c, tests);
238    }
239
240    // Collator.compare() method broken if either string contains spaces
241    //
242    public void Test4065540() {
243        if (en_us.compare("abcd e", "abcd f") == 0) {
244            errln("'abcd e' == 'abcd f'");
245        }
246    }
247
248    // Unicode characters need to be recursively decomposed to get the
249    // correct result. For example,
250    // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
251    //
252    public void Test4066189() {
253        String test1 = "\u1EB1";
254        String test2 = "a\u0306\u0300";
255
256        RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
257        c1.setDecomposition(Collator.FULL_DECOMPOSITION);
258        CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
259
260        RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
261        c2.setDecomposition(Collator.NO_DECOMPOSITION);
262        CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
263
264        assertEqual(i1, i2);
265    }
266
267    // French secondary collation checking at the end of compare iteration fails
268    //
269    public void Test4066696() {
270        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
271        c.setStrength(Collator.SECONDARY);
272
273        String[] tests = {
274            "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
275        };
276
277        compareArray(c, tests);
278    }
279
280
281    // Bad canonicalization of same-class combining characters
282    //
283    public void Test4076676() {
284        // These combining characters are all in the same class, so they should not
285        // be reordered, and they should compare as unequal.
286        String s1 = "A\u0301\u0302\u0300";
287        String s2 = "A\u0302\u0300\u0301";
288
289        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
290        c.setStrength(Collator.TERTIARY);
291
292        if (c.compare(s1,s2) == 0) {
293            errln("Same-class combining chars were reordered");
294        }
295    }
296
297
298    // RuleBasedCollator.equals(null) throws NullPointerException
299    //
300    public void Test4079231() {
301        try {
302            if (en_us.equals(null)) {
303                errln("en_us.equals(null) returned true");
304            }
305        }
306        catch (Exception e) {
307            errln("en_us.equals(null) threw " + e.toString());
308        }
309    }
310
311    // RuleBasedCollator breaks on "< a < bb" rule
312    //
313    public void Test4078588() throws ParseException {
314        RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
315
316        int result = rbc.compare("a","bb");
317
318        if (result != -1) {
319            errln("Compare(a,bb) returned " + result + "; expected -1");
320        }
321    }
322
323    // Combining characters in different classes not reordered properly.
324    //
325    public void Test4081866() throws ParseException {
326        // These combining characters are all in different classes,
327        // so they should be reordered and the strings should compare as equal.
328        String s1 = "A\u0300\u0316\u0327\u0315";
329        String s2 = "A\u0327\u0316\u0315\u0300";
330
331        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
332        c.setStrength(Collator.TERTIARY);
333
334        // Now that the default collators are set to NO_DECOMPOSITION
335        // (as a result of fixing bug 4114077), we must set it explicitly
336        // when we're testing reordering behavior.  -- lwerner, 5/5/98
337        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
338
339        if (c.compare(s1,s2) != 0) {
340            errln("Combining chars were not reordered");
341        }
342    }
343
344    // string comparison errors in Scandinavian collators
345    //
346    public void Test4087241() {
347        RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
348                                                        new Locale("da", "DK"));
349        c.setStrength(Collator.SECONDARY);
350
351        String[] tests = {
352            "\u007a",   "<",    "\u00e6",       // z        < ae
353            "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
354            "Y",        "<",    "u\u0308",      // Y        < u-umlaut
355        };
356
357        compareArray(c, tests);
358    }
359
360    // CollationKey takes ignorable strings into account when it shouldn't
361    //
362    public void Test4087243() {
363        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
364        c.setStrength(Collator.TERTIARY);
365
366        String[] tests = {
367            "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
368        };
369
370        compareArray(c, tests);
371    }
372
373    // Mu/micro conflict
374    // Micro symbol and greek lowercase letter Mu should sort identically
375    //
376    public void Test4092260() {
377        Collator c = Collator.getInstance(new Locale("el", ""));
378
379        // will only be equal when FULL_DECOMPOSITION is used
380        c.setDecomposition(Collator.FULL_DECOMPOSITION);
381
382        String[] tests = {
383            "\u00B5",      "=",    "\u03BC",
384        };
385
386        compareArray(c, tests);
387    }
388
389    void Test4095316() {
390        Collator c = Collator.getInstance(new Locale("el", "GR"));
391        c.setStrength(Collator.TERTIARY);
392        // javadocs for RuleBasedCollator clearly specify that characters containing compatability
393        // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
394        c.setDecomposition(Collator.FULL_DECOMPOSITION);
395
396        String[] tests = {
397            "\u03D4",      "=",    "\u03AB",
398        };
399
400        compareArray(c, tests);
401    }
402
403    public void Test4101940() {
404        try {
405            RuleBasedCollator c = new RuleBasedCollator("< a < b");
406            CollationElementIterator i = c.getCollationElementIterator("");
407            i.reset();
408
409            if (i.next() != i.NULLORDER) {
410                errln("next did not return NULLORDER");
411            }
412        }
413        catch (Exception e) {
414            errln("Caught " + e );
415        }
416    }
417
418    // Collator.compare not handling spaces properly
419    //
420    public void Test4103436() {
421        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
422        c.setStrength(Collator.TERTIARY);
423
424        String[] tests = {
425            "file",      "<",    "file access",
426            "file",      "<",    "fileaccess",
427        };
428
429        compareArray(c, tests);
430    }
431
432    // Collation not Unicode conformant with Hangul syllables
433    //
434    public void Test4114076() {
435        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
436        c.setStrength(Collator.TERTIARY);
437
438        //
439        // With Canonical decomposition, Hangul syllables should get decomposed
440        // into Jamo, but Jamo characters should not be decomposed into
441        // conjoining Jamo
442        //
443        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
444        String[] test1 = {
445            "\ud4db",   "=",    "\u1111\u1171\u11b6",
446        };
447        compareArray(c, test1);
448
449        // Full decomposition result should be the same as canonical decomposition
450        // for all hangul.
451        c.setDecomposition(Collator.FULL_DECOMPOSITION);
452        compareArray(c, test1);
453
454    }
455
456
457    // Collator.getCollationKey was hanging on certain character sequences
458    //
459    public void Test4124632() throws Exception {
460        Collator coll = Collator.getInstance(Locale.JAPAN);
461
462        try {
463            coll.getCollationKey("A\u0308bc");
464        } catch (OutOfMemoryError e) {
465            errln("Ran out of memory -- probably an infinite loop");
466        }
467    }
468
469    // sort order of french words with multiple accents has errors
470    //
471    public void Test4132736() {
472        Collator c = Collator.getInstance(Locale.FRANCE);
473
474        String[] test1 = {
475            "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
476            "e\u0300\u0301",    ">",    "e\u0301\u0300",
477        };
478        compareArray(c, test1);
479    }
480
481    // The sorting using java.text.CollationKey is not in the exact order
482    //
483    public void Test4133509() {
484        String[] test1 = {
485            "Exception",    "<",    "ExceptionInInitializerError",
486            "Graphics",     "<",    "GraphicsEnvironment",
487            "String",       "<",    "StringBuffer",
488        };
489        compareArray(en_us, test1);
490    }
491
492    // Collation with decomposition off doesn't work for Europe
493    //
494    public void Test4114077() {
495        // Ensure that we get the same results with decomposition off
496        // as we do with it on....
497
498        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
499        c.setStrength(Collator.TERTIARY);
500
501        String[] test1 = {
502            "\u00C0",        "=", "A\u0300",        // Should be equivalent
503            "p\u00eache",         ">", "p\u00e9ch\u00e9",
504            "\u0204",        "=", "E\u030F",
505            "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
506                                                    //   -> a, ring, acute
507            "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
508        };
509        c.setDecomposition(Collator.NO_DECOMPOSITION);
510        compareArray(c, test1);
511
512        String[] test2 = {
513            "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
514        };
515        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
516        compareArray(c, test2);
517    }
518
519    // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
520    //
521    public void Test4141640() {
522        //
523        // Rather than just creating a Swedish collator, we might as well
524        // try to instantiate one for every locale available on the system
525        // in order to prevent this sort of bug from cropping up in the future
526        //
527        Locale[] locales = Collator.getAvailableLocales();
528
529        for (int i = 0; i < locales.length; i++) {
530            try {
531                Collator c = Collator.getInstance(locales[i]);
532            } catch (Exception e) {
533                errln("Caught " + e + " creating collator for " + locales[i]);
534            }
535        }
536    }
537
538    // getCollationKey throws exception for spanish text
539    // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
540    //
541    public void Test4139572() {
542        //
543        // Code pasted straight from the bug report
544        //
545        // create spanish locale and collator
546        Locale l = new Locale("es", "es");
547        Collator col = Collator.getInstance(l);
548
549        // this spanish phrase kills it!
550        col.getCollationKey("Nombre De Objeto");
551    }
552
553    // RuleBasedCollator doesn't use getCollationElementIterator internally
554    //
555    public void Test4146160() throws ParseException {
556        //
557        // Use a custom collator class whose getCollationElementIterator
558        // methods increment a count....
559        //
560        My4146160Collator.count = 0;
561        new My4146160Collator().getCollationKey("1");
562        if (My4146160Collator.count < 1) {
563            errln("getCollationElementIterator not called");
564        }
565
566        My4146160Collator.count = 0;
567        new My4146160Collator().compare("1", "2");
568        if (My4146160Collator.count < 1) {
569            errln("getCollationElementIterator not called");
570        }
571    }
572
573    static class My4146160Collator extends RuleBasedCollator {
574        public My4146160Collator() throws ParseException {
575            super(Regression.en_us.getRules());
576        }
577
578        public CollationElementIterator getCollationElementIterator(
579                                            String text) {
580            count++;
581            return super.getCollationElementIterator(text);
582        }
583        public CollationElementIterator getCollationElementIterator(
584                                            CharacterIterator text) {
585            count++;
586            return super.getCollationElementIterator(text);
587        }
588
589        public static int count = 0;
590    };
591
592    // CollationElementIterator.previous broken for expanding char sequences
593    //
594    public void Test4179686() throws ParseException {
595
596        // Create a collator with a few expanding character sequences in it....
597        RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
598                                                    + " & ae ; \u00e4 & AE ; \u00c4"
599                                                    + " & oe ; \u00f6 & OE ; \u00d6"
600                                                    + " & ue ; \u00fc & UE ; \u00dc");
601
602        String text = "T\u00f6ne"; // o-umlaut
603
604        CollationElementIterator iter = coll.getCollationElementIterator(text);
605        Vector elements = new Vector();
606        int elem;
607
608        // Iterate forward and collect all of the elements into a Vector
609        while ((elem = iter.next()) != iter.NULLORDER) {
610            elements.addElement(new Integer(elem));
611        }
612
613        // Now iterate backward and make sure they're the same
614        int index = elements.size() - 1;
615        while ((elem = iter.previous()) != iter.NULLORDER) {
616            int expect = ((Integer)elements.elementAt(index)).intValue();
617
618            if (elem != expect) {
619                errln("Mismatch at index " + index
620                      + ": got " + Integer.toString(elem,16)
621                      + ", expected " + Integer.toString(expect,16));
622            }
623            index--;
624        }
625    }
626
627    public void Test4244884() throws ParseException {
628        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
629        coll = new RuleBasedCollator(coll.getRules()
630                + " & C < ch , cH , Ch , CH < cat < crunchy");
631
632        String[] testStrings = new String[] {
633            "car",
634            "cave",
635            "clamp",
636            "cramp",
637            "czar",
638            "church",
639            "catalogue",
640            "crunchy",
641            "dog"
642        };
643
644        for (int i = 1; i < testStrings.length; i++) {
645            if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
646                errln("error: \"" + testStrings[i - 1]
647                    + "\" is greater than or equal to \"" + testStrings[i]
648                    + "\".");
649            }
650        }
651    }
652
653    public void Test4179216() throws ParseException {
654        // you can position a CollationElementIterator in the middle of
655        // a contracting character sequence, yielding a bogus collation
656        // element
657        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
658        coll = new RuleBasedCollator(coll.getRules()
659                + " & C < ch , cH , Ch , CH < cat < crunchy");
660        String testText = "church church catcatcher runcrunchynchy";
661        CollationElementIterator iter = coll.getCollationElementIterator(
662                testText);
663
664        // test that the "ch" combination works properly
665        iter.setOffset(4);
666        int elt4 = CollationElementIterator.primaryOrder(iter.next());
667
668        iter.reset();
669        int elt0 = CollationElementIterator.primaryOrder(iter.next());
670
671        iter.setOffset(5);
672        int elt5 = CollationElementIterator.primaryOrder(iter.next());
673
674        if (elt4 != elt0 || elt5 != elt0)
675            errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
676                    + elt4 + "), and 5 (" + elt5 + ") don't match.");
677
678        // test that the "cat" combination works properly
679        iter.setOffset(14);
680        int elt14 = CollationElementIterator.primaryOrder(iter.next());
681
682        iter.setOffset(15);
683        int elt15 = CollationElementIterator.primaryOrder(iter.next());
684
685        iter.setOffset(16);
686        int elt16 = CollationElementIterator.primaryOrder(iter.next());
687
688        iter.setOffset(17);
689        int elt17 = CollationElementIterator.primaryOrder(iter.next());
690
691        iter.setOffset(18);
692        int elt18 = CollationElementIterator.primaryOrder(iter.next());
693
694        iter.setOffset(19);
695        int elt19 = CollationElementIterator.primaryOrder(iter.next());
696
697        if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
698                || elt14 != elt18 || elt14 != elt19)
699            errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
700            + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
701            + ", elt18 = " + elt18 + ", elt19 = " + elt19);
702
703        // now generate a complete list of the collation elements,
704        // first using next() and then using setOffset(), and
705        // make sure both interfaces return the same set of elements
706        iter.reset();
707
708        int elt = iter.next();
709        int count = 0;
710        while (elt != CollationElementIterator.NULLORDER) {
711            ++count;
712            elt = iter.next();
713        }
714
715        String[] nextElements = new String[count];
716        String[] setOffsetElements = new String[count];
717        int lastPos = 0;
718
719        iter.reset();
720        elt = iter.next();
721        count = 0;
722        while (elt != CollationElementIterator.NULLORDER) {
723            nextElements[count++] = testText.substring(lastPos, iter.getOffset());
724            lastPos = iter.getOffset();
725            elt = iter.next();
726        }
727        count = 0;
728        for (int i = 0; i < testText.length(); ) {
729            iter.setOffset(i);
730            lastPos = iter.getOffset();
731            elt = iter.next();
732            setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
733            i = iter.getOffset();
734        }
735        for (int i = 0; i < nextElements.length; i++) {
736            if (nextElements[i].equals(setOffsetElements[i])) {
737                logln(nextElements[i]);
738            } else {
739                errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
740                    + setOffsetElements[i]);
741            }
742        }
743    }
744
745    public void Test4216006() throws Exception {
746        // rule parser barfs on "<\u00e0=a\u0300", and on other cases
747        // where the same token (after normalization) appears twice in a row
748        boolean caughtException = false;
749        try {
750            RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
751        }
752        catch (ParseException e) {
753            caughtException = true;
754        }
755        if (!caughtException) {
756            throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
757        }
758
759        RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
760        collator.setDecomposition(Collator.FULL_DECOMPOSITION);
761        collator.setStrength(Collator.IDENTICAL);
762
763        String[] tests = {
764            "a\u0300", "=", "\u00e0",
765            "\u00e0",  "=", "a\u0300"
766        };
767
768        compareArray(collator, tests);
769    }
770
771    public void Test4171974() {
772        // test French accent ordering more thoroughly
773        String[] frenchList = {
774            "\u0075\u0075",     // u u
775            "\u00fc\u0075",     // u-umlaut u
776            "\u01d6\u0075",     // u-umlaut-macron u
777            "\u016b\u0075",     // u-macron u
778            "\u1e7b\u0075",     // u-macron-umlaut u
779            "\u0075\u00fc",     // u u-umlaut
780            "\u00fc\u00fc",     // u-umlaut u-umlaut
781            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
782            "\u016b\u00fc",     // u-macron u-umlaut
783            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
784            "\u0075\u01d6",     // u u-umlaut-macron
785            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
786            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
787            "\u016b\u01d6",     // u-macron u-umlaut-macron
788            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
789            "\u0075\u016b",     // u u-macron
790            "\u00fc\u016b",     // u-umlaut u-macron
791            "\u01d6\u016b",     // u-umlaut-macron u-macron
792            "\u016b\u016b",     // u-macron u-macron
793            "\u1e7b\u016b",     // u-macron-umlaut u-macron
794            "\u0075\u1e7b",     // u u-macron-umlaut
795            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
796            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
797            "\u016b\u1e7b",     // u-macron u-macron-umlaut
798            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
799        };
800        Collator french = Collator.getInstance(Locale.FRENCH);
801
802        logln("Testing French order...");
803        checkListOrder(frenchList, french);
804
805        logln("Testing French order without decomposition...");
806        french.setDecomposition(Collator.NO_DECOMPOSITION);
807        checkListOrder(frenchList, french);
808
809        String[] englishList = {
810            "\u0075\u0075",     // u u
811            "\u0075\u00fc",     // u u-umlaut
812            "\u0075\u01d6",     // u u-umlaut-macron
813            "\u0075\u016b",     // u u-macron
814            "\u0075\u1e7b",     // u u-macron-umlaut
815            "\u00fc\u0075",     // u-umlaut u
816            "\u00fc\u00fc",     // u-umlaut u-umlaut
817            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
818            "\u00fc\u016b",     // u-umlaut u-macron
819            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
820            "\u01d6\u0075",     // u-umlaut-macron u
821            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
822            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
823            "\u01d6\u016b",     // u-umlaut-macron u-macron
824            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
825            "\u016b\u0075",     // u-macron u
826            "\u016b\u00fc",     // u-macron u-umlaut
827            "\u016b\u01d6",     // u-macron u-umlaut-macron
828            "\u016b\u016b",     // u-macron u-macron
829            "\u016b\u1e7b",     // u-macron u-macron-umlaut
830            "\u1e7b\u0075",     // u-macron-umlaut u
831            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
832            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
833            "\u1e7b\u016b",     // u-macron-umlaut u-macron
834            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
835        };
836        Collator english = Collator.getInstance(Locale.ENGLISH);
837
838        logln("Testing English order...");
839        checkListOrder(englishList, english);
840
841        logln("Testing English order without decomposition...");
842        english.setDecomposition(Collator.NO_DECOMPOSITION);
843        checkListOrder(englishList, english);
844    }
845
846    private void checkListOrder(String[] sortedList, Collator c) {
847        // this function uses the specified Collator to make sure the
848        // passed-in list is already sorted into ascending order
849        for (int i = 0; i < sortedList.length - 1; i++) {
850            if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
851                errln("List out of order at element #" + i + ": "
852                        + prettify(sortedList[i]) + " >= "
853                        + prettify(sortedList[i + 1]));
854            }
855        }
856    }
857
858    // CollationElementIterator set doesn't work propertly with next/prev
859    public void Test4663220() {
860        RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
861        CharacterIterator stringIter = new StringCharacterIterator("fox");
862        CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
863
864        int[] elements_next = new int[3];
865        logln("calling next:");
866        for (int i = 0; i < 3; ++i) {
867            logln("[" + i + "] " + (elements_next[i] = iter.next()));
868        }
869
870        int[] elements_fwd = new int[3];
871        logln("calling set/next:");
872        for (int i = 0; i < 3; ++i) {
873            iter.setOffset(i);
874            logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
875        }
876
877        for (int i = 0; i < 3; ++i) {
878            if (elements_next[i] != elements_fwd[i]) {
879                errln("mismatch at position " + i +
880                      ": " + elements_next[i] +
881                      " != " + elements_fwd[i]);
882            }
883        }
884    }
885
886    //------------------------------------------------------------------------
887    // Internal utilities
888    //
889    private void compareArray(Collator c, String[] tests) {
890        for (int i = 0; i < tests.length; i += 3) {
891
892            int expect = 0;
893            if (tests[i+1].equals("<")) {
894                expect = -1;
895            } else if (tests[i+1].equals(">")) {
896                expect = 1;
897            } else if (tests[i+1].equals("=")) {
898                expect = 0;
899            } else {
900                expect = Integer.decode(tests[i+1]).intValue();
901            }
902
903            int result = c.compare(tests[i], tests[i+2]);
904            if (sign(result) != sign(expect))
905            {
906                errln( i/3 + ": compare(" + prettify(tests[i])
907                                    + " , " + prettify(tests[i+2])
908                                    + ") got " + result + "; expected " + expect);
909            }
910            else
911            {
912                // Collator.compare worked OK; now try the collation keys
913                CollationKey k1 = c.getCollationKey(tests[i]);
914                CollationKey k2 = c.getCollationKey(tests[i+2]);
915
916                result = k1.compareTo(k2);
917                if (sign(result) != sign(expect)) {
918                    errln( i/3 + ": key(" + prettify(tests[i])
919                                        + ").compareTo(key(" + prettify(tests[i+2])
920                                        + ")) got " + result + "; expected " + expect);
921
922                    errln("  " + prettify(k1) + " vs. " + prettify(k2));
923                }
924            }
925        }
926    }
927
928    private static final int sign(int i) {
929        if (i < 0) return -1;
930        if (i > 0) return 1;
931        return 0;
932    }
933
934
    // Collator for Locale.US, obtained from the Collator factory and cast to
    // RuleBasedCollator.  Static, so a single instance is shared class-wide.
    static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);

    // Sample strings.  They are not referenced in the visible part of the
    // file -- NOTE(review): presumably used by tests earlier in the class; confirm.
    // test1 and test2 are similar English sentences that differ in letter
    // case, spacing and a few words.
    String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    // test3 contains u-umlaut (\u00FC), o-umlaut (\u00F6) and sharp s (\u00DF).
    String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
940}
941