Regression.java revision 14630:29af931514f5
1/* 2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24/** 25 * @test 26 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154 27 * 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241 28 * 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736 29 * 4133509 4139572 4141640 4179126 4179686 4244884 4663220 30 * @library /java/text/testlib 31 * @summary Regression tests for Collation and associated classes 32 */ 33/* 34(C) Copyright Taligent, Inc. 1996 - All Rights Reserved 35(C) Copyright IBM Corp. 1996 - All Rights Reserved 36 37 The original version of this source code and documentation is copyrighted and 38owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are 39provided under terms of a License Agreement between Taligent and Sun. This 40technology is protected by multiple US and International patents. This notice and 41attribution to Taligent may not be removed. 42 Taligent is a registered trademark of Taligent, Inc. 43*/ 44 45import java.text.*; 46import java.util.Locale; 47import java.util.Vector; 48 49 50public class Regression extends CollatorTest { 51 52 public static void main(String[] args) throws Exception { 53 new Regression().run(args); 54 } 55 56 // CollationElementIterator.reset() doesn't work 57 // 58 public void Test4048446() { 59 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 60 CollationElementIterator i2 = en_us.getCollationElementIterator(test1); 61 62 while ( i1.next() != CollationElementIterator.NULLORDER ) { 63 } 64 i1.reset(); 65 66 assertEqual(i1, i2); 67 } 68 69 70 // Collator -> rules -> Collator round-trip broken for expanding characters 71 // 72 public void Test4051866() throws ParseException { 73 // Build a collator containing expanding characters 74 RuleBasedCollator c1 = new RuleBasedCollator("< o " 75 +"& oe ,o\u3080" 76 +"& oe ,\u1530 ,O" 77 +"& OE ,O\u3080" 78 +"& OE ,\u1520" 79 +"< p ,P"); 80 81 // Build another using the rules from the first 82 RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules()); 83 84 // Make sure they're the same 85 if (!c1.getRules().equals(c2.getRules())) { 86 errln("Rules are not equal"); 87 } 88 } 89 90 // Collator thinks "black-bird" == "black" 91 // 92 public void Test4053636() { 93 if (en_us.equals("black-bird","black")) { 94 errln("black-bird == black"); 95 } 96 } 97 98 99 // CollationElementIterator will not work correctly if the associated 100 // Collator object's mode is changed 101 // 102 public void Test4054238() { 103 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 104 105 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 106 CollationElementIterator i1 = en_us.getCollationElementIterator(test3); 107 108 c.setDecomposition(Collator.NO_DECOMPOSITION); 109 CollationElementIterator i2 = en_us.getCollationElementIterator(test3); 110 111 // At this point, BOTH iterators should use NO_DECOMPOSITION, since the 112 // collator itself is in that mode 113 assertEqual(i1, i2); 114 } 115 116 // Collator.IDENTICAL documented but not implemented 117 // 118 public void Test4054734() { 119 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 120 try { 121 c.setStrength(Collator.IDENTICAL); 122 } 123 catch (Exception e) { 124 errln("Caught " + e.toString() + " setting Collator.IDENTICAL"); 125 } 126 127 String[] decomp = { 128 "\u0001", "<", "\u0002", 129 "\u0001", "=", "\u0001", 130 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 131 "\u00C0", "=", "A\u0300" // Decomp should make these equal 132 }; 133 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 134 compareArray(c, decomp); 135 136 String[] nodecomp = { 137 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 138 }; 139 c.setDecomposition(Collator.NO_DECOMPOSITION); 140 compareArray(c, nodecomp); 141 } 142 143 // Full Decomposition mode not implemented 144 // 145 public void Test4054736() { 146 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 147 c.setDecomposition(Collator.FULL_DECOMPOSITION); 148 149 String[] tests = { 150 "\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed 151 }; 152 153 compareArray(c, tests); 154 } 155 156 // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean 157 // 158 public void Test4058613() { 159 // Creating a default collator doesn't work when Korean is the default 160 // locale 161 162 Locale oldDefault = Locale.getDefault(); 163 164 Locale.setDefault( Locale.KOREAN ); 165 try { 166 Collator c = Collator.getInstance(); 167 168 // Since the fix to this bug was to turn of decomposition for Korean collators, 169 // ensure that's what we got 170 if (c.getDecomposition() != Collator.NO_DECOMPOSITION) { 171 errln("Decomposition is not set to NO_DECOMPOSITION"); 172 } 173 } 174 finally { 175 Locale.setDefault(oldDefault); 176 } 177 } 178 179 // RuleBasedCollator.getRules does not return the exact pattern as input 180 // for expanding character sequences 181 // 182 public void Test4059820() { 183 RuleBasedCollator c = null; 184 try { 185 c = new RuleBasedCollator("< a < b , c/a < d < z"); 186 } catch (ParseException e) { 187 errln("Exception building collator: " + e.toString()); 188 return; 189 } 190 if ( c.getRules().indexOf("c/a") == -1) { 191 errln("returned rules do not contain 'c/a'"); 192 } 193 } 194 195 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 196 // 197 public void Test4060154() { 198 RuleBasedCollator c = null; 199 try { 200 c = new RuleBasedCollator("< g, G < h, H < i, I < j, J" 201 + " & H < \u0131, \u0130, i, I" ); 202 } catch (ParseException e) { 203 errln("Exception building collator: " + e.toString()); 204 return; 205 } 206 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 207 208 String[] tertiary = { 209 "A", "<", "B", 210 "H", "<", "\u0131", 211 "H", "<", "I", 212 "\u0131", "<", "\u0130", 213 "\u0130", "<", "i", 214 "\u0130", ">", "H", 215 }; 216 c.setStrength(Collator.TERTIARY); 217 compareArray(c, tertiary); 218 219 String[] secondary = { 220 "H", "<", "I", 221 "\u0131", "=", "\u0130", 222 }; 223 c.setStrength(Collator.PRIMARY); 224 compareArray(c, secondary); 225 }; 226 227 // Secondary/Tertiary comparison incorrect in French Secondary 228 // 229 public void Test4062418() throws ParseException { 230 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 231 c.setStrength(Collator.SECONDARY); 232 233 String[] tests = { 234 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 235 }; 236 237 compareArray(c, tests); 238 } 239 240 // Collator.compare() method broken if either string contains spaces 241 // 242 public void Test4065540() { 243 if (en_us.compare("abcd e", "abcd f") == 0) { 244 errln("'abcd e' == 'abcd f'"); 245 } 246 } 247 248 // Unicode characters need to be recursively decomposed to get the 249 // correct result. For example, 250 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 251 // 252 public void Test4066189() { 253 String test1 = "\u1EB1"; 254 String test2 = "a\u0306\u0300"; 255 256 RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone(); 257 c1.setDecomposition(Collator.FULL_DECOMPOSITION); 258 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 259 260 RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone(); 261 c2.setDecomposition(Collator.NO_DECOMPOSITION); 262 CollationElementIterator i2 = en_us.getCollationElementIterator(test2); 263 264 assertEqual(i1, i2); 265 } 266 267 // French secondary collation checking at the end of compare iteration fails 268 // 269 public void Test4066696() { 270 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 271 c.setStrength(Collator.SECONDARY); 272 273 String[] tests = { 274 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 275 }; 276 277 compareArray(c, tests); 278 } 279 280 281 // Bad canonicalization of same-class combining characters 282 // 283 public void Test4076676() { 284 // These combining characters are all in the same class, so they should not 285 // be reordered, and they should compare as unequal. 286 String s1 = "A\u0301\u0302\u0300"; 287 String s2 = "A\u0302\u0300\u0301"; 288 289 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 290 c.setStrength(Collator.TERTIARY); 291 292 if (c.compare(s1,s2) == 0) { 293 errln("Same-class combining chars were reordered"); 294 } 295 } 296 297 298 // RuleBasedCollator.equals(null) throws NullPointerException 299 // 300 public void Test4079231() { 301 try { 302 if (en_us.equals(null)) { 303 errln("en_us.equals(null) returned true"); 304 } 305 } 306 catch (Exception e) { 307 errln("en_us.equals(null) threw " + e.toString()); 308 } 309 } 310 311 // RuleBasedCollator breaks on "< a < bb" rule 312 // 313 public void Test4078588() throws ParseException { 314 RuleBasedCollator rbc=new RuleBasedCollator("< a < bb"); 315 316 int result = rbc.compare("a","bb"); 317 318 if (result != -1) { 319 errln("Compare(a,bb) returned " + result + "; expected -1"); 320 } 321 } 322 323 // Combining characters in different classes not reordered properly. 324 // 325 public void Test4081866() throws ParseException { 326 // These combining characters are all in different classes, 327 // so they should be reordered and the strings should compare as equal. 328 String s1 = "A\u0300\u0316\u0327\u0315"; 329 String s2 = "A\u0327\u0316\u0315\u0300"; 330 331 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 332 c.setStrength(Collator.TERTIARY); 333 334 // Now that the default collators are set to NO_DECOMPOSITION 335 // (as a result of fixing bug 4114077), we must set it explicitly 336 // when we're testing reordering behavior. -- lwerner, 5/5/98 337 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 338 339 if (c.compare(s1,s2) != 0) { 340 errln("Combining chars were not reordered"); 341 } 342 } 343 344 // string comparison errors in Scandinavian collators 345 // 346 public void Test4087241() { 347 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance( 348 new Locale("da", "DK")); 349 c.setStrength(Collator.SECONDARY); 350 351 String[] tests = { 352 "\u007a", "<", "\u00e6", // z < ae 353 "a\u0308", "<", "a\u030a", // a-unlaut < a-ring 354 "Y", "<", "u\u0308", // Y < u-umlaut 355 }; 356 357 compareArray(c, tests); 358 } 359 360 // CollationKey takes ignorable strings into account when it shouldn't 361 // 362 public void Test4087243() { 363 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 364 c.setStrength(Collator.TERTIARY); 365 366 String[] tests = { 367 "123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A 368 }; 369 370 compareArray(c, tests); 371 } 372 373 // Mu/micro conflict 374 // Micro symbol and greek lowercase letter Mu should sort identically 375 // 376 public void Test4092260() { 377 Collator c = Collator.getInstance(new Locale("el", "")); 378 379 // will only be equal when FULL_DECOMPOSITION is used 380 c.setDecomposition(Collator.FULL_DECOMPOSITION); 381 382 String[] tests = { 383 "\u00B5", "=", "\u03BC", 384 }; 385 386 compareArray(c, tests); 387 } 388 389 void Test4095316() { 390 Collator c = Collator.getInstance(new Locale("el", "GR")); 391 c.setStrength(Collator.TERTIARY); 392 // javadocs for RuleBasedCollator clearly specify that characters containing compatability 393 // chars MUST use FULL_DECOMPOSITION to get accurate comparisons. 394 c.setDecomposition(Collator.FULL_DECOMPOSITION); 395 396 String[] tests = { 397 "\u03D4", "=", "\u03AB", 398 }; 399 400 compareArray(c, tests); 401 } 402 403 public void Test4101940() { 404 try { 405 RuleBasedCollator c = new RuleBasedCollator("< a < b"); 406 CollationElementIterator i = c.getCollationElementIterator(""); 407 i.reset(); 408 409 if (i.next() != i.NULLORDER) { 410 errln("next did not return NULLORDER"); 411 } 412 } 413 catch (Exception e) { 414 errln("Caught " + e ); 415 } 416 } 417 418 // Collator.compare not handling spaces properly 419 // 420 public void Test4103436() { 421 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 422 c.setStrength(Collator.TERTIARY); 423 424 String[] tests = { 425 "file", "<", "file access", 426 "file", "<", "fileaccess", 427 }; 428 429 compareArray(c, tests); 430 } 431 432 // Collation not Unicode conformant with Hangul syllables 433 // 434 public void Test4114076() { 435 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 436 c.setStrength(Collator.TERTIARY); 437 438 // 439 // With Canonical decomposition, Hangul syllables should get decomposed 440 // into Jamo, but Jamo characters should not be decomposed into 441 // conjoining Jamo 442 // 443 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 444 String[] test1 = { 445 "\ud4db", "=", "\u1111\u1171\u11b6", 446 }; 447 compareArray(c, test1); 448 449 // Full decomposition result should be the same as canonical decomposition 450 // for all hangul. 451 c.setDecomposition(Collator.FULL_DECOMPOSITION); 452 compareArray(c, test1); 453 454 } 455 456 457 // Collator.getCollationKey was hanging on certain character sequences 458 // 459 public void Test4124632() throws Exception { 460 Collator coll = Collator.getInstance(Locale.JAPAN); 461 462 try { 463 coll.getCollationKey("A\u0308bc"); 464 } catch (OutOfMemoryError e) { 465 errln("Ran out of memory -- probably an infinite loop"); 466 } 467 } 468 469 // sort order of french words with multiple accents has errors 470 // 471 public void Test4132736() { 472 Collator c = Collator.getInstance(Locale.FRANCE); 473 474 String[] test1 = { 475 "e\u0300e\u0301", "<", "e\u0301e\u0300", 476 "e\u0300\u0301", ">", "e\u0301\u0300", 477 }; 478 compareArray(c, test1); 479 } 480 481 // The sorting using java.text.CollationKey is not in the exact order 482 // 483 public void Test4133509() { 484 String[] test1 = { 485 "Exception", "<", "ExceptionInInitializerError", 486 "Graphics", "<", "GraphicsEnvironment", 487 "String", "<", "StringBuffer", 488 }; 489 compareArray(en_us, test1); 490 } 491 492 // Collation with decomposition off doesn't work for Europe 493 // 494 public void Test4114077() { 495 // Ensure that we get the same results with decomposition off 496 // as we do with it on.... 497 498 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 499 c.setStrength(Collator.TERTIARY); 500 501 String[] test1 = { 502 "\u00C0", "=", "A\u0300", // Should be equivalent 503 "p\u00eache", ">", "p\u00e9ch\u00e9", 504 "\u0204", "=", "E\u030F", 505 "\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute 506 // -> a, ring, acute 507 "A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal 508 }; 509 c.setDecomposition(Collator.NO_DECOMPOSITION); 510 compareArray(c, test1); 511 512 String[] test2 = { 513 "A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal 514 }; 515 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 516 compareArray(c, test2); 517 } 518 519 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 520 // 521 public void Test4141640() { 522 // 523 // Rather than just creating a Swedish collator, we might as well 524 // try to instantiate one for every locale available on the system 525 // in order to prevent this sort of bug from cropping up in the future 526 // 527 Locale[] locales = Collator.getAvailableLocales(); 528 529 for (int i = 0; i < locales.length; i++) { 530 try { 531 Collator c = Collator.getInstance(locales[i]); 532 } catch (Exception e) { 533 errln("Caught " + e + " creating collator for " + locales[i]); 534 } 535 } 536 } 537 538 // getCollationKey throws exception for spanish text 539 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 540 // 541 public void Test4139572() { 542 // 543 // Code pasted straight from the bug report 544 // 545 // create spanish locale and collator 546 Locale l = new Locale("es", "es"); 547 Collator col = Collator.getInstance(l); 548 549 // this spanish phrase kills it! 550 col.getCollationKey("Nombre De Objeto"); 551 } 552 553 // RuleBasedCollator doesn't use getCollationElementIterator internally 554 // 555 public void Test4146160() throws ParseException { 556 // 557 // Use a custom collator class whose getCollationElementIterator 558 // methods increment a count.... 559 // 560 My4146160Collator.count = 0; 561 new My4146160Collator().getCollationKey("1"); 562 if (My4146160Collator.count < 1) { 563 errln("getCollationElementIterator not called"); 564 } 565 566 My4146160Collator.count = 0; 567 new My4146160Collator().compare("1", "2"); 568 if (My4146160Collator.count < 1) { 569 errln("getCollationElementIterator not called"); 570 } 571 } 572 573 static class My4146160Collator extends RuleBasedCollator { 574 public My4146160Collator() throws ParseException { 575 super(Regression.en_us.getRules()); 576 } 577 578 public CollationElementIterator getCollationElementIterator( 579 String text) { 580 count++; 581 return super.getCollationElementIterator(text); 582 } 583 public CollationElementIterator getCollationElementIterator( 584 CharacterIterator text) { 585 count++; 586 return super.getCollationElementIterator(text); 587 } 588 589 public static int count = 0; 590 }; 591 592 // CollationElementIterator.previous broken for expanding char sequences 593 // 594 public void Test4179686() throws ParseException { 595 596 // Create a collator with a few expanding character sequences in it.... 597 RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules() 598 + " & ae ; \u00e4 & AE ; \u00c4" 599 + " & oe ; \u00f6 & OE ; \u00d6" 600 + " & ue ; \u00fc & UE ; \u00dc"); 601 602 String text = "T\u00f6ne"; // o-umlaut 603 604 CollationElementIterator iter = coll.getCollationElementIterator(text); 605 Vector elements = new Vector(); 606 int elem; 607 608 // Iterate forward and collect all of the elements into a Vector 609 while ((elem = iter.next()) != iter.NULLORDER) { 610 elements.addElement(new Integer(elem)); 611 } 612 613 // Now iterate backward and make sure they're the same 614 int index = elements.size() - 1; 615 while ((elem = iter.previous()) != iter.NULLORDER) { 616 int expect = ((Integer)elements.elementAt(index)).intValue(); 617 618 if (elem != expect) { 619 errln("Mismatch at index " + index 620 + ": got " + Integer.toString(elem,16) 621 + ", expected " + Integer.toString(expect,16)); 622 } 623 index--; 624 } 625 } 626 627 public void Test4244884() throws ParseException { 628 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 629 coll = new RuleBasedCollator(coll.getRules() 630 + " & C < ch , cH , Ch , CH < cat < crunchy"); 631 632 String[] testStrings = new String[] { 633 "car", 634 "cave", 635 "clamp", 636 "cramp", 637 "czar", 638 "church", 639 "catalogue", 640 "crunchy", 641 "dog" 642 }; 643 644 for (int i = 1; i < testStrings.length; i++) { 645 if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) { 646 errln("error: \"" + testStrings[i - 1] 647 + "\" is greater than or equal to \"" + testStrings[i] 648 + "\"."); 649 } 650 } 651 } 652 653 public void Test4179216() throws ParseException { 654 // you can position a CollationElementIterator in the middle of 655 // a contracting character sequence, yielding a bogus collation 656 // element 657 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 658 coll = new RuleBasedCollator(coll.getRules() 659 + " & C < ch , cH , Ch , CH < cat < crunchy"); 660 String testText = "church church catcatcher runcrunchynchy"; 661 CollationElementIterator iter = coll.getCollationElementIterator( 662 testText); 663 664 // test that the "ch" combination works properly 665 iter.setOffset(4); 666 int elt4 = CollationElementIterator.primaryOrder(iter.next()); 667 668 iter.reset(); 669 int elt0 = CollationElementIterator.primaryOrder(iter.next()); 670 671 iter.setOffset(5); 672 int elt5 = CollationElementIterator.primaryOrder(iter.next()); 673 674 if (elt4 != elt0 || elt5 != elt0) 675 errln("The collation elements at positions 0 (" + elt0 + "), 4 (" 676 + elt4 + "), and 5 (" + elt5 + ") don't match."); 677 678 // test that the "cat" combination works properly 679 iter.setOffset(14); 680 int elt14 = CollationElementIterator.primaryOrder(iter.next()); 681 682 iter.setOffset(15); 683 int elt15 = CollationElementIterator.primaryOrder(iter.next()); 684 685 iter.setOffset(16); 686 int elt16 = CollationElementIterator.primaryOrder(iter.next()); 687 688 iter.setOffset(17); 689 int elt17 = CollationElementIterator.primaryOrder(iter.next()); 690 691 iter.setOffset(18); 692 int elt18 = CollationElementIterator.primaryOrder(iter.next()); 693 694 iter.setOffset(19); 695 int elt19 = CollationElementIterator.primaryOrder(iter.next()); 696 697 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 698 || elt14 != elt18 || elt14 != elt19) 699 errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = " 700 + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17 701 + ", elt18 = " + elt18 + ", elt19 = " + elt19); 702 703 // now generate a complete list of the collation elements, 704 // first using next() and then using setOffset(), and 705 // make sure both interfaces return the same set of elements 706 iter.reset(); 707 708 int elt = iter.next(); 709 int count = 0; 710 while (elt != CollationElementIterator.NULLORDER) { 711 ++count; 712 elt = iter.next(); 713 } 714 715 String[] nextElements = new String[count]; 716 String[] setOffsetElements = new String[count]; 717 int lastPos = 0; 718 719 iter.reset(); 720 elt = iter.next(); 721 count = 0; 722 while (elt != CollationElementIterator.NULLORDER) { 723 nextElements[count++] = testText.substring(lastPos, iter.getOffset()); 724 lastPos = iter.getOffset(); 725 elt = iter.next(); 726 } 727 count = 0; 728 for (int i = 0; i < testText.length(); ) { 729 iter.setOffset(i); 730 lastPos = iter.getOffset(); 731 elt = iter.next(); 732 setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset()); 733 i = iter.getOffset(); 734 } 735 for (int i = 0; i < nextElements.length; i++) { 736 if (nextElements[i].equals(setOffsetElements[i])) { 737 logln(nextElements[i]); 738 } else { 739 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded " 740 + setOffsetElements[i]); 741 } 742 } 743 } 744 745 public void Test4216006() throws Exception { 746 // rule parser barfs on "<\u00e0=a\u0300", and on other cases 747 // where the same token (after normalization) appears twice in a row 748 boolean caughtException = false; 749 try { 750 RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300"); 751 } 752 catch (ParseException e) { 753 caughtException = true; 754 } 755 if (!caughtException) { 756 throw new Exception("\"a<a\" collation sequence didn't cause parse error!"); 757 } 758 759 RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300"); 760 collator.setDecomposition(Collator.FULL_DECOMPOSITION); 761 collator.setStrength(Collator.IDENTICAL); 762 763 String[] tests = { 764 "a\u0300", "=", "\u00e0", 765 "\u00e0", "=", "a\u0300" 766 }; 767 768 compareArray(collator, tests); 769 } 770 771 public void Test4171974() { 772 // test French accent ordering more thoroughly 773 String[] frenchList = { 774 "\u0075\u0075", // u u 775 "\u00fc\u0075", // u-umlaut u 776 "\u01d6\u0075", // u-umlaut-macron u 777 "\u016b\u0075", // u-macron u 778 "\u1e7b\u0075", // u-macron-umlaut u 779 "\u0075\u00fc", // u u-umlaut 780 "\u00fc\u00fc", // u-umlaut u-umlaut 781 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 782 "\u016b\u00fc", // u-macron u-umlaut 783 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 784 "\u0075\u01d6", // u u-umlaut-macron 785 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 786 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 787 "\u016b\u01d6", // u-macron u-umlaut-macron 788 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 789 "\u0075\u016b", // u u-macron 790 "\u00fc\u016b", // u-umlaut u-macron 791 "\u01d6\u016b", // u-umlaut-macron u-macron 792 "\u016b\u016b", // u-macron u-macron 793 "\u1e7b\u016b", // u-macron-umlaut u-macron 794 "\u0075\u1e7b", // u u-macron-umlaut 795 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 796 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 797 "\u016b\u1e7b", // u-macron u-macron-umlaut 798 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 799 }; 800 Collator french = Collator.getInstance(Locale.FRENCH); 801 802 logln("Testing French order..."); 803 checkListOrder(frenchList, french); 804 805 logln("Testing French order without decomposition..."); 806 french.setDecomposition(Collator.NO_DECOMPOSITION); 807 checkListOrder(frenchList, french); 808 809 String[] englishList = { 810 "\u0075\u0075", // u u 811 "\u0075\u00fc", // u u-umlaut 812 "\u0075\u01d6", // u u-umlaut-macron 813 "\u0075\u016b", // u u-macron 814 "\u0075\u1e7b", // u u-macron-umlaut 815 "\u00fc\u0075", // u-umlaut u 816 "\u00fc\u00fc", // u-umlaut u-umlaut 817 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 818 "\u00fc\u016b", // u-umlaut u-macron 819 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 820 "\u01d6\u0075", // u-umlaut-macron u 821 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 822 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 823 "\u01d6\u016b", // u-umlaut-macron u-macron 824 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 825 "\u016b\u0075", // u-macron u 826 "\u016b\u00fc", // u-macron u-umlaut 827 "\u016b\u01d6", // u-macron u-umlaut-macron 828 "\u016b\u016b", // u-macron u-macron 829 "\u016b\u1e7b", // u-macron u-macron-umlaut 830 "\u1e7b\u0075", // u-macron-umlaut u 831 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 832 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 833 "\u1e7b\u016b", // u-macron-umlaut u-macron 834 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 835 }; 836 Collator english = Collator.getInstance(Locale.ENGLISH); 837 838 logln("Testing English order..."); 839 checkListOrder(englishList, english); 840 841 logln("Testing English order without decomposition..."); 842 english.setDecomposition(Collator.NO_DECOMPOSITION); 843 checkListOrder(englishList, english); 844 } 845 846 private void checkListOrder(String[] sortedList, Collator c) { 847 // this function uses the specified Collator to make sure the 848 // passed-in list is already sorted into ascending order 849 for (int i = 0; i < sortedList.length - 1; i++) { 850 if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) { 851 errln("List out of order at element #" + i + ": " 852 + prettify(sortedList[i]) + " >= " 853 + prettify(sortedList[i + 1])); 854 } 855 } 856 } 857 858 // CollationElementIterator set doesn't work propertly with next/prev 859 public void Test4663220() { 860 RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US); 861 CharacterIterator stringIter = new StringCharacterIterator("fox"); 862 CollationElementIterator iter = collator.getCollationElementIterator(stringIter); 863 864 int[] elements_next = new int[3]; 865 logln("calling next:"); 866 for (int i = 0; i < 3; ++i) { 867 logln("[" + i + "] " + (elements_next[i] = iter.next())); 868 } 869 870 int[] elements_fwd = new int[3]; 871 logln("calling set/next:"); 872 for (int i = 0; i < 3; ++i) { 873 iter.setOffset(i); 874 logln("[" + i + "] " + (elements_fwd[i] = iter.next())); 875 } 876 877 for (int i = 0; i < 3; ++i) { 878 if (elements_next[i] != elements_fwd[i]) { 879 errln("mismatch at position " + i + 880 ": " + elements_next[i] + 881 " != " + elements_fwd[i]); 882 } 883 } 884 } 885 886 //------------------------------------------------------------------------ 887 // Internal utilities 888 // 889 private void compareArray(Collator c, String[] tests) { 890 for (int i = 0; i < tests.length; i += 3) { 891 892 int expect = 0; 893 if (tests[i+1].equals("<")) { 894 expect = -1; 895 } else if (tests[i+1].equals(">")) { 896 expect = 1; 897 } else if (tests[i+1].equals("=")) { 898 expect = 0; 899 } else { 900 expect = Integer.decode(tests[i+1]).intValue(); 901 } 902 903 int result = c.compare(tests[i], tests[i+2]); 904 if (sign(result) != sign(expect)) 905 { 906 errln( i/3 + ": compare(" + prettify(tests[i]) 907 + " , " + prettify(tests[i+2]) 908 + ") got " + result + "; expected " + expect); 909 } 910 else 911 { 912 // Collator.compare worked OK; now try the collation keys 913 CollationKey k1 = c.getCollationKey(tests[i]); 914 CollationKey k2 = c.getCollationKey(tests[i+2]); 915 916 result = k1.compareTo(k2); 917 if (sign(result) != sign(expect)) { 918 errln( i/3 + ": key(" + prettify(tests[i]) 919 + ").compareTo(key(" + prettify(tests[i+2]) 920 + ")) got " + result + "; expected " + expect); 921 922 errln(" " + prettify(k1) + " vs. " + prettify(k2)); 923 } 924 } 925 } 926 } 927 928 private static final int sign(int i) { 929 if (i < 0) return -1; 930 if (i > 0) return 1; 931 return 0; 932 } 933 934 935 static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 936 937 String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 938 String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 939 String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck"; 940} 941