1/* 2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24/** 25 * @test 26 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154 27 * 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241 28 * 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736 29 * 4133509 4139572 4141640 4179126 4179686 4244884 4663220 30 * @library /java/text/testlib 31 * @summary Regression tests for Collation and associated classes 32 * @modules jdk.localedata 33 */ 34/* 35(C) Copyright Taligent, Inc. 1996 - All Rights Reserved 36(C) Copyright IBM Corp. 1996 - All Rights Reserved 37 38 The original version of this source code and documentation is copyrighted and 39owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are 40provided under terms of a License Agreement between Taligent and Sun. This 41technology is protected by multiple US and International patents. This notice and 42attribution to Taligent may not be removed. 43 Taligent is a registered trademark of Taligent, Inc. 44*/ 45 46import java.text.*; 47import java.util.Locale; 48import java.util.Vector; 49 50 51public class Regression extends CollatorTest { 52 53 public static void main(String[] args) throws Exception { 54 new Regression().run(args); 55 } 56 57 // CollationElementIterator.reset() doesn't work 58 // 59 public void Test4048446() { 60 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 61 CollationElementIterator i2 = en_us.getCollationElementIterator(test1); 62 63 while ( i1.next() != CollationElementIterator.NULLORDER ) { 64 } 65 i1.reset(); 66 67 assertEqual(i1, i2); 68 } 69 70 71 // Collator -> rules -> Collator round-trip broken for expanding characters 72 // 73 public void Test4051866() throws ParseException { 74 // Build a collator containing expanding characters 75 RuleBasedCollator c1 = new RuleBasedCollator("< o " 76 +"& oe ,o\u3080" 77 +"& oe ,\u1530 ,O" 78 +"& OE ,O\u3080" 79 +"& OE ,\u1520" 80 +"< p ,P"); 81 82 // Build another using the rules from the first 83 RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules()); 84 85 // Make sure they're the same 86 if (!c1.getRules().equals(c2.getRules())) { 87 errln("Rules are not equal"); 88 } 89 } 90 91 // Collator thinks "black-bird" == "black" 92 // 93 public void Test4053636() { 94 if (en_us.equals("black-bird","black")) { 95 errln("black-bird == black"); 96 } 97 } 98 99 100 // CollationElementIterator will not work correctly if the associated 101 // Collator object's mode is changed 102 // 103 public void Test4054238() { 104 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 105 106 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 107 CollationElementIterator i1 = en_us.getCollationElementIterator(test3); 108 109 c.setDecomposition(Collator.NO_DECOMPOSITION); 110 CollationElementIterator i2 = en_us.getCollationElementIterator(test3); 111 112 // At this point, BOTH iterators should use NO_DECOMPOSITION, since the 113 // collator itself is in that mode 114 assertEqual(i1, i2); 115 } 116 117 // Collator.IDENTICAL documented but not implemented 118 // 119 public void Test4054734() { 120 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 121 try { 122 c.setStrength(Collator.IDENTICAL); 123 } 124 catch (Exception e) { 125 errln("Caught " + e.toString() + " setting Collator.IDENTICAL"); 126 } 127 128 String[] decomp = { 129 "\u0001", "<", "\u0002", 130 "\u0001", "=", "\u0001", 131 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 132 "\u00C0", "=", "A\u0300" // Decomp should make these equal 133 }; 134 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 135 compareArray(c, decomp); 136 137 String[] nodecomp = { 138 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 139 }; 140 c.setDecomposition(Collator.NO_DECOMPOSITION); 141 compareArray(c, nodecomp); 142 } 143 144 // Full Decomposition mode not implemented 145 // 146 public void Test4054736() { 147 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 148 c.setDecomposition(Collator.FULL_DECOMPOSITION); 149 150 String[] tests = { 151 "\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed 152 }; 153 154 compareArray(c, tests); 155 } 156 157 // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean 158 // 159 public void Test4058613() { 160 // Creating a default collator doesn't work when Korean is the default 161 // locale 162 163 Locale oldDefault = Locale.getDefault(); 164 165 Locale.setDefault( Locale.KOREAN ); 166 try { 167 Collator c = Collator.getInstance(); 168 169 // Since the fix to this bug was to turn of decomposition for Korean collators, 170 // ensure that's what we got 171 if (c.getDecomposition() != Collator.NO_DECOMPOSITION) { 172 errln("Decomposition is not set to NO_DECOMPOSITION"); 173 } 174 } 175 finally { 176 Locale.setDefault(oldDefault); 177 } 178 } 179 180 // RuleBasedCollator.getRules does not return the exact pattern as input 181 // for expanding character sequences 182 // 183 public void Test4059820() { 184 RuleBasedCollator c = null; 185 try { 186 c = new RuleBasedCollator("< a < b , c/a < d < z"); 187 } catch (ParseException e) { 188 errln("Exception building collator: " + e.toString()); 189 return; 190 } 191 if ( c.getRules().indexOf("c/a") == -1) { 192 errln("returned rules do not contain 'c/a'"); 193 } 194 } 195 196 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 197 // 198 public void Test4060154() { 199 RuleBasedCollator c = null; 200 try { 201 c = new RuleBasedCollator("< g, G < h, H < i, I < j, J" 202 + " & H < \u0131, \u0130, i, I" ); 203 } catch (ParseException e) { 204 errln("Exception building collator: " + e.toString()); 205 return; 206 } 207 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 208 209 String[] tertiary = { 210 "A", "<", "B", 211 "H", "<", "\u0131", 212 "H", "<", "I", 213 "\u0131", "<", "\u0130", 214 "\u0130", "<", "i", 215 "\u0130", ">", "H", 216 }; 217 c.setStrength(Collator.TERTIARY); 218 compareArray(c, tertiary); 219 220 String[] secondary = { 221 "H", "<", "I", 222 "\u0131", "=", "\u0130", 223 }; 224 c.setStrength(Collator.PRIMARY); 225 compareArray(c, secondary); 226 }; 227 228 // Secondary/Tertiary comparison incorrect in French Secondary 229 // 230 public void Test4062418() throws ParseException { 231 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 232 c.setStrength(Collator.SECONDARY); 233 234 String[] tests = { 235 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 236 }; 237 238 compareArray(c, tests); 239 } 240 241 // Collator.compare() method broken if either string contains spaces 242 // 243 public void Test4065540() { 244 if (en_us.compare("abcd e", "abcd f") == 0) { 245 errln("'abcd e' == 'abcd f'"); 246 } 247 } 248 249 // Unicode characters need to be recursively decomposed to get the 250 // correct result. For example, 251 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 252 // 253 public void Test4066189() { 254 String test1 = "\u1EB1"; 255 String test2 = "a\u0306\u0300"; 256 257 RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone(); 258 c1.setDecomposition(Collator.FULL_DECOMPOSITION); 259 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 260 261 RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone(); 262 c2.setDecomposition(Collator.NO_DECOMPOSITION); 263 CollationElementIterator i2 = en_us.getCollationElementIterator(test2); 264 265 assertEqual(i1, i2); 266 } 267 268 // French secondary collation checking at the end of compare iteration fails 269 // 270 public void Test4066696() { 271 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 272 c.setStrength(Collator.SECONDARY); 273 274 String[] tests = { 275 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 276 }; 277 278 compareArray(c, tests); 279 } 280 281 282 // Bad canonicalization of same-class combining characters 283 // 284 public void Test4076676() { 285 // These combining characters are all in the same class, so they should not 286 // be reordered, and they should compare as unequal. 287 String s1 = "A\u0301\u0302\u0300"; 288 String s2 = "A\u0302\u0300\u0301"; 289 290 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 291 c.setStrength(Collator.TERTIARY); 292 293 if (c.compare(s1,s2) == 0) { 294 errln("Same-class combining chars were reordered"); 295 } 296 } 297 298 299 // RuleBasedCollator.equals(null) throws NullPointerException 300 // 301 public void Test4079231() { 302 try { 303 if (en_us.equals(null)) { 304 errln("en_us.equals(null) returned true"); 305 } 306 } 307 catch (Exception e) { 308 errln("en_us.equals(null) threw " + e.toString()); 309 } 310 } 311 312 // RuleBasedCollator breaks on "< a < bb" rule 313 // 314 public void Test4078588() throws ParseException { 315 RuleBasedCollator rbc=new RuleBasedCollator("< a < bb"); 316 317 int result = rbc.compare("a","bb"); 318 319 if (result != -1) { 320 errln("Compare(a,bb) returned " + result + "; expected -1"); 321 } 322 } 323 324 // Combining characters in different classes not reordered properly. 325 // 326 public void Test4081866() throws ParseException { 327 // These combining characters are all in different classes, 328 // so they should be reordered and the strings should compare as equal. 329 String s1 = "A\u0300\u0316\u0327\u0315"; 330 String s2 = "A\u0327\u0316\u0315\u0300"; 331 332 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 333 c.setStrength(Collator.TERTIARY); 334 335 // Now that the default collators are set to NO_DECOMPOSITION 336 // (as a result of fixing bug 4114077), we must set it explicitly 337 // when we're testing reordering behavior. -- lwerner, 5/5/98 338 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 339 340 if (c.compare(s1,s2) != 0) { 341 errln("Combining chars were not reordered"); 342 } 343 } 344 345 // string comparison errors in Scandinavian collators 346 // 347 public void Test4087241() { 348 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance( 349 new Locale("da", "DK")); 350 c.setStrength(Collator.SECONDARY); 351 352 String[] tests = { 353 "\u007a", "<", "\u00e6", // z < ae 354 "a\u0308", "<", "a\u030a", // a-unlaut < a-ring 355 "Y", "<", "u\u0308", // Y < u-umlaut 356 }; 357 358 compareArray(c, tests); 359 } 360 361 // CollationKey takes ignorable strings into account when it shouldn't 362 // 363 public void Test4087243() { 364 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 365 c.setStrength(Collator.TERTIARY); 366 367 String[] tests = { 368 "123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A 369 }; 370 371 compareArray(c, tests); 372 } 373 374 // Mu/micro conflict 375 // Micro symbol and greek lowercase letter Mu should sort identically 376 // 377 public void Test4092260() { 378 Collator c = Collator.getInstance(new Locale("el", "")); 379 380 // will only be equal when FULL_DECOMPOSITION is used 381 c.setDecomposition(Collator.FULL_DECOMPOSITION); 382 383 String[] tests = { 384 "\u00B5", "=", "\u03BC", 385 }; 386 387 compareArray(c, tests); 388 } 389 390 void Test4095316() { 391 Collator c = Collator.getInstance(new Locale("el", "GR")); 392 c.setStrength(Collator.TERTIARY); 393 // javadocs for RuleBasedCollator clearly specify that characters containing compatability 394 // chars MUST use FULL_DECOMPOSITION to get accurate comparisons. 395 c.setDecomposition(Collator.FULL_DECOMPOSITION); 396 397 String[] tests = { 398 "\u03D4", "=", "\u03AB", 399 }; 400 401 compareArray(c, tests); 402 } 403 404 public void Test4101940() { 405 try { 406 RuleBasedCollator c = new RuleBasedCollator("< a < b"); 407 CollationElementIterator i = c.getCollationElementIterator(""); 408 i.reset(); 409 410 if (i.next() != i.NULLORDER) { 411 errln("next did not return NULLORDER"); 412 } 413 } 414 catch (Exception e) { 415 errln("Caught " + e ); 416 } 417 } 418 419 // Collator.compare not handling spaces properly 420 // 421 public void Test4103436() { 422 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 423 c.setStrength(Collator.TERTIARY); 424 425 String[] tests = { 426 "file", "<", "file access", 427 "file", "<", "fileaccess", 428 }; 429 430 compareArray(c, tests); 431 } 432 433 // Collation not Unicode conformant with Hangul syllables 434 // 435 public void Test4114076() { 436 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 437 c.setStrength(Collator.TERTIARY); 438 439 // 440 // With Canonical decomposition, Hangul syllables should get decomposed 441 // into Jamo, but Jamo characters should not be decomposed into 442 // conjoining Jamo 443 // 444 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 445 String[] test1 = { 446 "\ud4db", "=", "\u1111\u1171\u11b6", 447 }; 448 compareArray(c, test1); 449 450 // Full decomposition result should be the same as canonical decomposition 451 // for all hangul. 452 c.setDecomposition(Collator.FULL_DECOMPOSITION); 453 compareArray(c, test1); 454 455 } 456 457 458 // Collator.getCollationKey was hanging on certain character sequences 459 // 460 public void Test4124632() throws Exception { 461 Collator coll = Collator.getInstance(Locale.JAPAN); 462 463 try { 464 coll.getCollationKey("A\u0308bc"); 465 } catch (OutOfMemoryError e) { 466 errln("Ran out of memory -- probably an infinite loop"); 467 } 468 } 469 470 // sort order of french words with multiple accents has errors 471 // 472 public void Test4132736() { 473 Collator c = Collator.getInstance(Locale.FRANCE); 474 475 String[] test1 = { 476 "e\u0300e\u0301", "<", "e\u0301e\u0300", 477 "e\u0300\u0301", ">", "e\u0301\u0300", 478 }; 479 compareArray(c, test1); 480 } 481 482 // The sorting using java.text.CollationKey is not in the exact order 483 // 484 public void Test4133509() { 485 String[] test1 = { 486 "Exception", "<", "ExceptionInInitializerError", 487 "Graphics", "<", "GraphicsEnvironment", 488 "String", "<", "StringBuffer", 489 }; 490 compareArray(en_us, test1); 491 } 492 493 // Collation with decomposition off doesn't work for Europe 494 // 495 public void Test4114077() { 496 // Ensure that we get the same results with decomposition off 497 // as we do with it on.... 498 499 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 500 c.setStrength(Collator.TERTIARY); 501 502 String[] test1 = { 503 "\u00C0", "=", "A\u0300", // Should be equivalent 504 "p\u00eache", ">", "p\u00e9ch\u00e9", 505 "\u0204", "=", "E\u030F", 506 "\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute 507 // -> a, ring, acute 508 "A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal 509 }; 510 c.setDecomposition(Collator.NO_DECOMPOSITION); 511 compareArray(c, test1); 512 513 String[] test2 = { 514 "A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal 515 }; 516 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 517 compareArray(c, test2); 518 } 519 520 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 521 // 522 public void Test4141640() { 523 // 524 // Rather than just creating a Swedish collator, we might as well 525 // try to instantiate one for every locale available on the system 526 // in order to prevent this sort of bug from cropping up in the future 527 // 528 Locale[] locales = Collator.getAvailableLocales(); 529 530 for (int i = 0; i < locales.length; i++) { 531 try { 532 Collator c = Collator.getInstance(locales[i]); 533 } catch (Exception e) { 534 errln("Caught " + e + " creating collator for " + locales[i]); 535 } 536 } 537 } 538 539 // getCollationKey throws exception for spanish text 540 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 541 // 542 public void Test4139572() { 543 // 544 // Code pasted straight from the bug report 545 // 546 // create spanish locale and collator 547 Locale l = new Locale("es", "es"); 548 Collator col = Collator.getInstance(l); 549 550 // this spanish phrase kills it! 551 col.getCollationKey("Nombre De Objeto"); 552 } 553 554 // RuleBasedCollator doesn't use getCollationElementIterator internally 555 // 556 public void Test4146160() throws ParseException { 557 // 558 // Use a custom collator class whose getCollationElementIterator 559 // methods increment a count.... 560 // 561 My4146160Collator.count = 0; 562 new My4146160Collator().getCollationKey("1"); 563 if (My4146160Collator.count < 1) { 564 errln("getCollationElementIterator not called"); 565 } 566 567 My4146160Collator.count = 0; 568 new My4146160Collator().compare("1", "2"); 569 if (My4146160Collator.count < 1) { 570 errln("getCollationElementIterator not called"); 571 } 572 } 573 574 static class My4146160Collator extends RuleBasedCollator { 575 public My4146160Collator() throws ParseException { 576 super(Regression.en_us.getRules()); 577 } 578 579 public CollationElementIterator getCollationElementIterator( 580 String text) { 581 count++; 582 return super.getCollationElementIterator(text); 583 } 584 public CollationElementIterator getCollationElementIterator( 585 CharacterIterator text) { 586 count++; 587 return super.getCollationElementIterator(text); 588 } 589 590 public static int count = 0; 591 }; 592 593 // CollationElementIterator.previous broken for expanding char sequences 594 // 595 public void Test4179686() throws ParseException { 596 597 // Create a collator with a few expanding character sequences in it.... 598 RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules() 599 + " & ae ; \u00e4 & AE ; \u00c4" 600 + " & oe ; \u00f6 & OE ; \u00d6" 601 + " & ue ; \u00fc & UE ; \u00dc"); 602 603 String text = "T\u00f6ne"; // o-umlaut 604 605 CollationElementIterator iter = coll.getCollationElementIterator(text); 606 Vector elements = new Vector(); 607 int elem; 608 609 // Iterate forward and collect all of the elements into a Vector 610 while ((elem = iter.next()) != iter.NULLORDER) { 611 elements.addElement(new Integer(elem)); 612 } 613 614 // Now iterate backward and make sure they're the same 615 int index = elements.size() - 1; 616 while ((elem = iter.previous()) != iter.NULLORDER) { 617 int expect = ((Integer)elements.elementAt(index)).intValue(); 618 619 if (elem != expect) { 620 errln("Mismatch at index " + index 621 + ": got " + Integer.toString(elem,16) 622 + ", expected " + Integer.toString(expect,16)); 623 } 624 index--; 625 } 626 } 627 628 public void Test4244884() throws ParseException { 629 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 630 coll = new RuleBasedCollator(coll.getRules() 631 + " & C < ch , cH , Ch , CH < cat < crunchy"); 632 633 String[] testStrings = new String[] { 634 "car", 635 "cave", 636 "clamp", 637 "cramp", 638 "czar", 639 "church", 640 "catalogue", 641 "crunchy", 642 "dog" 643 }; 644 645 for (int i = 1; i < testStrings.length; i++) { 646 if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) { 647 errln("error: \"" + testStrings[i - 1] 648 + "\" is greater than or equal to \"" + testStrings[i] 649 + "\"."); 650 } 651 } 652 } 653 654 public void Test4179216() throws ParseException { 655 // you can position a CollationElementIterator in the middle of 656 // a contracting character sequence, yielding a bogus collation 657 // element 658 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 659 coll = new RuleBasedCollator(coll.getRules() 660 + " & C < ch , cH , Ch , CH < cat < crunchy"); 661 String testText = "church church catcatcher runcrunchynchy"; 662 CollationElementIterator iter = coll.getCollationElementIterator( 663 testText); 664 665 // test that the "ch" combination works properly 666 iter.setOffset(4); 667 int elt4 = CollationElementIterator.primaryOrder(iter.next()); 668 669 iter.reset(); 670 int elt0 = CollationElementIterator.primaryOrder(iter.next()); 671 672 iter.setOffset(5); 673 int elt5 = CollationElementIterator.primaryOrder(iter.next()); 674 675 if (elt4 != elt0 || elt5 != elt0) 676 errln("The collation elements at positions 0 (" + elt0 + "), 4 (" 677 + elt4 + "), and 5 (" + elt5 + ") don't match."); 678 679 // test that the "cat" combination works properly 680 iter.setOffset(14); 681 int elt14 = CollationElementIterator.primaryOrder(iter.next()); 682 683 iter.setOffset(15); 684 int elt15 = CollationElementIterator.primaryOrder(iter.next()); 685 686 iter.setOffset(16); 687 int elt16 = CollationElementIterator.primaryOrder(iter.next()); 688 689 iter.setOffset(17); 690 int elt17 = CollationElementIterator.primaryOrder(iter.next()); 691 692 iter.setOffset(18); 693 int elt18 = CollationElementIterator.primaryOrder(iter.next()); 694 695 iter.setOffset(19); 696 int elt19 = CollationElementIterator.primaryOrder(iter.next()); 697 698 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 699 || elt14 != elt18 || elt14 != elt19) 700 errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = " 701 + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17 702 + ", elt18 = " + elt18 + ", elt19 = " + elt19); 703 704 // now generate a complete list of the collation elements, 705 // first using next() and then using setOffset(), and 706 // make sure both interfaces return the same set of elements 707 iter.reset(); 708 709 int elt = iter.next(); 710 int count = 0; 711 while (elt != CollationElementIterator.NULLORDER) { 712 ++count; 713 elt = iter.next(); 714 } 715 716 String[] nextElements = new String[count]; 717 String[] setOffsetElements = new String[count]; 718 int lastPos = 0; 719 720 iter.reset(); 721 elt = iter.next(); 722 count = 0; 723 while (elt != CollationElementIterator.NULLORDER) { 724 nextElements[count++] = testText.substring(lastPos, iter.getOffset()); 725 lastPos = iter.getOffset(); 726 elt = iter.next(); 727 } 728 count = 0; 729 for (int i = 0; i < testText.length(); ) { 730 iter.setOffset(i); 731 lastPos = iter.getOffset(); 732 elt = iter.next(); 733 setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset()); 734 i = iter.getOffset(); 735 } 736 for (int i = 0; i < nextElements.length; i++) { 737 if (nextElements[i].equals(setOffsetElements[i])) { 738 logln(nextElements[i]); 739 } else { 740 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded " 741 + setOffsetElements[i]); 742 } 743 } 744 } 745 746 public void Test4216006() throws Exception { 747 // rule parser barfs on "<\u00e0=a\u0300", and on other cases 748 // where the same token (after normalization) appears twice in a row 749 boolean caughtException = false; 750 try { 751 RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300"); 752 } 753 catch (ParseException e) { 754 caughtException = true; 755 } 756 if (!caughtException) { 757 throw new Exception("\"a<a\" collation sequence didn't cause parse error!"); 758 } 759 760 RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300"); 761 collator.setDecomposition(Collator.FULL_DECOMPOSITION); 762 collator.setStrength(Collator.IDENTICAL); 763 764 String[] tests = { 765 "a\u0300", "=", "\u00e0", 766 "\u00e0", "=", "a\u0300" 767 }; 768 769 compareArray(collator, tests); 770 } 771 772 public void Test4171974() { 773 // test French accent ordering more thoroughly 774 String[] frenchList = { 775 "\u0075\u0075", // u u 776 "\u00fc\u0075", // u-umlaut u 777 "\u01d6\u0075", // u-umlaut-macron u 778 "\u016b\u0075", // u-macron u 779 "\u1e7b\u0075", // u-macron-umlaut u 780 "\u0075\u00fc", // u u-umlaut 781 "\u00fc\u00fc", // u-umlaut u-umlaut 782 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 783 "\u016b\u00fc", // u-macron u-umlaut 784 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 785 "\u0075\u01d6", // u u-umlaut-macron 786 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 787 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 788 "\u016b\u01d6", // u-macron u-umlaut-macron 789 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 790 "\u0075\u016b", // u u-macron 791 "\u00fc\u016b", // u-umlaut u-macron 792 "\u01d6\u016b", // u-umlaut-macron u-macron 793 "\u016b\u016b", // u-macron u-macron 794 "\u1e7b\u016b", // u-macron-umlaut u-macron 795 "\u0075\u1e7b", // u u-macron-umlaut 796 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 797 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 798 "\u016b\u1e7b", // u-macron u-macron-umlaut 799 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 800 }; 801 Collator french = Collator.getInstance(Locale.FRENCH); 802 803 logln("Testing French order..."); 804 checkListOrder(frenchList, french); 805 806 logln("Testing French order without decomposition..."); 807 french.setDecomposition(Collator.NO_DECOMPOSITION); 808 checkListOrder(frenchList, french); 809 810 String[] englishList = { 811 "\u0075\u0075", // u u 812 "\u0075\u00fc", // u u-umlaut 813 "\u0075\u01d6", // u u-umlaut-macron 814 "\u0075\u016b", // u u-macron 815 "\u0075\u1e7b", // u u-macron-umlaut 816 "\u00fc\u0075", // u-umlaut u 817 "\u00fc\u00fc", // u-umlaut u-umlaut 818 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 819 "\u00fc\u016b", // u-umlaut u-macron 820 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 821 "\u01d6\u0075", // u-umlaut-macron u 822 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 823 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 824 "\u01d6\u016b", // u-umlaut-macron u-macron 825 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 826 "\u016b\u0075", // u-macron u 827 "\u016b\u00fc", // u-macron u-umlaut 828 "\u016b\u01d6", // u-macron u-umlaut-macron 829 "\u016b\u016b", // u-macron u-macron 830 "\u016b\u1e7b", // u-macron u-macron-umlaut 831 "\u1e7b\u0075", // u-macron-umlaut u 832 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 833 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 834 "\u1e7b\u016b", // u-macron-umlaut u-macron 835 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 836 }; 837 Collator english = Collator.getInstance(Locale.ENGLISH); 838 839 logln("Testing English order..."); 840 checkListOrder(englishList, english); 841 842 logln("Testing English order without decomposition..."); 843 english.setDecomposition(Collator.NO_DECOMPOSITION); 844 checkListOrder(englishList, english); 845 } 846 847 private void checkListOrder(String[] sortedList, Collator c) { 848 // this function uses the specified Collator to make sure the 849 // passed-in list is already sorted into ascending order 850 for (int i = 0; i < sortedList.length - 1; i++) { 851 if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) { 852 errln("List out of order at element #" + i + ": " 853 + prettify(sortedList[i]) + " >= " 854 + prettify(sortedList[i + 1])); 855 } 856 } 857 } 858 859 // CollationElementIterator set doesn't work propertly with next/prev 860 public void Test4663220() { 861 RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US); 862 CharacterIterator stringIter = new StringCharacterIterator("fox"); 863 CollationElementIterator iter = collator.getCollationElementIterator(stringIter); 864 865 int[] elements_next = new int[3]; 866 logln("calling next:"); 867 for (int i = 0; i < 3; ++i) { 868 logln("[" + i + "] " + (elements_next[i] = iter.next())); 869 } 870 871 int[] elements_fwd = new int[3]; 872 logln("calling set/next:"); 873 for (int i = 0; i < 3; ++i) { 874 iter.setOffset(i); 875 logln("[" + i + "] " + (elements_fwd[i] = iter.next())); 876 } 877 878 for (int i = 0; i < 3; ++i) { 879 if (elements_next[i] != elements_fwd[i]) { 880 errln("mismatch at position " + i + 881 ": " + elements_next[i] + 882 " != " + elements_fwd[i]); 883 } 884 } 885 } 886 887 //------------------------------------------------------------------------ 888 // Internal utilities 889 // 890 private void compareArray(Collator c, String[] tests) { 891 for (int i = 0; i < tests.length; i += 3) { 892 893 int expect = 0; 894 if (tests[i+1].equals("<")) { 895 expect = -1; 896 } else if (tests[i+1].equals(">")) { 897 expect = 1; 898 } else if (tests[i+1].equals("=")) { 899 expect = 0; 900 } else { 901 expect = Integer.decode(tests[i+1]).intValue(); 902 } 903 904 int result = c.compare(tests[i], tests[i+2]); 905 if (sign(result) != sign(expect)) 906 { 907 errln( i/3 + ": compare(" + prettify(tests[i]) 908 + " , " + prettify(tests[i+2]) 909 + ") got " + result + "; expected " + expect); 910 } 911 else 912 { 913 // Collator.compare worked OK; now try the collation keys 914 CollationKey k1 = c.getCollationKey(tests[i]); 915 CollationKey k2 = c.getCollationKey(tests[i+2]); 916 917 result = k1.compareTo(k2); 918 if (sign(result) != sign(expect)) { 919 errln( i/3 + ": key(" + prettify(tests[i]) 920 + ").compareTo(key(" + prettify(tests[i+2]) 921 + ")) got " + result + "; expected " + expect); 922 923 errln(" " + prettify(k1) + " vs. " + prettify(k2)); 924 } 925 } 926 } 927 } 928 929 private static final int sign(int i) { 930 if (i < 0) return -1; 931 if (i > 0) return 1; 932 return 0; 933 } 934 935 936 static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 937 938 String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 939 String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 940 String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck"; 941} 942