1/* 2 * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24/** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 39 * @library /test/lib 40 * @build jdk.test.lib.RandomFactory 41 * @run main RegExTest 42 * @key randomness 43 */ 44 45import java.util.function.Function; 46import java.util.regex.*; 47import java.util.Random; 48import java.util.Scanner; 49import java.io.*; 50import java.nio.file.*; 51import java.util.*; 52import java.nio.CharBuffer; 53import java.util.function.Predicate; 54import jdk.test.lib.RandomFactory; 55 56/** 57 * This is a test class created to check the operation of 58 * the Pattern and Matcher classes. 59 */ 60public class RegExTest { 61 62 private static Random generator = RandomFactory.getRandom(); 63 private static boolean failure = false; 64 private static int failCount = 0; 65 private static String firstFailure = null; 66 67 /** 68 * Main to interpret arguments and run several tests. 69 * 70 */ 71 public static void main(String[] args) throws Exception { 72 // Most of the tests are in a file 73 processFile("TestCases.txt"); 74 //processFile("PerlCases.txt"); 75 processFile("BMPTestCases.txt"); 76 processFile("SupplementaryTestCases.txt"); 77 78 // These test many randomly generated char patterns 79 bm(); 80 slice(); 81 82 // These are hard to put into the file 83 escapes(); 84 blankInput(); 85 86 // Substitition tests on randomly generated sequences 87 globalSubstitute(); 88 stringbufferSubstitute(); 89 stringbuilderSubstitute(); 90 91 substitutionBasher(); 92 substitutionBasher2(); 93 94 // Canonical Equivalence 95 ceTest(); 96 97 // Anchors 98 anchorTest(); 99 100 // boolean match calls 101 matchesTest(); 102 lookingAtTest(); 103 104 // Pattern API 105 patternMatchesTest(); 106 107 // Misc 108 lookbehindTest(); 109 nullArgumentTest(); 110 backRefTest(); 111 groupCaptureTest(); 112 caretTest(); 113 charClassTest(); 114 emptyPatternTest(); 115 findIntTest(); 116 group0Test(); 117 longPatternTest(); 118 octalTest(); 119 ampersandTest(); 120 negationTest(); 121 splitTest(); 122 appendTest(); 123 caseFoldingTest(); 124 commentsTest(); 125 unixLinesTest(); 126 replaceFirstTest(); 127 gTest(); 128 zTest(); 129 serializeTest(); 130 reluctantRepetitionTest(); 131 multilineDollarTest(); 132 dollarAtEndTest(); 133 caretBetweenTerminatorsTest(); 134 // This RFE rejected in Tiger numOccurrencesTest(); 135 javaCharClassTest(); 136 nonCaptureRepetitionTest(); 137 notCapturedGroupCurlyMatchTest(); 138 escapedSegmentTest(); 139 literalPatternTest(); 140 literalReplacementTest(); 141 regionTest(); 142 toStringTest(); 143 negatedCharClassTest(); 144 findFromTest(); 145 boundsTest(); 146 unicodeWordBoundsTest(); 147 caretAtEndTest(); 148 wordSearchTest(); 149 hitEndTest(); 150 toMatchResultTest(); 151 toMatchResultTest2(); 152 surrogatesInClassTest(); 153 removeQEQuotingTest(); 154 namedGroupCaptureTest(); 155 nonBmpClassComplementTest(); 156 unicodePropertiesTest(); 157 unicodeHexNotationTest(); 158 unicodeClassesTest(); 159 unicodeCharacterNameTest(); 160 horizontalAndVerticalWSTest(); 161 linebreakTest(); 162 branchTest(); 163 groupCurlyNotFoundSuppTest(); 164 groupCurlyBackoffTest(); 165 patternAsPredicate(); 166 invalidFlags(); 167 embeddedFlags(); 168 grapheme(); 169 expoBacktracking(); 170 171 if (failure) { 172 throw new 173 RuntimeException("RegExTest failed, 1st failure: " + 174 firstFailure); 175 } else { 176 System.err.println("OKAY: All tests passed."); 177 } 178 } 179 180 // Utility functions 181 182 private static String getRandomAlphaString(int length) { 183 StringBuffer buf = new StringBuffer(length); 184 for (int i=0; i<length; i++) { 185 char randChar = (char)(97 + generator.nextInt(26)); 186 buf.append(randChar); 187 } 188 return buf.toString(); 189 } 190 191 private static void check(Matcher m, String expected) { 192 m.find(); 193 if (!m.group().equals(expected)) 194 failCount++; 195 } 196 197 private static void check(Matcher m, String result, boolean expected) { 198 m.find(); 199 if (m.group().equals(result) != expected) 200 failCount++; 201 } 202 203 private static void check(Pattern p, String s, boolean expected) { 204 if (p.matcher(s).find() != expected) 205 failCount++; 206 } 207 208 private static void check(String p, String s, boolean expected) { 209 Matcher matcher = Pattern.compile(p).matcher(s); 210 if (matcher.find() != expected) 211 failCount++; 212 } 213 214 private static void check(String p, char c, boolean expected) { 215 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 216 Pattern pattern = Pattern.compile(propertyPattern); 217 char[] ca = new char[1]; ca[0] = c; 218 Matcher matcher = pattern.matcher(new String(ca)); 219 if (!matcher.find()) 220 failCount++; 221 } 222 223 private static void check(String p, int codePoint, boolean expected) { 224 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 225 Pattern pattern = Pattern.compile(propertyPattern); 226 char[] ca = Character.toChars(codePoint); 227 Matcher matcher = pattern.matcher(new String(ca)); 228 if (!matcher.find()) 229 failCount++; 230 } 231 232 private static void check(String p, int flag, String input, String s, 233 boolean expected) 234 { 235 Pattern pattern = Pattern.compile(p, flag); 236 Matcher matcher = pattern.matcher(input); 237 if (expected) 238 check(matcher, s, expected); 239 else 240 check(pattern, input, false); 241 } 242 243 private static void report(String testName) { 244 int spacesToAdd = 30 - testName.length(); 245 StringBuffer paddedNameBuffer = new StringBuffer(testName); 246 for (int i=0; i<spacesToAdd; i++) 247 paddedNameBuffer.append(" "); 248 String paddedName = paddedNameBuffer.toString(); 249 System.err.println(paddedName + ": " + 250 (failCount==0 ? "Passed":"Failed("+failCount+")")); 251 if (failCount > 0) { 252 failure = true; 253 254 if (firstFailure == null) { 255 firstFailure = testName; 256 } 257 } 258 259 failCount = 0; 260 } 261 262 /** 263 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 264 * supplementary characters. This method does NOT fully take care 265 * of the regex syntax. 266 */ 267 private static String toSupplementaries(String s) { 268 int length = s.length(); 269 StringBuffer sb = new StringBuffer(length * 2); 270 271 for (int i = 0; i < length; ) { 272 char c = s.charAt(i++); 273 if (c == '\\') { 274 sb.append(c); 275 if (i < length) { 276 c = s.charAt(i++); 277 sb.append(c); 278 if (c == 'u') { 279 // assume no syntax error 280 sb.append(s.charAt(i++)); 281 sb.append(s.charAt(i++)); 282 sb.append(s.charAt(i++)); 283 sb.append(s.charAt(i++)); 284 } 285 } 286 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 287 sb.append('\ud800').append((char)('\udc00'+c)); 288 } else { 289 sb.append(c); 290 } 291 } 292 return sb.toString(); 293 } 294 295 // Regular expression tests 296 297 // This is for bug 6178785 298 // Test if an expected NPE gets thrown when passing in a null argument 299 private static boolean check(Runnable test) { 300 try { 301 test.run(); 302 failCount++; 303 return false; 304 } catch (NullPointerException npe) { 305 return true; 306 } 307 } 308 309 private static void nullArgumentTest() { 310 check(() -> Pattern.compile(null)); 311 check(() -> Pattern.matches(null, null)); 312 check(() -> Pattern.matches("xyz", null)); 313 check(() -> Pattern.quote(null)); 314 check(() -> Pattern.compile("xyz").split(null)); 315 check(() -> Pattern.compile("xyz").matcher(null)); 316 317 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 318 m.matches(); 319 check(() -> m.appendTail((StringBuffer) null)); 320 check(() -> m.appendTail((StringBuilder)null)); 321 check(() -> m.replaceAll((String) null)); 322 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 323 check(() -> m.replaceFirst((String)null)); 324 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 325 check(() -> m.appendReplacement((StringBuffer)null, null)); 326 check(() -> m.appendReplacement((StringBuilder)null, null)); 327 check(() -> m.reset(null)); 328 check(() -> Matcher.quoteReplacement(null)); 329 //check(() -> m.usePattern(null)); 330 331 report("Null Argument"); 332 } 333 334 // This is for bug6635133 335 // Test if surrogate pair in Unicode escapes can be handled correctly. 336 private static void surrogatesInClassTest() throws Exception { 337 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 338 Matcher matcher = pattern.matcher("\ud834\udd22"); 339 if (!matcher.find()) 340 failCount++; 341 342 report("Surrogate pair in Unicode escape"); 343 } 344 345 // This is for bug6990617 346 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 347 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 348 // char is an octal digit. 349 private static void removeQEQuotingTest() throws Exception { 350 Pattern pattern = 351 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 352 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 353 if (!matcher.find()) 354 failCount++; 355 356 report("Remove Q/E Quoting"); 357 } 358 359 // This is for bug 4988891 360 // Test toMatchResult to see that it is a copy of the Matcher 361 // that is not affected by subsequent operations on the original 362 private static void toMatchResultTest() throws Exception { 363 Pattern pattern = Pattern.compile("squid"); 364 Matcher matcher = pattern.matcher( 365 "agiantsquidofdestinyasmallsquidoffate"); 366 matcher.find(); 367 int matcherStart1 = matcher.start(); 368 MatchResult mr = matcher.toMatchResult(); 369 if (mr == matcher) 370 failCount++; 371 int resultStart1 = mr.start(); 372 if (matcherStart1 != resultStart1) 373 failCount++; 374 matcher.find(); 375 int matcherStart2 = matcher.start(); 376 int resultStart2 = mr.start(); 377 if (matcherStart2 == resultStart2) 378 failCount++; 379 if (resultStart1 != resultStart2) 380 failCount++; 381 MatchResult mr2 = matcher.toMatchResult(); 382 if (mr == mr2) 383 failCount++; 384 if (mr2.start() != matcherStart2) 385 failCount++; 386 report("toMatchResult is a copy"); 387 } 388 389 private static void checkExpectedISE(Runnable test) { 390 try { 391 test.run(); 392 failCount++; 393 } catch (IllegalStateException x) { 394 } catch (IndexOutOfBoundsException xx) { 395 failCount++; 396 } 397 } 398 399 private static void checkExpectedIOOE(Runnable test) { 400 try { 401 test.run(); 402 failCount++; 403 } catch (IndexOutOfBoundsException x) {} 404 } 405 406 // This is for bug 8074678 407 // Test the result of toMatchResult throws ISE if no match is availble 408 private static void toMatchResultTest2() throws Exception { 409 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 410 matcher.find(); 411 MatchResult mr = matcher.toMatchResult(); 412 413 checkExpectedISE(() -> mr.start()); 414 checkExpectedISE(() -> mr.start(2)); 415 checkExpectedISE(() -> mr.end()); 416 checkExpectedISE(() -> mr.end(2)); 417 checkExpectedISE(() -> mr.group()); 418 checkExpectedISE(() -> mr.group(2)); 419 420 matcher = Pattern.compile("(match)").matcher("there is a match"); 421 matcher.find(); 422 MatchResult mr2 = matcher.toMatchResult(); 423 checkExpectedIOOE(() -> mr2.start(2)); 424 checkExpectedIOOE(() -> mr2.end(2)); 425 checkExpectedIOOE(() -> mr2.group(2)); 426 427 report("toMatchResult2 appropriate exceptions"); 428 } 429 430 // This is for bug 5013885 431 // Must test a slice to see if it reports hitEnd correctly 432 private static void hitEndTest() throws Exception { 433 // Basic test of Slice node 434 Pattern p = Pattern.compile("^squidattack"); 435 Matcher m = p.matcher("squack"); 436 m.find(); 437 if (m.hitEnd()) 438 failCount++; 439 m.reset("squid"); 440 m.find(); 441 if (!m.hitEnd()) 442 failCount++; 443 444 // Test Slice, SliceA and SliceU nodes 445 for (int i=0; i<3; i++) { 446 int flags = 0; 447 if (i==1) flags = Pattern.CASE_INSENSITIVE; 448 if (i==2) flags = Pattern.UNICODE_CASE; 449 p = Pattern.compile("^abc", flags); 450 m = p.matcher("ad"); 451 m.find(); 452 if (m.hitEnd()) 453 failCount++; 454 m.reset("ab"); 455 m.find(); 456 if (!m.hitEnd()) 457 failCount++; 458 } 459 460 // Test Boyer-Moore node 461 p = Pattern.compile("catattack"); 462 m = p.matcher("attack"); 463 m.find(); 464 if (!m.hitEnd()) 465 failCount++; 466 467 p = Pattern.compile("catattack"); 468 m = p.matcher("attackattackattackcatatta"); 469 m.find(); 470 if (!m.hitEnd()) 471 failCount++; 472 473 // 8184706: Matching u+0d at EOL against \R should hit-end 474 p = Pattern.compile("...\\R"); 475 m = p.matcher("cat" + (char)0x0a); 476 m.find(); 477 if (m.hitEnd()) 478 failCount++; 479 480 m = p.matcher("cat" + (char)0x0d); 481 m.find(); 482 if (!m.hitEnd()) 483 failCount++; 484 485 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 486 m.find(); 487 if (m.hitEnd()) 488 failCount++; 489 490 report("hitEnd"); 491 } 492 493 // This is for bug 4997476 494 // It is weird code submitted by customer demonstrating a regression 495 private static void wordSearchTest() throws Exception { 496 String testString = new String("word1 word2 word3"); 497 Pattern p = Pattern.compile("\\b"); 498 Matcher m = p.matcher(testString); 499 int position = 0; 500 int start = 0; 501 while (m.find(position)) { 502 start = m.start(); 503 if (start == testString.length()) 504 break; 505 if (m.find(start+1)) { 506 position = m.start(); 507 } else { 508 position = testString.length(); 509 } 510 if (testString.substring(start, position).equals(" ")) 511 continue; 512 if (!testString.substring(start, position-1).startsWith("word")) 513 failCount++; 514 } 515 report("Customer word search"); 516 } 517 518 // This is for bug 4994840 519 private static void caretAtEndTest() throws Exception { 520 // Problem only occurs with multiline patterns 521 // containing a beginning-of-line caret "^" followed 522 // by an expression that also matches the empty string. 523 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 524 Matcher matcher = pattern.matcher("\r"); 525 matcher.find(); 526 matcher.find(); 527 report("Caret at end"); 528 } 529 530 // This test is for 4979006 531 // Check to see if word boundary construct properly handles unicode 532 // non spacing marks 533 private static void unicodeWordBoundsTest() throws Exception { 534 String spaces = " "; 535 String wordChar = "a"; 536 String nsm = "\u030a"; 537 538 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 539 540 Pattern pattern = Pattern.compile("\\b"); 541 Matcher matcher = pattern.matcher(""); 542 // S=other B=word character N=non spacing mark .=word boundary 543 // SS.BB.SS 544 String input = spaces + wordChar + wordChar + spaces; 545 twoFindIndexes(input, matcher, 2, 4); 546 // SS.BBN.SS 547 input = spaces + wordChar +wordChar + nsm + spaces; 548 twoFindIndexes(input, matcher, 2, 5); 549 // SS.BN.SS 550 input = spaces + wordChar + nsm + spaces; 551 twoFindIndexes(input, matcher, 2, 4); 552 // SS.BNN.SS 553 input = spaces + wordChar + nsm + nsm + spaces; 554 twoFindIndexes(input, matcher, 2, 5); 555 // SSN.BB.SS 556 input = spaces + nsm + wordChar + wordChar + spaces; 557 twoFindIndexes(input, matcher, 3, 5); 558 // SS.BNB.SS 559 input = spaces + wordChar + nsm + wordChar + spaces; 560 twoFindIndexes(input, matcher, 2, 5); 561 // SSNNSS 562 input = spaces + nsm + nsm + spaces; 563 matcher.reset(input); 564 if (matcher.find()) 565 failCount++; 566 // SSN.BBN.SS 567 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 568 twoFindIndexes(input, matcher, 3, 6); 569 570 report("Unicode word boundary"); 571 } 572 573 private static void twoFindIndexes(String input, Matcher matcher, int a, 574 int b) throws Exception 575 { 576 matcher.reset(input); 577 matcher.find(); 578 if (matcher.start() != a) 579 failCount++; 580 matcher.find(); 581 if (matcher.start() != b) 582 failCount++; 583 } 584 585 // This test is for 6284152 586 static void check(String regex, String input, String[] expected) { 587 List<String> result = new ArrayList<String>(); 588 Pattern p = Pattern.compile(regex); 589 Matcher m = p.matcher(input); 590 while (m.find()) { 591 result.add(m.group()); 592 } 593 if (!Arrays.asList(expected).equals(result)) 594 failCount++; 595 } 596 597 private static void lookbehindTest() throws Exception { 598 //Positive 599 check("(?<=%.{0,5})foo\\d", 600 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 601 new String[]{"foo1", "foo2", "foo3"}); 602 603 //boundary at end of the lookbehind sub-regex should work consistently 604 //with the boundary just after the lookbehind sub-regex 605 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 606 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 607 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 608 check("(?<!abc \\b)foo", "abc foo", new String[0]); 609 610 //Negative 611 check("(?<!%.{0,5})foo\\d", 612 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 613 new String[] {"foo4", "foo5"}); 614 615 //Positive greedy 616 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 617 618 //Positive reluctant 619 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 620 621 //supplementary 622 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 623 new String[] {"fo\ud800\udc00o"}); 624 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 625 new String[] {"fo\ud800\udc00o"}); 626 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 627 new String[] {"fo\ud800\udc00o"}); 628 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 629 new String[] {"fo\ud800\udc00o"}); 630 report("Lookbehind"); 631 } 632 633 // This test is for 4938995 634 // Check to see if weak region boundaries are transparent to 635 // lookahead and lookbehind constructs 636 private static void boundsTest() throws Exception { 637 String fullMessage = "catdogcat"; 638 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 639 Matcher matcher = pattern.matcher("catdogca"); 640 matcher.useTransparentBounds(true); 641 if (matcher.find()) 642 failCount++; 643 matcher.reset("atdogcat"); 644 if (matcher.find()) 645 failCount++; 646 matcher.reset(fullMessage); 647 if (!matcher.find()) 648 failCount++; 649 matcher.reset(fullMessage); 650 matcher.region(0,9); 651 if (!matcher.find()) 652 failCount++; 653 matcher.reset(fullMessage); 654 matcher.region(0,6); 655 if (!matcher.find()) 656 failCount++; 657 matcher.reset(fullMessage); 658 matcher.region(3,6); 659 if (!matcher.find()) 660 failCount++; 661 matcher.useTransparentBounds(false); 662 if (matcher.find()) 663 failCount++; 664 665 // Negative lookahead/lookbehind 666 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 667 matcher = pattern.matcher("dogcat"); 668 matcher.useTransparentBounds(true); 669 matcher.region(0,3); 670 if (matcher.find()) 671 failCount++; 672 matcher.reset("catdog"); 673 matcher.region(3,6); 674 if (matcher.find()) 675 failCount++; 676 matcher.useTransparentBounds(false); 677 matcher.reset("dogcat"); 678 matcher.region(0,3); 679 if (!matcher.find()) 680 failCount++; 681 matcher.reset("catdog"); 682 matcher.region(3,6); 683 if (!matcher.find()) 684 failCount++; 685 686 report("Region bounds transparency"); 687 } 688 689 // This test is for 4945394 690 private static void findFromTest() throws Exception { 691 String message = "This is 40 $0 message."; 692 Pattern pat = Pattern.compile("\\$0"); 693 Matcher match = pat.matcher(message); 694 if (!match.find()) 695 failCount++; 696 if (match.find()) 697 failCount++; 698 if (match.find()) 699 failCount++; 700 report("Check for alternating find"); 701 } 702 703 // This test is for 4872664 and 4892980 704 private static void negatedCharClassTest() throws Exception { 705 Pattern pattern = Pattern.compile("[^>]"); 706 Matcher matcher = pattern.matcher("\u203A"); 707 if (!matcher.matches()) 708 failCount++; 709 pattern = Pattern.compile("[^fr]"); 710 matcher = pattern.matcher("a"); 711 if (!matcher.find()) 712 failCount++; 713 matcher.reset("\u203A"); 714 if (!matcher.find()) 715 failCount++; 716 String s = "for"; 717 String result[] = s.split("[^fr]"); 718 if (!result[0].equals("f")) 719 failCount++; 720 if (!result[1].equals("r")) 721 failCount++; 722 s = "f\u203Ar"; 723 result = s.split("[^fr]"); 724 if (!result[0].equals("f")) 725 failCount++; 726 if (!result[1].equals("r")) 727 failCount++; 728 729 // Test adding to bits, subtracting a node, then adding to bits again 730 pattern = Pattern.compile("[^f\u203Ar]"); 731 matcher = pattern.matcher("a"); 732 if (!matcher.find()) 733 failCount++; 734 matcher.reset("f"); 735 if (matcher.find()) 736 failCount++; 737 matcher.reset("\u203A"); 738 if (matcher.find()) 739 failCount++; 740 matcher.reset("r"); 741 if (matcher.find()) 742 failCount++; 743 matcher.reset("\u203B"); 744 if (!matcher.find()) 745 failCount++; 746 747 // Test subtracting a node, adding to bits, subtracting again 748 pattern = Pattern.compile("[^\u203Ar\u203B]"); 749 matcher = pattern.matcher("a"); 750 if (!matcher.find()) 751 failCount++; 752 matcher.reset("\u203A"); 753 if (matcher.find()) 754 failCount++; 755 matcher.reset("r"); 756 if (matcher.find()) 757 failCount++; 758 matcher.reset("\u203B"); 759 if (matcher.find()) 760 failCount++; 761 matcher.reset("\u203C"); 762 if (!matcher.find()) 763 failCount++; 764 765 report("Negated Character Class"); 766 } 767 768 // This test is for 4628291 769 private static void toStringTest() throws Exception { 770 Pattern pattern = Pattern.compile("b+"); 771 if (pattern.toString() != "b+") 772 failCount++; 773 Matcher matcher = pattern.matcher("aaabbbccc"); 774 String matcherString = matcher.toString(); // unspecified 775 matcher.find(); 776 matcherString = matcher.toString(); // unspecified 777 matcher.region(0,3); 778 matcherString = matcher.toString(); // unspecified 779 matcher.reset(); 780 matcherString = matcher.toString(); // unspecified 781 report("toString"); 782 } 783 784 // This test is for 4808962 785 private static void literalPatternTest() throws Exception { 786 int flags = Pattern.LITERAL; 787 788 Pattern pattern = Pattern.compile("abc\\t$^", flags); 789 check(pattern, "abc\\t$^", true); 790 791 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 792 check(pattern, "abc\\t$^", true); 793 794 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 795 check(pattern, "\\Qa^$bcabc\\E", true); 796 check(pattern, "a^$bcabc", false); 797 798 pattern = Pattern.compile("\\\\Q\\\\E"); 799 check(pattern, "\\Q\\E", true); 800 801 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 802 check(pattern, "abcefg\\Q\\Ehij", true); 803 804 pattern = Pattern.compile("\\\\\\Q\\\\E"); 805 check(pattern, "\\\\\\\\", true); 806 807 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 808 check(pattern, "\\Qa^$bcabc\\E", true); 809 check(pattern, "a^$bcabc", false); 810 811 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 812 check(pattern, "\\Qabc\\Edef", true); 813 check(pattern, "abcdef", false); 814 815 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 816 check(pattern, "abc\\Edef", true); 817 check(pattern, "abcdef", false); 818 819 pattern = Pattern.compile(Pattern.quote("\\E")); 820 check(pattern, "\\E", true); 821 822 pattern = Pattern.compile("((((abc.+?:)", flags); 823 check(pattern, "((((abc.+?:)", true); 824 825 flags |= Pattern.MULTILINE; 826 827 pattern = Pattern.compile("^cat$", flags); 828 check(pattern, "abc^cat$def", true); 829 check(pattern, "cat", false); 830 831 flags |= Pattern.CASE_INSENSITIVE; 832 833 pattern = Pattern.compile("abcdef", flags); 834 check(pattern, "ABCDEF", true); 835 check(pattern, "AbCdEf", true); 836 837 flags |= Pattern.DOTALL; 838 839 pattern = Pattern.compile("a...b", flags); 840 check(pattern, "A...b", true); 841 check(pattern, "Axxxb", false); 842 843 flags |= Pattern.CANON_EQ; 844 845 Pattern p = Pattern.compile("testa\u030a", flags); 846 check(pattern, "testa\u030a", false); 847 check(pattern, "test\u00e5", false); 848 849 // Supplementary character test 850 flags = Pattern.LITERAL; 851 852 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 853 check(pattern, toSupplementaries("abc\\t$^"), true); 854 855 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 856 check(pattern, toSupplementaries("abc\\t$^"), true); 857 858 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 859 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 860 check(pattern, toSupplementaries("a^$bcabc"), false); 861 862 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 863 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 864 check(pattern, toSupplementaries("a^$bcabc"), false); 865 866 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 867 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 868 check(pattern, toSupplementaries("abcdef"), false); 869 870 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 871 check(pattern, toSupplementaries("abc\\Edef"), true); 872 check(pattern, toSupplementaries("abcdef"), false); 873 874 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 875 check(pattern, toSupplementaries("((((abc.+?:)"), true); 876 877 flags |= Pattern.MULTILINE; 878 879 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 880 check(pattern, toSupplementaries("abc^cat$def"), true); 881 check(pattern, toSupplementaries("cat"), false); 882 883 flags |= Pattern.DOTALL; 884 885 // note: this is case-sensitive. 886 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 887 check(pattern, toSupplementaries("a...b"), true); 888 check(pattern, toSupplementaries("axxxb"), false); 889 890 flags |= Pattern.CANON_EQ; 891 892 String t = toSupplementaries("test"); 893 p = Pattern.compile(t + "a\u030a", flags); 894 check(pattern, t + "a\u030a", false); 895 check(pattern, t + "\u00e5", false); 896 897 report("Literal pattern"); 898 } 899 900 // This test is for 4803179 901 // This test is also for 4808962, replacement parts 902 private static void literalReplacementTest() throws Exception { 903 int flags = Pattern.LITERAL; 904 905 Pattern pattern = Pattern.compile("abc", flags); 906 Matcher matcher = pattern.matcher("zzzabczzz"); 907 String replaceTest = "$0"; 908 String result = matcher.replaceAll(replaceTest); 909 if (!result.equals("zzzabczzz")) 910 failCount++; 911 912 matcher.reset(); 913 String literalReplacement = matcher.quoteReplacement(replaceTest); 914 result = matcher.replaceAll(literalReplacement); 915 if (!result.equals("zzz$0zzz")) 916 failCount++; 917 918 matcher.reset(); 919 replaceTest = "\\t$\\$"; 920 literalReplacement = matcher.quoteReplacement(replaceTest); 921 result = matcher.replaceAll(literalReplacement); 922 if (!result.equals("zzz\\t$\\$zzz")) 923 failCount++; 924 925 // Supplementary character test 926 pattern = Pattern.compile(toSupplementaries("abc"), flags); 927 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 928 replaceTest = "$0"; 929 result = matcher.replaceAll(replaceTest); 930 if (!result.equals(toSupplementaries("zzzabczzz"))) 931 failCount++; 932 933 matcher.reset(); 934 literalReplacement = matcher.quoteReplacement(replaceTest); 935 result = matcher.replaceAll(literalReplacement); 936 if (!result.equals(toSupplementaries("zzz$0zzz"))) 937 failCount++; 938 939 matcher.reset(); 940 replaceTest = "\\t$\\$"; 941 literalReplacement = matcher.quoteReplacement(replaceTest); 942 result = matcher.replaceAll(literalReplacement); 943 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 944 failCount++; 945 946 // IAE should be thrown if backslash or '$' is the last character 947 // in replacement string 948 try { 949 "\uac00".replaceAll("\uac00", "$"); 950 failCount++; 951 } catch (IllegalArgumentException iie) { 952 } catch (Exception e) { 953 failCount++; 954 } 955 try { 956 "\uac00".replaceAll("\uac00", "\\"); 957 failCount++; 958 } catch (IllegalArgumentException iie) { 959 } catch (Exception e) { 960 failCount++; 961 } 962 report("Literal replacement"); 963 } 964 965 // This test is for 4757029 966 private static void regionTest() throws Exception { 967 Pattern pattern = Pattern.compile("abc"); 968 Matcher matcher = pattern.matcher("abcdefabc"); 969 970 matcher.region(0,9); 971 if (!matcher.find()) 972 failCount++; 973 if (!matcher.find()) 974 failCount++; 975 matcher.region(0,3); 976 if (!matcher.find()) 977 failCount++; 978 matcher.region(3,6); 979 if (matcher.find()) 980 failCount++; 981 matcher.region(0,2); 982 if (matcher.find()) 983 failCount++; 984 985 expectRegionFail(matcher, 1, -1); 986 expectRegionFail(matcher, -1, -1); 987 expectRegionFail(matcher, -1, 1); 988 expectRegionFail(matcher, 5, 3); 989 expectRegionFail(matcher, 5, 12); 990 expectRegionFail(matcher, 12, 12); 991 992 pattern = Pattern.compile("^abc$"); 993 matcher = pattern.matcher("zzzabczzz"); 994 matcher.region(0,9); 995 if (matcher.find()) 996 failCount++; 997 matcher.region(3,6); 998 if (!matcher.find()) 999 failCount++; 1000 matcher.region(3,6); 1001 matcher.useAnchoringBounds(false); 1002 if (matcher.find()) 1003 failCount++; 1004 1005 // Supplementary character test 1006 pattern = Pattern.compile(toSupplementaries("abc")); 1007 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1008 matcher.region(0,9*2); 1009 if (!matcher.find()) 1010 failCount++; 1011 if (!matcher.find()) 1012 failCount++; 1013 matcher.region(0,3*2); 1014 if (!matcher.find()) 1015 failCount++; 1016 matcher.region(1,3*2); 1017 if (matcher.find()) 1018 failCount++; 1019 matcher.region(3*2,6*2); 1020 if (matcher.find()) 1021 failCount++; 1022 matcher.region(0,2*2); 1023 if (matcher.find()) 1024 failCount++; 1025 matcher.region(0,2*2+1); 1026 if (matcher.find()) 1027 failCount++; 1028 1029 expectRegionFail(matcher, 1*2, -1); 1030 expectRegionFail(matcher, -1, -1); 1031 expectRegionFail(matcher, -1, 1*2); 1032 expectRegionFail(matcher, 5*2, 3*2); 1033 expectRegionFail(matcher, 5*2, 12*2); 1034 expectRegionFail(matcher, 12*2, 12*2); 1035 1036 pattern = Pattern.compile(toSupplementaries("^abc$")); 1037 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1038 matcher.region(0,9*2); 1039 if (matcher.find()) 1040 failCount++; 1041 matcher.region(3*2,6*2); 1042 if (!matcher.find()) 1043 failCount++; 1044 matcher.region(3*2+1,6*2); 1045 if (matcher.find()) 1046 failCount++; 1047 matcher.region(3*2,6*2-1); 1048 if (matcher.find()) 1049 failCount++; 1050 matcher.region(3*2,6*2); 1051 matcher.useAnchoringBounds(false); 1052 if (matcher.find()) 1053 failCount++; 1054 report("Regions"); 1055 } 1056 1057 private static void expectRegionFail(Matcher matcher, int index1, 1058 int index2) 1059 { 1060 try { 1061 matcher.region(index1, index2); 1062 failCount++; 1063 } catch (IndexOutOfBoundsException ioobe) { 1064 // Correct result 1065 } catch (IllegalStateException ise) { 1066 // Correct result 1067 } 1068 } 1069 1070 // This test is for 4803197 1071 private static void escapedSegmentTest() throws Exception { 1072 1073 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1074 check(pattern, "dir1\\dir2", true); 1075 1076 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1077 check(pattern, "dir1\\dir2\\", true); 1078 1079 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1080 check(pattern, "dir1\\dir2\\", true); 1081 1082 // Supplementary character test 1083 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1084 check(pattern, toSupplementaries("dir1\\dir2"), true); 1085 1086 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1087 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1088 1089 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1090 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1091 1092 report("Escaped segment"); 1093 } 1094 1095 // This test is for 4792284 1096 private static void nonCaptureRepetitionTest() throws Exception { 1097 String input = "abcdefgh;"; 1098 1099 String[] patterns = new String[] { 1100 "(?:\\w{4})+;", 1101 "(?:\\w{8})*;", 1102 "(?:\\w{2}){2,4};", 1103 "(?:\\w{4}){2,};", // only matches the 1104 ".*?(?:\\w{5})+;", // specified minimum 1105 ".*?(?:\\w{9})*;", // number of reps - OK 1106 "(?:\\w{4})+?;", // lazy repetition - OK 1107 "(?:\\w{4})++;", // possessive repetition - OK 1108 "(?:\\w{2,}?)+;", // non-deterministic - OK 1109 "(\\w{4})+;", // capturing group - OK 1110 }; 1111 1112 for (int i = 0; i < patterns.length; i++) { 1113 // Check find() 1114 check(patterns[i], 0, input, input, true); 1115 // Check matches() 1116 Pattern p = Pattern.compile(patterns[i]); 1117 Matcher m = p.matcher(input); 1118 1119 if (m.matches()) { 1120 if (!m.group(0).equals(input)) 1121 failCount++; 1122 } else { 1123 failCount++; 1124 } 1125 } 1126 1127 report("Non capturing repetition"); 1128 } 1129 1130 // This test is for 6358731 1131 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1132 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1133 Matcher matcher = pattern.matcher("abcd"); 1134 if (!matcher.matches() || 1135 matcher.group(1) != null || 1136 !matcher.group(2).equals("abcd")) { 1137 failCount++; 1138 } 1139 report("Not captured GroupCurly"); 1140 } 1141 1142 // This test is for 4706545 1143 private static void javaCharClassTest() throws Exception { 1144 for (int i=0; i<1000; i++) { 1145 char c = (char)generator.nextInt(); 1146 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1147 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1148 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1149 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1150 check("{javaDigit}", c, Character.isDigit(c)); 1151 check("{javaDefined}", c, Character.isDefined(c)); 1152 check("{javaLetter}", c, Character.isLetter(c)); 1153 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1154 check("{javaJavaIdentifierStart}", c, 1155 Character.isJavaIdentifierStart(c)); 1156 check("{javaJavaIdentifierPart}", c, 1157 Character.isJavaIdentifierPart(c)); 1158 check("{javaUnicodeIdentifierStart}", c, 1159 Character.isUnicodeIdentifierStart(c)); 1160 check("{javaUnicodeIdentifierPart}", c, 1161 Character.isUnicodeIdentifierPart(c)); 1162 check("{javaIdentifierIgnorable}", c, 1163 Character.isIdentifierIgnorable(c)); 1164 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1165 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1166 check("{javaISOControl}", c, Character.isISOControl(c)); 1167 check("{javaMirrored}", c, Character.isMirrored(c)); 1168 1169 } 1170 1171 // Supplementary character test 1172 for (int i=0; i<1000; i++) { 1173 int c = generator.nextInt(Character.MAX_CODE_POINT 1174 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1175 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1176 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1177 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1178 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1179 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1180 check("{javaDigit}", c, Character.isDigit(c)); 1181 check("{javaDefined}", c, Character.isDefined(c)); 1182 check("{javaLetter}", c, Character.isLetter(c)); 1183 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1184 check("{javaJavaIdentifierStart}", c, 1185 Character.isJavaIdentifierStart(c)); 1186 check("{javaJavaIdentifierPart}", c, 1187 Character.isJavaIdentifierPart(c)); 1188 check("{javaUnicodeIdentifierStart}", c, 1189 Character.isUnicodeIdentifierStart(c)); 1190 check("{javaUnicodeIdentifierPart}", c, 1191 Character.isUnicodeIdentifierPart(c)); 1192 check("{javaIdentifierIgnorable}", c, 1193 Character.isIdentifierIgnorable(c)); 1194 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1195 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1196 check("{javaISOControl}", c, Character.isISOControl(c)); 1197 check("{javaMirrored}", c, Character.isMirrored(c)); 1198 } 1199 1200 report("Java character classes"); 1201 } 1202 1203 // This test is for 4523620 1204 /* 1205 private static void numOccurrencesTest() throws Exception { 1206 Pattern pattern = Pattern.compile("aaa"); 1207 1208 if (pattern.numOccurrences("aaaaaa", false) != 2) 1209 failCount++; 1210 if (pattern.numOccurrences("aaaaaa", true) != 4) 1211 failCount++; 1212 1213 pattern = Pattern.compile("^"); 1214 if (pattern.numOccurrences("aaaaaa", false) != 1) 1215 failCount++; 1216 if (pattern.numOccurrences("aaaaaa", true) != 1) 1217 failCount++; 1218 1219 report("Number of Occurrences"); 1220 } 1221 */ 1222 1223 // This test is for 4776374 1224 private static void caretBetweenTerminatorsTest() throws Exception { 1225 int flags1 = Pattern.DOTALL; 1226 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1227 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1228 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1229 1230 check("^....", flags1, "test\ntest", "test", true); 1231 check(".....^", flags1, "test\ntest", "test", false); 1232 check(".....^", flags1, "test\n", "test", false); 1233 check("....^", flags1, "test\r\n", "test", false); 1234 1235 check("^....", flags2, "test\ntest", "test", true); 1236 check("....^", flags2, "test\ntest", "test", false); 1237 check(".....^", flags2, "test\n", "test", false); 1238 check("....^", flags2, "test\r\n", "test", false); 1239 1240 check("^....", flags3, "test\ntest", "test", true); 1241 check(".....^", flags3, "test\ntest", "test\n", true); 1242 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1243 check(".....^", flags3, "test\n", "test", false); 1244 check(".....^", flags3, "test\r\n", "test", false); 1245 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1246 1247 check("^....", flags4, "test\ntest", "test", true); 1248 check(".....^", flags3, "test\ntest", "test\n", true); 1249 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1250 check(".....^", flags4, "test\n", "test\n", false); 1251 check(".....^", flags4, "test\r\n", "test\r", false); 1252 1253 // Supplementary character test 1254 String t = toSupplementaries("test"); 1255 check("^....", flags1, t+"\n"+t, t, true); 1256 check(".....^", flags1, t+"\n"+t, t, false); 1257 check(".....^", flags1, t+"\n", t, false); 1258 check("....^", flags1, t+"\r\n", t, false); 1259 1260 check("^....", flags2, t+"\n"+t, t, true); 1261 check("....^", flags2, t+"\n"+t, t, false); 1262 check(".....^", flags2, t+"\n", t, false); 1263 check("....^", flags2, t+"\r\n", t, false); 1264 1265 check("^....", flags3, t+"\n"+t, t, true); 1266 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1267 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1268 check(".....^", flags3, t+"\n", t, false); 1269 check(".....^", flags3, t+"\r\n", t, false); 1270 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1271 1272 check("^....", flags4, t+"\n"+t, t, true); 1273 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1274 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1275 check(".....^", flags4, t+"\n", t+"\n", false); 1276 check(".....^", flags4, t+"\r\n", t+"\r", false); 1277 1278 report("Caret between terminators"); 1279 } 1280 1281 // This test is for 4727935 1282 private static void dollarAtEndTest() throws Exception { 1283 int flags1 = Pattern.DOTALL; 1284 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1285 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1286 1287 check("....$", flags1, "test\n", "test", true); 1288 check("....$", flags1, "test\r\n", "test", true); 1289 check(".....$", flags1, "test\n", "test\n", true); 1290 check(".....$", flags1, "test\u0085", "test\u0085", true); 1291 check("....$", flags1, "test\u0085", "test", true); 1292 1293 check("....$", flags2, "test\n", "test", true); 1294 check(".....$", flags2, "test\n", "test\n", true); 1295 check(".....$", flags2, "test\u0085", "test\u0085", true); 1296 check("....$", flags2, "test\u0085", "est\u0085", true); 1297 1298 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1299 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1300 check("....$blah", flags3, "test\nblah", "!!!!", false); 1301 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1302 1303 // Supplementary character test 1304 String t = toSupplementaries("test"); 1305 String b = toSupplementaries("blah"); 1306 check("....$", flags1, t+"\n", t, true); 1307 check("....$", flags1, t+"\r\n", t, true); 1308 check(".....$", flags1, t+"\n", t+"\n", true); 1309 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1310 check("....$", flags1, t+"\u0085", t, true); 1311 1312 check("....$", flags2, t+"\n", t, true); 1313 check(".....$", flags2, t+"\n", t+"\n", true); 1314 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1315 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1316 1317 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1318 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1319 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1320 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1321 1322 report("Dollar at End"); 1323 } 1324 1325 // This test is for 4711773 1326 private static void multilineDollarTest() throws Exception { 1327 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1328 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1329 matcher.find(); 1330 if (matcher.start(0) != 9) 1331 failCount++; 1332 matcher.find(); 1333 if (matcher.start(0) != 20) 1334 failCount++; 1335 1336 // Supplementary character test 1337 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1338 matcher.find(); 1339 if (matcher.start(0) != 9*2) 1340 failCount++; 1341 matcher.find(); 1342 if (matcher.start(0) != 20*2) 1343 failCount++; 1344 1345 report("Multiline Dollar"); 1346 } 1347 1348 private static void reluctantRepetitionTest() throws Exception { 1349 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1350 check(p, "1 word word word 2", true); 1351 check(p, "1 wor wo w 2", true); 1352 check(p, "1 word word 2", true); 1353 check(p, "1 word 2", true); 1354 check(p, "1 wo w w 2", true); 1355 check(p, "1 wo w 2", true); 1356 check(p, "1 wor w 2", true); 1357 1358 p = Pattern.compile("([a-z])+?c"); 1359 Matcher m = p.matcher("ababcdefdec"); 1360 check(m, "ababc"); 1361 1362 // Supplementary character test 1363 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1364 m = p.matcher(toSupplementaries("ababcdefdec")); 1365 check(m, toSupplementaries("ababc")); 1366 1367 report("Reluctant Repetition"); 1368 } 1369 1370 private static void serializeTest() throws Exception { 1371 String patternStr = "(b)"; 1372 String matchStr = "b"; 1373 Pattern pattern = Pattern.compile(patternStr); 1374 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1375 ObjectOutputStream oos = new ObjectOutputStream(baos); 1376 oos.writeObject(pattern); 1377 oos.close(); 1378 ObjectInputStream ois = new ObjectInputStream( 1379 new ByteArrayInputStream(baos.toByteArray())); 1380 Pattern serializedPattern = (Pattern)ois.readObject(); 1381 ois.close(); 1382 Matcher matcher = serializedPattern.matcher(matchStr); 1383 if (!matcher.matches()) 1384 failCount++; 1385 if (matcher.groupCount() != 1) 1386 failCount++; 1387 1388 report("Serialization"); 1389 } 1390 1391 private static void gTest() { 1392 Pattern pattern = Pattern.compile("\\G\\w"); 1393 Matcher matcher = pattern.matcher("abc#x#x"); 1394 matcher.find(); 1395 matcher.find(); 1396 matcher.find(); 1397 if (matcher.find()) 1398 failCount++; 1399 1400 pattern = Pattern.compile("\\GA*"); 1401 matcher = pattern.matcher("1A2AA3"); 1402 matcher.find(); 1403 if (matcher.find()) 1404 failCount++; 1405 1406 pattern = Pattern.compile("\\GA*"); 1407 matcher = pattern.matcher("1A2AA3"); 1408 if (!matcher.find(1)) 1409 failCount++; 1410 matcher.find(); 1411 if (matcher.find()) 1412 failCount++; 1413 1414 report("\\G"); 1415 } 1416 1417 private static void zTest() { 1418 Pattern pattern = Pattern.compile("foo\\Z"); 1419 // Positives 1420 check(pattern, "foo\u0085", true); 1421 check(pattern, "foo\u2028", true); 1422 check(pattern, "foo\u2029", true); 1423 check(pattern, "foo\n", true); 1424 check(pattern, "foo\r", true); 1425 check(pattern, "foo\r\n", true); 1426 // Negatives 1427 check(pattern, "fooo", false); 1428 check(pattern, "foo\n\r", false); 1429 1430 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1431 // Positives 1432 check(pattern, "foo", true); 1433 check(pattern, "foo\n", true); 1434 // Negatives 1435 check(pattern, "foo\r", false); 1436 check(pattern, "foo\u0085", false); 1437 check(pattern, "foo\u2028", false); 1438 check(pattern, "foo\u2029", false); 1439 1440 report("\\Z"); 1441 } 1442 1443 private static void replaceFirstTest() { 1444 Pattern pattern = Pattern.compile("(ab)(c*)"); 1445 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1446 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1447 failCount++; 1448 1449 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1450 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1451 failCount++; 1452 1453 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1454 String result = matcher.replaceFirst("$1"); 1455 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1456 failCount++; 1457 1458 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1459 result = matcher.replaceFirst("$2"); 1460 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1461 failCount++; 1462 1463 pattern = Pattern.compile("a*"); 1464 matcher = pattern.matcher("aaaaaaaaaa"); 1465 if (!matcher.replaceFirst("test").equals("test")) 1466 failCount++; 1467 1468 pattern = Pattern.compile("a+"); 1469 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1470 if (!matcher.replaceFirst("test").equals("zzztest")) 1471 failCount++; 1472 1473 // Supplementary character test 1474 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1475 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1476 if (!matcher.replaceFirst(toSupplementaries("test")) 1477 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1478 failCount++; 1479 1480 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1481 if (!matcher.replaceFirst(toSupplementaries("test")). 1482 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1483 failCount++; 1484 1485 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1486 result = matcher.replaceFirst("$1"); 1487 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1488 failCount++; 1489 1490 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1491 result = matcher.replaceFirst("$2"); 1492 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1493 failCount++; 1494 1495 pattern = Pattern.compile(toSupplementaries("a*")); 1496 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1497 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1498 failCount++; 1499 1500 pattern = Pattern.compile(toSupplementaries("a+")); 1501 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1502 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1503 failCount++; 1504 1505 report("Replace First"); 1506 } 1507 1508 private static void unixLinesTest() { 1509 Pattern pattern = Pattern.compile(".*"); 1510 Matcher matcher = pattern.matcher("aa\u2028blah"); 1511 matcher.find(); 1512 if (!matcher.group(0).equals("aa")) 1513 failCount++; 1514 1515 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1516 matcher = pattern.matcher("aa\u2028blah"); 1517 matcher.find(); 1518 if (!matcher.group(0).equals("aa\u2028blah")) 1519 failCount++; 1520 1521 pattern = Pattern.compile("[az]$", 1522 Pattern.MULTILINE | Pattern.UNIX_LINES); 1523 matcher = pattern.matcher("aa\u2028zz"); 1524 check(matcher, "a\u2028", false); 1525 1526 // Supplementary character test 1527 pattern = Pattern.compile(".*"); 1528 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1529 matcher.find(); 1530 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1531 failCount++; 1532 1533 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1534 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1535 matcher.find(); 1536 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1537 failCount++; 1538 1539 pattern = Pattern.compile(toSupplementaries("[az]$"), 1540 Pattern.MULTILINE | Pattern.UNIX_LINES); 1541 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1542 check(matcher, toSupplementaries("a\u2028"), false); 1543 1544 report("Unix Lines"); 1545 } 1546 1547 private static void commentsTest() { 1548 int flags = Pattern.COMMENTS; 1549 1550 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1551 Matcher matcher = pattern.matcher("aa#aa"); 1552 if (!matcher.matches()) 1553 failCount++; 1554 1555 pattern = Pattern.compile("aa # blah", flags); 1556 matcher = pattern.matcher("aa"); 1557 if (!matcher.matches()) 1558 failCount++; 1559 1560 pattern = Pattern.compile("aa blah", flags); 1561 matcher = pattern.matcher("aablah"); 1562 if (!matcher.matches()) 1563 failCount++; 1564 1565 pattern = Pattern.compile("aa # blah blech ", flags); 1566 matcher = pattern.matcher("aa"); 1567 if (!matcher.matches()) 1568 failCount++; 1569 1570 pattern = Pattern.compile("aa # blah\n ", flags); 1571 matcher = pattern.matcher("aa"); 1572 if (!matcher.matches()) 1573 failCount++; 1574 1575 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1576 matcher = pattern.matcher("aabc"); 1577 if (!matcher.matches()) 1578 failCount++; 1579 1580 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1581 matcher = pattern.matcher("aabc"); 1582 if (!matcher.matches()) 1583 failCount++; 1584 1585 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1586 matcher = pattern.matcher("aabc#blech"); 1587 if (!matcher.matches()) 1588 failCount++; 1589 1590 // Supplementary character test 1591 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1592 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1593 if (!matcher.matches()) 1594 failCount++; 1595 1596 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1597 matcher = pattern.matcher(toSupplementaries("aa")); 1598 if (!matcher.matches()) 1599 failCount++; 1600 1601 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1602 matcher = pattern.matcher(toSupplementaries("aablah")); 1603 if (!matcher.matches()) 1604 failCount++; 1605 1606 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1607 matcher = pattern.matcher(toSupplementaries("aa")); 1608 if (!matcher.matches()) 1609 failCount++; 1610 1611 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1612 matcher = pattern.matcher(toSupplementaries("aa")); 1613 if (!matcher.matches()) 1614 failCount++; 1615 1616 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1617 matcher = pattern.matcher(toSupplementaries("aabc")); 1618 if (!matcher.matches()) 1619 failCount++; 1620 1621 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1622 matcher = pattern.matcher(toSupplementaries("aabc")); 1623 if (!matcher.matches()) 1624 failCount++; 1625 1626 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1627 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1628 if (!matcher.matches()) 1629 failCount++; 1630 1631 report("Comments"); 1632 } 1633 1634 private static void caseFoldingTest() { // bug 4504687 1635 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1636 Pattern pattern = Pattern.compile("aa", flags); 1637 Matcher matcher = pattern.matcher("ab"); 1638 if (matcher.matches()) 1639 failCount++; 1640 1641 pattern = Pattern.compile("aA", flags); 1642 matcher = pattern.matcher("ab"); 1643 if (matcher.matches()) 1644 failCount++; 1645 1646 pattern = Pattern.compile("aa", flags); 1647 matcher = pattern.matcher("aB"); 1648 if (matcher.matches()) 1649 failCount++; 1650 matcher = pattern.matcher("Ab"); 1651 if (matcher.matches()) 1652 failCount++; 1653 1654 // ASCII "a" 1655 // Latin-1 Supplement "a" + grave 1656 // Cyrillic "a" 1657 String[] patterns = new String[] { 1658 //single 1659 "a", "\u00e0", "\u0430", 1660 //slice 1661 "ab", "\u00e0\u00e1", "\u0430\u0431", 1662 //class single 1663 "[a]", "[\u00e0]", "[\u0430]", 1664 //class range 1665 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1666 //back reference 1667 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1668 }; 1669 1670 String[] texts = new String[] { 1671 "A", "\u00c0", "\u0410", 1672 "AB", "\u00c0\u00c1", "\u0410\u0411", 1673 "A", "\u00c0", "\u0410", 1674 "B", "\u00c2", "\u0411", 1675 "aA", "\u00e0\u00c0", "\u0430\u0410" 1676 }; 1677 1678 boolean[] expected = new boolean[] { 1679 true, false, false, 1680 true, false, false, 1681 true, false, false, 1682 true, false, false, 1683 true, false, false 1684 }; 1685 1686 flags = Pattern.CASE_INSENSITIVE; 1687 for (int i = 0; i < patterns.length; i++) { 1688 pattern = Pattern.compile(patterns[i], flags); 1689 matcher = pattern.matcher(texts[i]); 1690 if (matcher.matches() != expected[i]) { 1691 System.out.println("<1> Failed at " + i); 1692 failCount++; 1693 } 1694 } 1695 1696 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1697 for (int i = 0; i < patterns.length; i++) { 1698 pattern = Pattern.compile(patterns[i], flags); 1699 matcher = pattern.matcher(texts[i]); 1700 if (!matcher.matches()) { 1701 System.out.println("<2> Failed at " + i); 1702 failCount++; 1703 } 1704 } 1705 // flag unicode_case alone should do nothing 1706 flags = Pattern.UNICODE_CASE; 1707 for (int i = 0; i < patterns.length; i++) { 1708 pattern = Pattern.compile(patterns[i], flags); 1709 matcher = pattern.matcher(texts[i]); 1710 if (matcher.matches()) { 1711 System.out.println("<3> Failed at " + i); 1712 failCount++; 1713 } 1714 } 1715 1716 // Special cases: i, I, u+0131 and u+0130 1717 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1718 pattern = Pattern.compile("[h-j]+", flags); 1719 if (!pattern.matcher("\u0131\u0130").matches()) 1720 failCount++; 1721 report("Case Folding"); 1722 } 1723 1724 private static void appendTest() { 1725 Pattern pattern = Pattern.compile("(ab)(cd)"); 1726 Matcher matcher = pattern.matcher("abcd"); 1727 String result = matcher.replaceAll("$2$1"); 1728 if (!result.equals("cdab")) 1729 failCount++; 1730 1731 String s1 = "Swap all: first = 123, second = 456"; 1732 String s2 = "Swap one: first = 123, second = 456"; 1733 String r = "$3$2$1"; 1734 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1735 matcher = pattern.matcher(s1); 1736 1737 result = matcher.replaceAll(r); 1738 if (!result.equals("Swap all: 123 = first, 456 = second")) 1739 failCount++; 1740 1741 matcher = pattern.matcher(s2); 1742 1743 if (matcher.find()) { 1744 StringBuffer sb = new StringBuffer(); 1745 matcher.appendReplacement(sb, r); 1746 matcher.appendTail(sb); 1747 result = sb.toString(); 1748 if (!result.equals("Swap one: 123 = first, second = 456")) 1749 failCount++; 1750 } 1751 1752 // Supplementary character test 1753 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1754 matcher = pattern.matcher(toSupplementaries("abcd")); 1755 result = matcher.replaceAll("$2$1"); 1756 if (!result.equals(toSupplementaries("cdab"))) 1757 failCount++; 1758 1759 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1760 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1761 r = toSupplementaries("$3$2$1"); 1762 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1763 matcher = pattern.matcher(s1); 1764 1765 result = matcher.replaceAll(r); 1766 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1767 failCount++; 1768 1769 matcher = pattern.matcher(s2); 1770 1771 if (matcher.find()) { 1772 StringBuffer sb = new StringBuffer(); 1773 matcher.appendReplacement(sb, r); 1774 matcher.appendTail(sb); 1775 result = sb.toString(); 1776 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1777 failCount++; 1778 } 1779 report("Append"); 1780 } 1781 1782 private static void splitTest() { 1783 Pattern pattern = Pattern.compile(":"); 1784 String[] result = pattern.split("foo:and:boo", 2); 1785 if (!result[0].equals("foo")) 1786 failCount++; 1787 if (!result[1].equals("and:boo")) 1788 failCount++; 1789 // Supplementary character test 1790 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1791 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1792 if (!result[0].equals(toSupplementaries("foo"))) 1793 failCount++; 1794 if (!result[1].equals(toSupplementaries("andXboo"))) 1795 failCount++; 1796 1797 CharBuffer cb = CharBuffer.allocate(100); 1798 cb.put("foo:and:boo"); 1799 cb.flip(); 1800 result = pattern.split(cb); 1801 if (!result[0].equals("foo")) 1802 failCount++; 1803 if (!result[1].equals("and")) 1804 failCount++; 1805 if (!result[2].equals("boo")) 1806 failCount++; 1807 1808 // Supplementary character test 1809 CharBuffer cbs = CharBuffer.allocate(100); 1810 cbs.put(toSupplementaries("fooXandXboo")); 1811 cbs.flip(); 1812 result = patternX.split(cbs); 1813 if (!result[0].equals(toSupplementaries("foo"))) 1814 failCount++; 1815 if (!result[1].equals(toSupplementaries("and"))) 1816 failCount++; 1817 if (!result[2].equals(toSupplementaries("boo"))) 1818 failCount++; 1819 1820 String source = "0123456789"; 1821 for (int limit=-2; limit<3; limit++) { 1822 for (int x=0; x<10; x++) { 1823 result = source.split(Integer.toString(x), limit); 1824 int expectedLength = limit < 1 ? 2 : limit; 1825 1826 if ((limit == 0) && (x == 9)) { 1827 // expected dropping of "" 1828 if (result.length != 1) 1829 failCount++; 1830 if (!result[0].equals("012345678")) { 1831 failCount++; 1832 } 1833 } else { 1834 if (result.length != expectedLength) { 1835 failCount++; 1836 } 1837 if (!result[0].equals(source.substring(0,x))) { 1838 if (limit != 1) { 1839 failCount++; 1840 } else { 1841 if (!result[0].equals(source.substring(0,10))) { 1842 failCount++; 1843 } 1844 } 1845 } 1846 if (expectedLength > 1) { // Check segment 2 1847 if (!result[1].equals(source.substring(x+1,10))) 1848 failCount++; 1849 } 1850 } 1851 } 1852 } 1853 // Check the case for no match found 1854 for (int limit=-2; limit<3; limit++) { 1855 result = source.split("e", limit); 1856 if (result.length != 1) 1857 failCount++; 1858 if (!result[0].equals(source)) 1859 failCount++; 1860 } 1861 // Check the case for limit == 0, source = ""; 1862 // split() now returns 0-length for empty source "" see #6559590 1863 source = ""; 1864 result = source.split("e", 0); 1865 if (result.length != 1) 1866 failCount++; 1867 if (!result[0].equals(source)) 1868 failCount++; 1869 1870 // Check both split() and splitAsStraem(), especially for zero-lenth 1871 // input and zero-lenth match cases 1872 String[][] input = new String[][] { 1873 { " ", "Abc Efg Hij" }, // normal non-zero-match 1874 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1875 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1876 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1877 { "(?=\\p{Lu})", "AbcEfg" }, 1878 { "(?=\\p{Lu})", "Abc" }, 1879 { " ", "" }, // zero-length input 1880 { ".*", "" }, 1881 1882 // some tests from PatternStreamTest.java 1883 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1884 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1885 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1886 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1887 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1888 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1889 { "\u56da", "" }, 1890 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1891 { "o", "boo:and:foo" }, 1892 { "o", "booooo:and:fooooo" }, 1893 { "o", "fooooo:" }, 1894 }; 1895 1896 String[][] expected = new String[][] { 1897 { "Abc", "Efg", "Hij" }, 1898 { "", "Abc", "Efg", "Hij" }, 1899 { "Abc", "", "Efg", "Hij" }, 1900 { "Abc", "Efg", "Hij" }, 1901 { "Abc", "Efg" }, 1902 { "Abc" }, 1903 { "" }, 1904 { "" }, 1905 1906 { "awgqwefg1fefw", "vssv1vvv1" }, 1907 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1908 { "awgqwefg", "fefw4vssv", "vvv" }, 1909 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1910 { "1", "23", "456", "7890" }, 1911 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1912 { "" }, 1913 { "This", "is", "testing", "", "with", "different", "separators" }, 1914 { "b", "", ":and:f" }, 1915 { "b", "", "", "", "", ":and:f" }, 1916 { "f", "", "", "", "", ":" }, 1917 }; 1918 for (int i = 0; i < input.length; i++) { 1919 pattern = Pattern.compile(input[i][0]); 1920 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1921 failCount++; 1922 } 1923 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1924 // array for zero-length input for now 1925 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1926 expected[i])) { 1927 failCount++; 1928 } 1929 } 1930 report("Split"); 1931 } 1932 1933 private static void negationTest() { 1934 Pattern pattern = Pattern.compile("[\\[@^]+"); 1935 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1936 if (!matcher.find()) 1937 failCount++; 1938 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1939 failCount++; 1940 pattern = Pattern.compile("[@\\[^]+"); 1941 matcher = pattern.matcher("@@@@[[[[^^^^"); 1942 if (!matcher.find()) 1943 failCount++; 1944 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1945 failCount++; 1946 pattern = Pattern.compile("[@\\[^@]+"); 1947 matcher = pattern.matcher("@@@@[[[[^^^^"); 1948 if (!matcher.find()) 1949 failCount++; 1950 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1951 failCount++; 1952 1953 pattern = Pattern.compile("\\)"); 1954 matcher = pattern.matcher("xxx)xxx"); 1955 if (!matcher.find()) 1956 failCount++; 1957 1958 report("Negation"); 1959 } 1960 1961 private static void ampersandTest() { 1962 Pattern pattern = Pattern.compile("[&@]+"); 1963 check(pattern, "@@@@&&&&", true); 1964 1965 pattern = Pattern.compile("[@&]+"); 1966 check(pattern, "@@@@&&&&", true); 1967 1968 pattern = Pattern.compile("[@\\&]+"); 1969 check(pattern, "@@@@&&&&", true); 1970 1971 report("Ampersand"); 1972 } 1973 1974 private static void octalTest() throws Exception { 1975 Pattern pattern = Pattern.compile("\\u0007"); 1976 Matcher matcher = pattern.matcher("\u0007"); 1977 if (!matcher.matches()) 1978 failCount++; 1979 pattern = Pattern.compile("\\07"); 1980 matcher = pattern.matcher("\u0007"); 1981 if (!matcher.matches()) 1982 failCount++; 1983 pattern = Pattern.compile("\\007"); 1984 matcher = pattern.matcher("\u0007"); 1985 if (!matcher.matches()) 1986 failCount++; 1987 pattern = Pattern.compile("\\0007"); 1988 matcher = pattern.matcher("\u0007"); 1989 if (!matcher.matches()) 1990 failCount++; 1991 pattern = Pattern.compile("\\040"); 1992 matcher = pattern.matcher("\u0020"); 1993 if (!matcher.matches()) 1994 failCount++; 1995 pattern = Pattern.compile("\\0403"); 1996 matcher = pattern.matcher("\u00203"); 1997 if (!matcher.matches()) 1998 failCount++; 1999 pattern = Pattern.compile("\\0103"); 2000 matcher = pattern.matcher("\u0043"); 2001 if (!matcher.matches()) 2002 failCount++; 2003 2004 report("Octal"); 2005 } 2006 2007 private static void longPatternTest() throws Exception { 2008 try { 2009 Pattern pattern = Pattern.compile( 2010 "a 32-character-long pattern xxxx"); 2011 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2012 pattern = Pattern.compile("a thirty four character long regex"); 2013 StringBuffer patternToBe = new StringBuffer(101); 2014 for (int i=0; i<100; i++) 2015 patternToBe.append((char)(97 + i%26)); 2016 pattern = Pattern.compile(patternToBe.toString()); 2017 } catch (PatternSyntaxException e) { 2018 failCount++; 2019 } 2020 2021 // Supplementary character test 2022 try { 2023 Pattern pattern = Pattern.compile( 2024 toSupplementaries("a 32-character-long pattern xxxx")); 2025 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2026 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2027 StringBuffer patternToBe = new StringBuffer(101*2); 2028 for (int i=0; i<100; i++) 2029 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2030 + 97 + i%26)); 2031 pattern = Pattern.compile(patternToBe.toString()); 2032 } catch (PatternSyntaxException e) { 2033 failCount++; 2034 } 2035 report("LongPattern"); 2036 } 2037 2038 private static void group0Test() throws Exception { 2039 Pattern pattern = Pattern.compile("(tes)ting"); 2040 Matcher matcher = pattern.matcher("testing"); 2041 check(matcher, "testing"); 2042 2043 matcher.reset("testing"); 2044 if (matcher.lookingAt()) { 2045 if (!matcher.group(0).equals("testing")) 2046 failCount++; 2047 } else { 2048 failCount++; 2049 } 2050 2051 matcher.reset("testing"); 2052 if (matcher.matches()) { 2053 if (!matcher.group(0).equals("testing")) 2054 failCount++; 2055 } else { 2056 failCount++; 2057 } 2058 2059 pattern = Pattern.compile("(tes)ting"); 2060 matcher = pattern.matcher("testing"); 2061 if (matcher.lookingAt()) { 2062 if (!matcher.group(0).equals("testing")) 2063 failCount++; 2064 } else { 2065 failCount++; 2066 } 2067 2068 pattern = Pattern.compile("^(tes)ting"); 2069 matcher = pattern.matcher("testing"); 2070 if (matcher.matches()) { 2071 if (!matcher.group(0).equals("testing")) 2072 failCount++; 2073 } else { 2074 failCount++; 2075 } 2076 2077 // Supplementary character test 2078 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2079 matcher = pattern.matcher(toSupplementaries("testing")); 2080 check(matcher, toSupplementaries("testing")); 2081 2082 matcher.reset(toSupplementaries("testing")); 2083 if (matcher.lookingAt()) { 2084 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2085 failCount++; 2086 } else { 2087 failCount++; 2088 } 2089 2090 matcher.reset(toSupplementaries("testing")); 2091 if (matcher.matches()) { 2092 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2093 failCount++; 2094 } else { 2095 failCount++; 2096 } 2097 2098 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2099 matcher = pattern.matcher(toSupplementaries("testing")); 2100 if (matcher.lookingAt()) { 2101 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2102 failCount++; 2103 } else { 2104 failCount++; 2105 } 2106 2107 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2108 matcher = pattern.matcher(toSupplementaries("testing")); 2109 if (matcher.matches()) { 2110 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2111 failCount++; 2112 } else { 2113 failCount++; 2114 } 2115 2116 report("Group0"); 2117 } 2118 2119 private static void findIntTest() throws Exception { 2120 Pattern p = Pattern.compile("blah"); 2121 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2122 boolean result = m.find(2); 2123 if (!result) 2124 failCount++; 2125 2126 p = Pattern.compile("$"); 2127 m = p.matcher("1234567890"); 2128 result = m.find(10); 2129 if (!result) 2130 failCount++; 2131 try { 2132 result = m.find(11); 2133 failCount++; 2134 } catch (IndexOutOfBoundsException e) { 2135 // correct result 2136 } 2137 2138 // Supplementary character test 2139 p = Pattern.compile(toSupplementaries("blah")); 2140 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2141 result = m.find(2); 2142 if (!result) 2143 failCount++; 2144 2145 report("FindInt"); 2146 } 2147 2148 private static void emptyPatternTest() throws Exception { 2149 Pattern p = Pattern.compile(""); 2150 Matcher m = p.matcher("foo"); 2151 2152 // Should find empty pattern at beginning of input 2153 boolean result = m.find(); 2154 if (result != true) 2155 failCount++; 2156 if (m.start() != 0) 2157 failCount++; 2158 2159 // Should not match entire input if input is not empty 2160 m.reset(); 2161 result = m.matches(); 2162 if (result == true) 2163 failCount++; 2164 2165 try { 2166 m.start(0); 2167 failCount++; 2168 } catch (IllegalStateException e) { 2169 // Correct result 2170 } 2171 2172 // Should match entire input if input is empty 2173 m.reset(""); 2174 result = m.matches(); 2175 if (result != true) 2176 failCount++; 2177 2178 result = Pattern.matches("", ""); 2179 if (result != true) 2180 failCount++; 2181 2182 result = Pattern.matches("", "foo"); 2183 if (result == true) 2184 failCount++; 2185 report("EmptyPattern"); 2186 } 2187 2188 private static void charClassTest() throws Exception { 2189 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2190 check(pattern, "blahb]blech", true); 2191 2192 pattern = Pattern.compile("[abc[def]]"); 2193 check(pattern, "b", true); 2194 2195 // Supplementary character tests 2196 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2197 check(pattern, toSupplementaries("blahb]blech"), true); 2198 2199 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2200 check(pattern, toSupplementaries("b"), true); 2201 2202 try { 2203 // u00ff when UNICODE_CASE 2204 pattern = Pattern.compile("[ab\u00ffcd]", 2205 Pattern.CASE_INSENSITIVE| 2206 Pattern.UNICODE_CASE); 2207 check(pattern, "ab\u00ffcd", true); 2208 check(pattern, "Ab\u0178Cd", true); 2209 2210 // u00b5 when UNICODE_CASE 2211 pattern = Pattern.compile("[ab\u00b5cd]", 2212 Pattern.CASE_INSENSITIVE| 2213 Pattern.UNICODE_CASE); 2214 check(pattern, "ab\u00b5cd", true); 2215 check(pattern, "Ab\u039cCd", true); 2216 } catch (Exception e) { failCount++; } 2217 2218 /* Special cases 2219 (1)LatinSmallLetterLongS u+017f 2220 (2)LatinSmallLetterDotlessI u+0131 2221 (3)LatineCapitalLetterIWithDotAbove u+0130 2222 (4)KelvinSign u+212a 2223 (5)AngstromSign u+212b 2224 */ 2225 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2226 pattern = Pattern.compile("[sik\u00c5]+", flags); 2227 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2228 failCount++; 2229 2230 report("CharClass"); 2231 } 2232 2233 private static void caretTest() throws Exception { 2234 Pattern pattern = Pattern.compile("\\w*"); 2235 Matcher matcher = pattern.matcher("a#bc#def##g"); 2236 check(matcher, "a"); 2237 check(matcher, ""); 2238 check(matcher, "bc"); 2239 check(matcher, ""); 2240 check(matcher, "def"); 2241 check(matcher, ""); 2242 check(matcher, ""); 2243 check(matcher, "g"); 2244 check(matcher, ""); 2245 if (matcher.find()) 2246 failCount++; 2247 2248 pattern = Pattern.compile("^\\w*"); 2249 matcher = pattern.matcher("a#bc#def##g"); 2250 check(matcher, "a"); 2251 if (matcher.find()) 2252 failCount++; 2253 2254 pattern = Pattern.compile("\\w"); 2255 matcher = pattern.matcher("abc##x"); 2256 check(matcher, "a"); 2257 check(matcher, "b"); 2258 check(matcher, "c"); 2259 check(matcher, "x"); 2260 if (matcher.find()) 2261 failCount++; 2262 2263 pattern = Pattern.compile("^\\w"); 2264 matcher = pattern.matcher("abc##x"); 2265 check(matcher, "a"); 2266 if (matcher.find()) 2267 failCount++; 2268 2269 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2270 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2271 check(matcher, "abc"); 2272 if (matcher.find()) 2273 failCount++; 2274 2275 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2276 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2277 check(matcher, "abc"); 2278 check(matcher, "jkl"); 2279 if (matcher.find()) 2280 failCount++; 2281 2282 pattern = Pattern.compile("^", Pattern.MULTILINE); 2283 matcher = pattern.matcher("this is some text"); 2284 String result = matcher.replaceAll("X"); 2285 if (!result.equals("Xthis is some text")) 2286 failCount++; 2287 2288 pattern = Pattern.compile("^"); 2289 matcher = pattern.matcher("this is some text"); 2290 result = matcher.replaceAll("X"); 2291 if (!result.equals("Xthis is some text")) 2292 failCount++; 2293 2294 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2295 matcher = pattern.matcher("this is some text\n"); 2296 result = matcher.replaceAll("X"); 2297 if (!result.equals("Xthis is some text\n")) 2298 failCount++; 2299 2300 report("Caret"); 2301 } 2302 2303 private static void groupCaptureTest() throws Exception { 2304 // Independent group 2305 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2306 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2307 matcher.find(); 2308 try { 2309 String blah = matcher.group(1); 2310 failCount++; 2311 } catch (IndexOutOfBoundsException ioobe) { 2312 // Good result 2313 } 2314 // Pure group 2315 pattern = Pattern.compile("x+(?:y+)z+"); 2316 matcher = pattern.matcher("xxxyyyzzz"); 2317 matcher.find(); 2318 try { 2319 String blah = matcher.group(1); 2320 failCount++; 2321 } catch (IndexOutOfBoundsException ioobe) { 2322 // Good result 2323 } 2324 2325 // Supplementary character tests 2326 // Independent group 2327 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2328 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2329 matcher.find(); 2330 try { 2331 String blah = matcher.group(1); 2332 failCount++; 2333 } catch (IndexOutOfBoundsException ioobe) { 2334 // Good result 2335 } 2336 // Pure group 2337 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2338 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2339 matcher.find(); 2340 try { 2341 String blah = matcher.group(1); 2342 failCount++; 2343 } catch (IndexOutOfBoundsException ioobe) { 2344 // Good result 2345 } 2346 2347 report("GroupCapture"); 2348 } 2349 2350 private static void backRefTest() throws Exception { 2351 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2352 check(pattern, "zzzaabcazzz", true); 2353 2354 pattern = Pattern.compile("(a*)bc\\1"); 2355 check(pattern, "zzzaabcaazzz", true); 2356 2357 pattern = Pattern.compile("(abc)(def)\\1"); 2358 check(pattern, "abcdefabc", true); 2359 2360 pattern = Pattern.compile("(abc)(def)\\3"); 2361 check(pattern, "abcdefabc", false); 2362 2363 try { 2364 for (int i = 1; i < 10; i++) { 2365 // Make sure backref 1-9 are always accepted 2366 pattern = Pattern.compile("abcdef\\" + i); 2367 // and fail to match if the target group does not exit 2368 check(pattern, "abcdef", false); 2369 } 2370 } catch(PatternSyntaxException e) { 2371 failCount++; 2372 } 2373 2374 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2375 check(pattern, "abcdefghija", false); 2376 check(pattern, "abcdefghija1", true); 2377 2378 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2379 check(pattern, "abcdefghijkk", true); 2380 2381 pattern = Pattern.compile("(a)bcdefghij\\11"); 2382 check(pattern, "abcdefghija1", true); 2383 2384 // Supplementary character tests 2385 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2386 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2387 2388 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2389 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2390 2391 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2392 check(pattern, toSupplementaries("abcdefabc"), true); 2393 2394 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2395 check(pattern, toSupplementaries("abcdefabc"), false); 2396 2397 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2398 check(pattern, toSupplementaries("abcdefghija"), false); 2399 check(pattern, toSupplementaries("abcdefghija1"), true); 2400 2401 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2402 check(pattern, toSupplementaries("abcdefghijkk"), true); 2403 2404 report("BackRef"); 2405 } 2406 2407 /** 2408 * Unicode Technical Report #18, section 2.6 End of Line 2409 * There is no empty line to be matched in the sequence \u000D\u000A 2410 * but there is an empty line in the sequence \u000A\u000D. 2411 */ 2412 private static void anchorTest() throws Exception { 2413 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2414 Matcher m = p.matcher("blah1\r\nblah2"); 2415 m.find(); 2416 m.find(); 2417 if (!m.group().equals("blah2")) 2418 failCount++; 2419 2420 m.reset("blah1\n\rblah2"); 2421 m.find(); 2422 m.find(); 2423 m.find(); 2424 if (!m.group().equals("blah2")) 2425 failCount++; 2426 2427 // Test behavior of $ with \r\n at end of input 2428 p = Pattern.compile(".+$"); 2429 m = p.matcher("blah1\r\n"); 2430 if (!m.find()) 2431 failCount++; 2432 if (!m.group().equals("blah1")) 2433 failCount++; 2434 if (m.find()) 2435 failCount++; 2436 2437 // Test behavior of $ with \r\n at end of input in multiline 2438 p = Pattern.compile(".+$", Pattern.MULTILINE); 2439 m = p.matcher("blah1\r\n"); 2440 if (!m.find()) 2441 failCount++; 2442 if (m.find()) 2443 failCount++; 2444 2445 // Test for $ recognition of \u0085 for bug 4527731 2446 p = Pattern.compile(".+$", Pattern.MULTILINE); 2447 m = p.matcher("blah1\u0085"); 2448 if (!m.find()) 2449 failCount++; 2450 2451 // Supplementary character test 2452 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2453 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2454 m.find(); 2455 m.find(); 2456 if (!m.group().equals(toSupplementaries("blah2"))) 2457 failCount++; 2458 2459 m.reset(toSupplementaries("blah1\n\rblah2")); 2460 m.find(); 2461 m.find(); 2462 m.find(); 2463 if (!m.group().equals(toSupplementaries("blah2"))) 2464 failCount++; 2465 2466 // Test behavior of $ with \r\n at end of input 2467 p = Pattern.compile(".+$"); 2468 m = p.matcher(toSupplementaries("blah1\r\n")); 2469 if (!m.find()) 2470 failCount++; 2471 if (!m.group().equals(toSupplementaries("blah1"))) 2472 failCount++; 2473 if (m.find()) 2474 failCount++; 2475 2476 // Test behavior of $ with \r\n at end of input in multiline 2477 p = Pattern.compile(".+$", Pattern.MULTILINE); 2478 m = p.matcher(toSupplementaries("blah1\r\n")); 2479 if (!m.find()) 2480 failCount++; 2481 if (m.find()) 2482 failCount++; 2483 2484 // Test for $ recognition of \u0085 for bug 4527731 2485 p = Pattern.compile(".+$", Pattern.MULTILINE); 2486 m = p.matcher(toSupplementaries("blah1\u0085")); 2487 if (!m.find()) 2488 failCount++; 2489 2490 report("Anchors"); 2491 } 2492 2493 /** 2494 * A basic sanity test of Matcher.lookingAt(). 2495 */ 2496 private static void lookingAtTest() throws Exception { 2497 Pattern p = Pattern.compile("(ab)(c*)"); 2498 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2499 2500 if (!m.lookingAt()) 2501 failCount++; 2502 2503 if (!m.group().equals(m.group(0))) 2504 failCount++; 2505 2506 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2507 if (m.lookingAt()) 2508 failCount++; 2509 2510 // Supplementary character test 2511 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2512 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2513 2514 if (!m.lookingAt()) 2515 failCount++; 2516 2517 if (!m.group().equals(m.group(0))) 2518 failCount++; 2519 2520 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2521 if (m.lookingAt()) 2522 failCount++; 2523 2524 report("Looking At"); 2525 } 2526 2527 /** 2528 * A basic sanity test of Matcher.matches(). 2529 */ 2530 private static void matchesTest() throws Exception { 2531 // matches() 2532 Pattern p = Pattern.compile("ulb(c*)"); 2533 Matcher m = p.matcher("ulbcccccc"); 2534 if (!m.matches()) 2535 failCount++; 2536 2537 // find() but not matches() 2538 m.reset("zzzulbcccccc"); 2539 if (m.matches()) 2540 failCount++; 2541 2542 // lookingAt() but not matches() 2543 m.reset("ulbccccccdef"); 2544 if (m.matches()) 2545 failCount++; 2546 2547 // matches() 2548 p = Pattern.compile("a|ad"); 2549 m = p.matcher("ad"); 2550 if (!m.matches()) 2551 failCount++; 2552 2553 // Supplementary character test 2554 // matches() 2555 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2556 m = p.matcher(toSupplementaries("ulbcccccc")); 2557 if (!m.matches()) 2558 failCount++; 2559 2560 // find() but not matches() 2561 m.reset(toSupplementaries("zzzulbcccccc")); 2562 if (m.matches()) 2563 failCount++; 2564 2565 // lookingAt() but not matches() 2566 m.reset(toSupplementaries("ulbccccccdef")); 2567 if (m.matches()) 2568 failCount++; 2569 2570 // matches() 2571 p = Pattern.compile(toSupplementaries("a|ad")); 2572 m = p.matcher(toSupplementaries("ad")); 2573 if (!m.matches()) 2574 failCount++; 2575 2576 report("Matches"); 2577 } 2578 2579 /** 2580 * A basic sanity test of Pattern.matches(). 2581 */ 2582 private static void patternMatchesTest() throws Exception { 2583 // matches() 2584 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2585 toSupplementaries("ulbcccccc"))) 2586 failCount++; 2587 2588 // find() but not matches() 2589 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2590 toSupplementaries("zzzulbcccccc"))) 2591 failCount++; 2592 2593 // lookingAt() but not matches() 2594 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2595 toSupplementaries("ulbccccccdef"))) 2596 failCount++; 2597 2598 // Supplementary character test 2599 // matches() 2600 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2601 toSupplementaries("ulbcccccc"))) 2602 failCount++; 2603 2604 // find() but not matches() 2605 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2606 toSupplementaries("zzzulbcccccc"))) 2607 failCount++; 2608 2609 // lookingAt() but not matches() 2610 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2611 toSupplementaries("ulbccccccdef"))) 2612 failCount++; 2613 2614 report("Pattern Matches"); 2615 } 2616 2617 /** 2618 * Canonical equivalence testing. Tests the ability of the engine 2619 * to match sequences that are not explicitly specified in the 2620 * pattern when they are considered equivalent by the Unicode Standard. 2621 */ 2622 private static void ceTest() throws Exception { 2623 // Decomposed char outside char classes 2624 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2625 Matcher m = p.matcher("test\u00e5"); 2626 if (!m.matches()) 2627 failCount++; 2628 2629 m.reset("testa\u030a"); 2630 if (!m.matches()) 2631 failCount++; 2632 2633 // Composed char outside char classes 2634 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2635 m = p.matcher("test\u00e5"); 2636 if (!m.matches()) 2637 failCount++; 2638 2639 m.reset("testa\u030a"); 2640 if (!m.find()) 2641 failCount++; 2642 2643 // Decomposed char inside a char class 2644 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2645 m = p.matcher("test\u00e5"); 2646 if (!m.find()) 2647 failCount++; 2648 2649 m.reset("testa\u030a"); 2650 if (!m.find()) 2651 failCount++; 2652 2653 // Composed char inside a char class 2654 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2655 m = p.matcher("test\u00e5"); 2656 if (!m.find()) 2657 failCount++; 2658 2659 m.reset("testa\u0300"); 2660 if (!m.find()) 2661 failCount++; 2662 2663 m.reset("testa\u030a"); 2664 if (!m.find()) 2665 failCount++; 2666 2667 // Marks that cannot legally change order and be equivalent 2668 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2669 check(p, "testa\u0308\u0300", true); 2670 check(p, "testa\u0300\u0308", false); 2671 2672 // Marks that can legally change order and be equivalent 2673 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2674 check(p, "testa\u0308\u0323", true); 2675 check(p, "testa\u0323\u0308", true); 2676 2677 // Test all equivalences of the sequence a\u0308\u0323\u0300 2678 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2679 check(p, "testa\u0308\u0323\u0300", true); 2680 check(p, "testa\u0323\u0308\u0300", true); 2681 check(p, "testa\u0308\u0300\u0323", true); 2682 check(p, "test\u00e4\u0323\u0300", true); 2683 check(p, "test\u00e4\u0300\u0323", true); 2684 2685 Object[][] data = new Object[][] { 2686 2687 // JDK-4867170 2688 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2689 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2690 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2691 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2692 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2693 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2694 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2695 2696 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2697 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2698 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2699 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2700 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2701 2702 // backtracking, force to match "\u1f80", instead of \u1f82" 2703 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2704 2705 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2706 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2707 2708 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2709 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2710 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2711 2712 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2713 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2714 { "\u1f80", "ab\u1f80cd", "f", true }, 2715 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2716 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2717 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2718 { "\u1f82", "\u1f80\u0300", "m", true }, 2719 2720 // JDK-7080302 # compile failed 2721 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2722 2723 // JDK-6728861, same cause as above one 2724 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2725 2726 // JDK-6995635 2727 { "(\u00e9)", "e\u0301", "m", true }, 2728 2729 // JDK-6736245 2730 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2731 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2732 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2733 2734 // 4916384. 2735 // Decomposed hangul (jamos) works inside clazz 2736 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2737 { "[\u1100\u1161]", "\uac00", "m", true}, 2738 2739 { "[\uac00]", "\u1100\u1161", "m", true}, 2740 { "[\uac00]", "\uac00", "m", true}, 2741 2742 // Decomposed hangul (jamos) 2743 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2744 { "\u1100\u1161", "\uac00", "m", true}, 2745 2746 // Composed hangul 2747 { "\uac00", "\u1100\u1161", "m", true }, 2748 { "\uac00", "\uac00", "m", true }, 2749 2750 /* Need a NFDSlice to nfd the source to solve this issue 2751 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2752 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2753 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2754 2755 // Decomposed supplementary outside char classes 2756 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2757 // Composed supplementary outside char classes 2758 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2759 */ 2760 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2761 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2762 2763 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2764 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2765 }; 2766 2767 int failCount = 0; 2768 for (Object[] d : data) { 2769 String pn = (String)d[0]; 2770 String tt = (String)d[1]; 2771 boolean isFind = "f".equals(((String)d[2])); 2772 boolean expected = (boolean)d[3]; 2773 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2774 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2775 if (ret != expected) { 2776 failCount++; 2777 continue; 2778 } 2779 } 2780 report("Canonical Equivalence"); 2781 } 2782 2783 /** 2784 * A basic sanity test of Matcher.replaceAll(). 2785 */ 2786 private static void globalSubstitute() throws Exception { 2787 // Global substitution with a literal 2788 Pattern p = Pattern.compile("(ab)(c*)"); 2789 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2790 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2791 failCount++; 2792 2793 m.reset("zzzabccczzzabcczzzabccczzz"); 2794 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2795 failCount++; 2796 2797 // Global substitution with groups 2798 m.reset("zzzabccczzzabcczzzabccczzz"); 2799 String result = m.replaceAll("$1"); 2800 if (!result.equals("zzzabzzzabzzzabzzz")) 2801 failCount++; 2802 2803 // Supplementary character test 2804 // Global substitution with a literal 2805 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2806 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2807 if (!m.replaceAll(toSupplementaries("test")). 2808 equals(toSupplementaries("testzzztestzzztest"))) 2809 failCount++; 2810 2811 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2812 if (!m.replaceAll(toSupplementaries("test")). 2813 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2814 failCount++; 2815 2816 // Global substitution with groups 2817 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2818 result = m.replaceAll("$1"); 2819 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2820 failCount++; 2821 2822 report("Global Substitution"); 2823 } 2824 2825 /** 2826 * Tests the usage of Matcher.appendReplacement() with literal 2827 * and group substitutions. 2828 */ 2829 private static void stringbufferSubstitute() throws Exception { 2830 // SB substitution with literal 2831 String blah = "zzzblahzzz"; 2832 Pattern p = Pattern.compile("blah"); 2833 Matcher m = p.matcher(blah); 2834 StringBuffer result = new StringBuffer(); 2835 try { 2836 m.appendReplacement(result, "blech"); 2837 failCount++; 2838 } catch (IllegalStateException e) { 2839 } 2840 m.find(); 2841 m.appendReplacement(result, "blech"); 2842 if (!result.toString().equals("zzzblech")) 2843 failCount++; 2844 2845 m.appendTail(result); 2846 if (!result.toString().equals("zzzblechzzz")) 2847 failCount++; 2848 2849 // SB substitution with groups 2850 blah = "zzzabcdzzz"; 2851 p = Pattern.compile("(ab)(cd)*"); 2852 m = p.matcher(blah); 2853 result = new StringBuffer(); 2854 try { 2855 m.appendReplacement(result, "$1"); 2856 failCount++; 2857 } catch (IllegalStateException e) { 2858 } 2859 m.find(); 2860 m.appendReplacement(result, "$1"); 2861 if (!result.toString().equals("zzzab")) 2862 failCount++; 2863 2864 m.appendTail(result); 2865 if (!result.toString().equals("zzzabzzz")) 2866 failCount++; 2867 2868 // SB substitution with 3 groups 2869 blah = "zzzabcdcdefzzz"; 2870 p = Pattern.compile("(ab)(cd)*(ef)"); 2871 m = p.matcher(blah); 2872 result = new StringBuffer(); 2873 try { 2874 m.appendReplacement(result, "$1w$2w$3"); 2875 failCount++; 2876 } catch (IllegalStateException e) { 2877 } 2878 m.find(); 2879 m.appendReplacement(result, "$1w$2w$3"); 2880 if (!result.toString().equals("zzzabwcdwef")) 2881 failCount++; 2882 2883 m.appendTail(result); 2884 if (!result.toString().equals("zzzabwcdwefzzz")) 2885 failCount++; 2886 2887 // SB substitution with groups and three matches 2888 // skipping middle match 2889 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2890 p = Pattern.compile("(ab)(cd*)"); 2891 m = p.matcher(blah); 2892 result = new StringBuffer(); 2893 try { 2894 m.appendReplacement(result, "$1"); 2895 failCount++; 2896 } catch (IllegalStateException e) { 2897 } 2898 m.find(); 2899 m.appendReplacement(result, "$1"); 2900 if (!result.toString().equals("zzzab")) 2901 failCount++; 2902 2903 m.find(); 2904 m.find(); 2905 m.appendReplacement(result, "$2"); 2906 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2907 failCount++; 2908 2909 m.appendTail(result); 2910 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2911 failCount++; 2912 2913 // Check to make sure escaped $ is ignored 2914 blah = "zzzabcdcdefzzz"; 2915 p = Pattern.compile("(ab)(cd)*(ef)"); 2916 m = p.matcher(blah); 2917 result = new StringBuffer(); 2918 m.find(); 2919 m.appendReplacement(result, "$1w\\$2w$3"); 2920 if (!result.toString().equals("zzzabw$2wef")) 2921 failCount++; 2922 2923 m.appendTail(result); 2924 if (!result.toString().equals("zzzabw$2wefzzz")) 2925 failCount++; 2926 2927 // Check to make sure a reference to nonexistent group causes error 2928 blah = "zzzabcdcdefzzz"; 2929 p = Pattern.compile("(ab)(cd)*(ef)"); 2930 m = p.matcher(blah); 2931 result = new StringBuffer(); 2932 m.find(); 2933 try { 2934 m.appendReplacement(result, "$1w$5w$3"); 2935 failCount++; 2936 } catch (IndexOutOfBoundsException ioobe) { 2937 // Correct result 2938 } 2939 2940 // Check double digit group references 2941 blah = "zzz123456789101112zzz"; 2942 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2943 m = p.matcher(blah); 2944 result = new StringBuffer(); 2945 m.find(); 2946 m.appendReplacement(result, "$1w$11w$3"); 2947 if (!result.toString().equals("zzz1w11w3")) 2948 failCount++; 2949 2950 // Check to make sure it backs off $15 to $1 if only three groups 2951 blah = "zzzabcdcdefzzz"; 2952 p = Pattern.compile("(ab)(cd)*(ef)"); 2953 m = p.matcher(blah); 2954 result = new StringBuffer(); 2955 m.find(); 2956 m.appendReplacement(result, "$1w$15w$3"); 2957 if (!result.toString().equals("zzzabwab5wef")) 2958 failCount++; 2959 2960 2961 // Supplementary character test 2962 // SB substitution with literal 2963 blah = toSupplementaries("zzzblahzzz"); 2964 p = Pattern.compile(toSupplementaries("blah")); 2965 m = p.matcher(blah); 2966 result = new StringBuffer(); 2967 try { 2968 m.appendReplacement(result, toSupplementaries("blech")); 2969 failCount++; 2970 } catch (IllegalStateException e) { 2971 } 2972 m.find(); 2973 m.appendReplacement(result, toSupplementaries("blech")); 2974 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2975 failCount++; 2976 2977 m.appendTail(result); 2978 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2979 failCount++; 2980 2981 // SB substitution with groups 2982 blah = toSupplementaries("zzzabcdzzz"); 2983 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2984 m = p.matcher(blah); 2985 result = new StringBuffer(); 2986 try { 2987 m.appendReplacement(result, "$1"); 2988 failCount++; 2989 } catch (IllegalStateException e) { 2990 } 2991 m.find(); 2992 m.appendReplacement(result, "$1"); 2993 if (!result.toString().equals(toSupplementaries("zzzab"))) 2994 failCount++; 2995 2996 m.appendTail(result); 2997 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2998 failCount++; 2999 3000 // SB substitution with 3 groups 3001 blah = toSupplementaries("zzzabcdcdefzzz"); 3002 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3003 m = p.matcher(blah); 3004 result = new StringBuffer(); 3005 try { 3006 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3007 failCount++; 3008 } catch (IllegalStateException e) { 3009 } 3010 m.find(); 3011 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3012 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3013 failCount++; 3014 3015 m.appendTail(result); 3016 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3017 failCount++; 3018 3019 // SB substitution with groups and three matches 3020 // skipping middle match 3021 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3022 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3023 m = p.matcher(blah); 3024 result = new StringBuffer(); 3025 try { 3026 m.appendReplacement(result, "$1"); 3027 failCount++; 3028 } catch (IllegalStateException e) { 3029 } 3030 m.find(); 3031 m.appendReplacement(result, "$1"); 3032 if (!result.toString().equals(toSupplementaries("zzzab"))) 3033 failCount++; 3034 3035 m.find(); 3036 m.find(); 3037 m.appendReplacement(result, "$2"); 3038 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3039 failCount++; 3040 3041 m.appendTail(result); 3042 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3043 failCount++; 3044 3045 // Check to make sure escaped $ is ignored 3046 blah = toSupplementaries("zzzabcdcdefzzz"); 3047 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3048 m = p.matcher(blah); 3049 result = new StringBuffer(); 3050 m.find(); 3051 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3052 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3053 failCount++; 3054 3055 m.appendTail(result); 3056 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3057 failCount++; 3058 3059 // Check to make sure a reference to nonexistent group causes error 3060 blah = toSupplementaries("zzzabcdcdefzzz"); 3061 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3062 m = p.matcher(blah); 3063 result = new StringBuffer(); 3064 m.find(); 3065 try { 3066 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3067 failCount++; 3068 } catch (IndexOutOfBoundsException ioobe) { 3069 // Correct result 3070 } 3071 3072 // Check double digit group references 3073 blah = toSupplementaries("zzz123456789101112zzz"); 3074 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3075 m = p.matcher(blah); 3076 result = new StringBuffer(); 3077 m.find(); 3078 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3079 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3080 failCount++; 3081 3082 // Check to make sure it backs off $15 to $1 if only three groups 3083 blah = toSupplementaries("zzzabcdcdefzzz"); 3084 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3085 m = p.matcher(blah); 3086 result = new StringBuffer(); 3087 m.find(); 3088 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3089 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3090 failCount++; 3091 3092 // Check nothing has been appended into the output buffer if 3093 // the replacement string triggers IllegalArgumentException. 3094 p = Pattern.compile("(abc)"); 3095 m = p.matcher("abcd"); 3096 result = new StringBuffer(); 3097 m.find(); 3098 try { 3099 m.appendReplacement(result, ("xyz$g")); 3100 failCount++; 3101 } catch (IllegalArgumentException iae) { 3102 if (result.length() != 0) 3103 failCount++; 3104 } 3105 3106 report("SB Substitution"); 3107 } 3108 3109 /** 3110 * Tests the usage of Matcher.appendReplacement() with literal 3111 * and group substitutions. 3112 */ 3113 private static void stringbuilderSubstitute() throws Exception { 3114 // SB substitution with literal 3115 String blah = "zzzblahzzz"; 3116 Pattern p = Pattern.compile("blah"); 3117 Matcher m = p.matcher(blah); 3118 StringBuilder result = new StringBuilder(); 3119 try { 3120 m.appendReplacement(result, "blech"); 3121 failCount++; 3122 } catch (IllegalStateException e) { 3123 } 3124 m.find(); 3125 m.appendReplacement(result, "blech"); 3126 if (!result.toString().equals("zzzblech")) 3127 failCount++; 3128 3129 m.appendTail(result); 3130 if (!result.toString().equals("zzzblechzzz")) 3131 failCount++; 3132 3133 // SB substitution with groups 3134 blah = "zzzabcdzzz"; 3135 p = Pattern.compile("(ab)(cd)*"); 3136 m = p.matcher(blah); 3137 result = new StringBuilder(); 3138 try { 3139 m.appendReplacement(result, "$1"); 3140 failCount++; 3141 } catch (IllegalStateException e) { 3142 } 3143 m.find(); 3144 m.appendReplacement(result, "$1"); 3145 if (!result.toString().equals("zzzab")) 3146 failCount++; 3147 3148 m.appendTail(result); 3149 if (!result.toString().equals("zzzabzzz")) 3150 failCount++; 3151 3152 // SB substitution with 3 groups 3153 blah = "zzzabcdcdefzzz"; 3154 p = Pattern.compile("(ab)(cd)*(ef)"); 3155 m = p.matcher(blah); 3156 result = new StringBuilder(); 3157 try { 3158 m.appendReplacement(result, "$1w$2w$3"); 3159 failCount++; 3160 } catch (IllegalStateException e) { 3161 } 3162 m.find(); 3163 m.appendReplacement(result, "$1w$2w$3"); 3164 if (!result.toString().equals("zzzabwcdwef")) 3165 failCount++; 3166 3167 m.appendTail(result); 3168 if (!result.toString().equals("zzzabwcdwefzzz")) 3169 failCount++; 3170 3171 // SB substitution with groups and three matches 3172 // skipping middle match 3173 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3174 p = Pattern.compile("(ab)(cd*)"); 3175 m = p.matcher(blah); 3176 result = new StringBuilder(); 3177 try { 3178 m.appendReplacement(result, "$1"); 3179 failCount++; 3180 } catch (IllegalStateException e) { 3181 } 3182 m.find(); 3183 m.appendReplacement(result, "$1"); 3184 if (!result.toString().equals("zzzab")) 3185 failCount++; 3186 3187 m.find(); 3188 m.find(); 3189 m.appendReplacement(result, "$2"); 3190 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3191 failCount++; 3192 3193 m.appendTail(result); 3194 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3195 failCount++; 3196 3197 // Check to make sure escaped $ is ignored 3198 blah = "zzzabcdcdefzzz"; 3199 p = Pattern.compile("(ab)(cd)*(ef)"); 3200 m = p.matcher(blah); 3201 result = new StringBuilder(); 3202 m.find(); 3203 m.appendReplacement(result, "$1w\\$2w$3"); 3204 if (!result.toString().equals("zzzabw$2wef")) 3205 failCount++; 3206 3207 m.appendTail(result); 3208 if (!result.toString().equals("zzzabw$2wefzzz")) 3209 failCount++; 3210 3211 // Check to make sure a reference to nonexistent group causes error 3212 blah = "zzzabcdcdefzzz"; 3213 p = Pattern.compile("(ab)(cd)*(ef)"); 3214 m = p.matcher(blah); 3215 result = new StringBuilder(); 3216 m.find(); 3217 try { 3218 m.appendReplacement(result, "$1w$5w$3"); 3219 failCount++; 3220 } catch (IndexOutOfBoundsException ioobe) { 3221 // Correct result 3222 } 3223 3224 // Check double digit group references 3225 blah = "zzz123456789101112zzz"; 3226 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3227 m = p.matcher(blah); 3228 result = new StringBuilder(); 3229 m.find(); 3230 m.appendReplacement(result, "$1w$11w$3"); 3231 if (!result.toString().equals("zzz1w11w3")) 3232 failCount++; 3233 3234 // Check to make sure it backs off $15 to $1 if only three groups 3235 blah = "zzzabcdcdefzzz"; 3236 p = Pattern.compile("(ab)(cd)*(ef)"); 3237 m = p.matcher(blah); 3238 result = new StringBuilder(); 3239 m.find(); 3240 m.appendReplacement(result, "$1w$15w$3"); 3241 if (!result.toString().equals("zzzabwab5wef")) 3242 failCount++; 3243 3244 3245 // Supplementary character test 3246 // SB substitution with literal 3247 blah = toSupplementaries("zzzblahzzz"); 3248 p = Pattern.compile(toSupplementaries("blah")); 3249 m = p.matcher(blah); 3250 result = new StringBuilder(); 3251 try { 3252 m.appendReplacement(result, toSupplementaries("blech")); 3253 failCount++; 3254 } catch (IllegalStateException e) { 3255 } 3256 m.find(); 3257 m.appendReplacement(result, toSupplementaries("blech")); 3258 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3259 failCount++; 3260 m.appendTail(result); 3261 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3262 failCount++; 3263 3264 // SB substitution with groups 3265 blah = toSupplementaries("zzzabcdzzz"); 3266 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3267 m = p.matcher(blah); 3268 result = new StringBuilder(); 3269 try { 3270 m.appendReplacement(result, "$1"); 3271 failCount++; 3272 } catch (IllegalStateException e) { 3273 } 3274 m.find(); 3275 m.appendReplacement(result, "$1"); 3276 if (!result.toString().equals(toSupplementaries("zzzab"))) 3277 failCount++; 3278 3279 m.appendTail(result); 3280 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3281 failCount++; 3282 3283 // SB substitution with 3 groups 3284 blah = toSupplementaries("zzzabcdcdefzzz"); 3285 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3286 m = p.matcher(blah); 3287 result = new StringBuilder(); 3288 try { 3289 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3290 failCount++; 3291 } catch (IllegalStateException e) { 3292 } 3293 m.find(); 3294 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3295 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3296 failCount++; 3297 3298 m.appendTail(result); 3299 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3300 failCount++; 3301 3302 // SB substitution with groups and three matches 3303 // skipping middle match 3304 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3305 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3306 m = p.matcher(blah); 3307 result = new StringBuilder(); 3308 try { 3309 m.appendReplacement(result, "$1"); 3310 failCount++; 3311 } catch (IllegalStateException e) { 3312 } 3313 m.find(); 3314 m.appendReplacement(result, "$1"); 3315 if (!result.toString().equals(toSupplementaries("zzzab"))) 3316 failCount++; 3317 3318 m.find(); 3319 m.find(); 3320 m.appendReplacement(result, "$2"); 3321 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3322 failCount++; 3323 3324 m.appendTail(result); 3325 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3326 failCount++; 3327 3328 // Check to make sure escaped $ is ignored 3329 blah = toSupplementaries("zzzabcdcdefzzz"); 3330 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3331 m = p.matcher(blah); 3332 result = new StringBuilder(); 3333 m.find(); 3334 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3335 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3336 failCount++; 3337 3338 m.appendTail(result); 3339 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3340 failCount++; 3341 3342 // Check to make sure a reference to nonexistent group causes error 3343 blah = toSupplementaries("zzzabcdcdefzzz"); 3344 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3345 m = p.matcher(blah); 3346 result = new StringBuilder(); 3347 m.find(); 3348 try { 3349 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3350 failCount++; 3351 } catch (IndexOutOfBoundsException ioobe) { 3352 // Correct result 3353 } 3354 // Check double digit group references 3355 blah = toSupplementaries("zzz123456789101112zzz"); 3356 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3357 m = p.matcher(blah); 3358 result = new StringBuilder(); 3359 m.find(); 3360 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3361 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3362 failCount++; 3363 3364 // Check to make sure it backs off $15 to $1 if only three groups 3365 blah = toSupplementaries("zzzabcdcdefzzz"); 3366 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3367 m = p.matcher(blah); 3368 result = new StringBuilder(); 3369 m.find(); 3370 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3371 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3372 failCount++; 3373 // Check nothing has been appended into the output buffer if 3374 // the replacement string triggers IllegalArgumentException. 3375 p = Pattern.compile("(abc)"); 3376 m = p.matcher("abcd"); 3377 result = new StringBuilder(); 3378 m.find(); 3379 try { 3380 m.appendReplacement(result, ("xyz$g")); 3381 failCount++; 3382 } catch (IllegalArgumentException iae) { 3383 if (result.length() != 0) 3384 failCount++; 3385 } 3386 report("SB Substitution 2"); 3387 } 3388 3389 /* 3390 * 5 groups of characters are created to make a substitution string. 3391 * A base string will be created including random lead chars, the 3392 * substitution string, and random trailing chars. 3393 * A pattern containing the 5 groups is searched for and replaced with: 3394 * random group + random string + random group. 3395 * The results are checked for correctness. 3396 */ 3397 private static void substitutionBasher() { 3398 for (int runs = 0; runs<1000; runs++) { 3399 // Create a base string to work in 3400 int leadingChars = generator.nextInt(10); 3401 StringBuffer baseBuffer = new StringBuffer(100); 3402 String leadingString = getRandomAlphaString(leadingChars); 3403 baseBuffer.append(leadingString); 3404 3405 // Create 5 groups of random number of random chars 3406 // Create the string to substitute 3407 // Create the pattern string to search for 3408 StringBuffer bufferToSub = new StringBuffer(25); 3409 StringBuffer bufferToPat = new StringBuffer(50); 3410 String[] groups = new String[5]; 3411 for(int i=0; i<5; i++) { 3412 int aGroupSize = generator.nextInt(5)+1; 3413 groups[i] = getRandomAlphaString(aGroupSize); 3414 bufferToSub.append(groups[i]); 3415 bufferToPat.append('('); 3416 bufferToPat.append(groups[i]); 3417 bufferToPat.append(')'); 3418 } 3419 String stringToSub = bufferToSub.toString(); 3420 String pattern = bufferToPat.toString(); 3421 3422 // Place sub string into working string at random index 3423 baseBuffer.append(stringToSub); 3424 3425 // Append random chars to end 3426 int trailingChars = generator.nextInt(10); 3427 String trailingString = getRandomAlphaString(trailingChars); 3428 baseBuffer.append(trailingString); 3429 String baseString = baseBuffer.toString(); 3430 3431 // Create test pattern and matcher 3432 Pattern p = Pattern.compile(pattern); 3433 Matcher m = p.matcher(baseString); 3434 3435 // Reject candidate if pattern happens to start early 3436 m.find(); 3437 if (m.start() < leadingChars) 3438 continue; 3439 3440 // Reject candidate if more than one match 3441 if (m.find()) 3442 continue; 3443 3444 // Construct a replacement string with : 3445 // random group + random string + random group 3446 StringBuffer bufferToRep = new StringBuffer(); 3447 int groupIndex1 = generator.nextInt(5); 3448 bufferToRep.append("$" + (groupIndex1 + 1)); 3449 String randomMidString = getRandomAlphaString(5); 3450 bufferToRep.append(randomMidString); 3451 int groupIndex2 = generator.nextInt(5); 3452 bufferToRep.append("$" + (groupIndex2 + 1)); 3453 String replacement = bufferToRep.toString(); 3454 3455 // Do the replacement 3456 String result = m.replaceAll(replacement); 3457 3458 // Construct expected result 3459 StringBuffer bufferToRes = new StringBuffer(); 3460 bufferToRes.append(leadingString); 3461 bufferToRes.append(groups[groupIndex1]); 3462 bufferToRes.append(randomMidString); 3463 bufferToRes.append(groups[groupIndex2]); 3464 bufferToRes.append(trailingString); 3465 String expectedResult = bufferToRes.toString(); 3466 3467 // Check results 3468 if (!result.equals(expectedResult)) 3469 failCount++; 3470 } 3471 3472 report("Substitution Basher"); 3473 } 3474 3475 /* 3476 * 5 groups of characters are created to make a substitution string. 3477 * A base string will be created including random lead chars, the 3478 * substitution string, and random trailing chars. 3479 * A pattern containing the 5 groups is searched for and replaced with: 3480 * random group + random string + random group. 3481 * The results are checked for correctness. 3482 */ 3483 private static void substitutionBasher2() { 3484 for (int runs = 0; runs<1000; runs++) { 3485 // Create a base string to work in 3486 int leadingChars = generator.nextInt(10); 3487 StringBuilder baseBuffer = new StringBuilder(100); 3488 String leadingString = getRandomAlphaString(leadingChars); 3489 baseBuffer.append(leadingString); 3490 3491 // Create 5 groups of random number of random chars 3492 // Create the string to substitute 3493 // Create the pattern string to search for 3494 StringBuilder bufferToSub = new StringBuilder(25); 3495 StringBuilder bufferToPat = new StringBuilder(50); 3496 String[] groups = new String[5]; 3497 for(int i=0; i<5; i++) { 3498 int aGroupSize = generator.nextInt(5)+1; 3499 groups[i] = getRandomAlphaString(aGroupSize); 3500 bufferToSub.append(groups[i]); 3501 bufferToPat.append('('); 3502 bufferToPat.append(groups[i]); 3503 bufferToPat.append(')'); 3504 } 3505 String stringToSub = bufferToSub.toString(); 3506 String pattern = bufferToPat.toString(); 3507 3508 // Place sub string into working string at random index 3509 baseBuffer.append(stringToSub); 3510 3511 // Append random chars to end 3512 int trailingChars = generator.nextInt(10); 3513 String trailingString = getRandomAlphaString(trailingChars); 3514 baseBuffer.append(trailingString); 3515 String baseString = baseBuffer.toString(); 3516 3517 // Create test pattern and matcher 3518 Pattern p = Pattern.compile(pattern); 3519 Matcher m = p.matcher(baseString); 3520 3521 // Reject candidate if pattern happens to start early 3522 m.find(); 3523 if (m.start() < leadingChars) 3524 continue; 3525 3526 // Reject candidate if more than one match 3527 if (m.find()) 3528 continue; 3529 3530 // Construct a replacement string with : 3531 // random group + random string + random group 3532 StringBuilder bufferToRep = new StringBuilder(); 3533 int groupIndex1 = generator.nextInt(5); 3534 bufferToRep.append("$" + (groupIndex1 + 1)); 3535 String randomMidString = getRandomAlphaString(5); 3536 bufferToRep.append(randomMidString); 3537 int groupIndex2 = generator.nextInt(5); 3538 bufferToRep.append("$" + (groupIndex2 + 1)); 3539 String replacement = bufferToRep.toString(); 3540 3541 // Do the replacement 3542 String result = m.replaceAll(replacement); 3543 3544 // Construct expected result 3545 StringBuilder bufferToRes = new StringBuilder(); 3546 bufferToRes.append(leadingString); 3547 bufferToRes.append(groups[groupIndex1]); 3548 bufferToRes.append(randomMidString); 3549 bufferToRes.append(groups[groupIndex2]); 3550 bufferToRes.append(trailingString); 3551 String expectedResult = bufferToRes.toString(); 3552 3553 // Check results 3554 if (!result.equals(expectedResult)) { 3555 failCount++; 3556 } 3557 } 3558 3559 report("Substitution Basher 2"); 3560 } 3561 3562 /** 3563 * Checks the handling of some escape sequences that the Pattern 3564 * class should process instead of the java compiler. These are 3565 * not in the file because the escapes should be be processed 3566 * by the Pattern class when the regex is compiled. 3567 */ 3568 private static void escapes() throws Exception { 3569 Pattern p = Pattern.compile("\\043"); 3570 Matcher m = p.matcher("#"); 3571 if (!m.find()) 3572 failCount++; 3573 3574 p = Pattern.compile("\\x23"); 3575 m = p.matcher("#"); 3576 if (!m.find()) 3577 failCount++; 3578 3579 p = Pattern.compile("\\u0023"); 3580 m = p.matcher("#"); 3581 if (!m.find()) 3582 failCount++; 3583 3584 report("Escape sequences"); 3585 } 3586 3587 /** 3588 * Checks the handling of blank input situations. These 3589 * tests are incompatible with my test file format. 3590 */ 3591 private static void blankInput() throws Exception { 3592 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3593 Matcher m = p.matcher(""); 3594 if (m.find()) 3595 failCount++; 3596 3597 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3598 m = p.matcher(""); 3599 if (!m.find()) 3600 failCount++; 3601 3602 p = Pattern.compile("abc"); 3603 m = p.matcher(""); 3604 if (m.find()) 3605 failCount++; 3606 3607 p = Pattern.compile("a*"); 3608 m = p.matcher(""); 3609 if (!m.find()) 3610 failCount++; 3611 3612 report("Blank input"); 3613 } 3614 3615 /** 3616 * Tests the Boyer-Moore pattern matching of a character sequence 3617 * on randomly generated patterns. 3618 */ 3619 private static void bm() throws Exception { 3620 doBnM('a'); 3621 report("Boyer Moore (ASCII)"); 3622 3623 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3624 report("Boyer Moore (Supplementary)"); 3625 } 3626 3627 private static void doBnM(int baseCharacter) throws Exception { 3628 int achar=0; 3629 3630 for (int i=0; i<100; i++) { 3631 // Create a short pattern to search for 3632 int patternLength = generator.nextInt(7) + 4; 3633 StringBuffer patternBuffer = new StringBuffer(patternLength); 3634 String pattern; 3635 retry: for (;;) { 3636 for (int x=0; x<patternLength; x++) { 3637 int ch = baseCharacter + generator.nextInt(26); 3638 if (Character.isSupplementaryCodePoint(ch)) { 3639 patternBuffer.append(Character.toChars(ch)); 3640 } else { 3641 patternBuffer.append((char)ch); 3642 } 3643 } 3644 pattern = patternBuffer.toString(); 3645 3646 // Avoid patterns that start and end with the same substring 3647 // See JDK-6854417 3648 for (int x=1; x < pattern.length(); x++) { 3649 if (pattern.startsWith(pattern.substring(x))) 3650 continue retry; 3651 } 3652 break; 3653 } 3654 Pattern p = Pattern.compile(pattern); 3655 3656 // Create a buffer with random ASCII chars that does 3657 // not match the sample 3658 String toSearch = null; 3659 StringBuffer s = null; 3660 Matcher m = p.matcher(""); 3661 do { 3662 s = new StringBuffer(100); 3663 for (int x=0; x<100; x++) { 3664 int ch = baseCharacter + generator.nextInt(26); 3665 if (Character.isSupplementaryCodePoint(ch)) { 3666 s.append(Character.toChars(ch)); 3667 } else { 3668 s.append((char)ch); 3669 } 3670 } 3671 toSearch = s.toString(); 3672 m.reset(toSearch); 3673 } while (m.find()); 3674 3675 // Insert the pattern at a random spot 3676 int insertIndex = generator.nextInt(99); 3677 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3678 insertIndex++; 3679 s = s.insert(insertIndex, pattern); 3680 toSearch = s.toString(); 3681 3682 // Make sure that the pattern is found 3683 m.reset(toSearch); 3684 if (!m.find()) 3685 failCount++; 3686 3687 // Make sure that the match text is the pattern 3688 if (!m.group().equals(pattern)) 3689 failCount++; 3690 3691 // Make sure match occured at insertion point 3692 if (m.start() != insertIndex) 3693 failCount++; 3694 } 3695 } 3696 3697 /** 3698 * Tests the matching of slices on randomly generated patterns. 3699 * The Boyer-Moore optimization is not done on these patterns 3700 * because it uses unicode case folding. 3701 */ 3702 private static void slice() throws Exception { 3703 doSlice(Character.MAX_VALUE); 3704 report("Slice"); 3705 3706 doSlice(Character.MAX_CODE_POINT); 3707 report("Slice (Supplementary)"); 3708 } 3709 3710 private static void doSlice(int maxCharacter) throws Exception { 3711 Random generator = new Random(); 3712 int achar=0; 3713 3714 for (int i=0; i<100; i++) { 3715 // Create a short pattern to search for 3716 int patternLength = generator.nextInt(7) + 4; 3717 StringBuffer patternBuffer = new StringBuffer(patternLength); 3718 for (int x=0; x<patternLength; x++) { 3719 int randomChar = 0; 3720 while (!Character.isLetterOrDigit(randomChar)) 3721 randomChar = generator.nextInt(maxCharacter); 3722 if (Character.isSupplementaryCodePoint(randomChar)) { 3723 patternBuffer.append(Character.toChars(randomChar)); 3724 } else { 3725 patternBuffer.append((char) randomChar); 3726 } 3727 } 3728 String pattern = patternBuffer.toString(); 3729 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3730 3731 // Create a buffer with random chars that does not match the sample 3732 String toSearch = null; 3733 StringBuffer s = null; 3734 Matcher m = p.matcher(""); 3735 do { 3736 s = new StringBuffer(100); 3737 for (int x=0; x<100; x++) { 3738 int randomChar = 0; 3739 while (!Character.isLetterOrDigit(randomChar)) 3740 randomChar = generator.nextInt(maxCharacter); 3741 if (Character.isSupplementaryCodePoint(randomChar)) { 3742 s.append(Character.toChars(randomChar)); 3743 } else { 3744 s.append((char) randomChar); 3745 } 3746 } 3747 toSearch = s.toString(); 3748 m.reset(toSearch); 3749 } while (m.find()); 3750 3751 // Insert the pattern at a random spot 3752 int insertIndex = generator.nextInt(99); 3753 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3754 insertIndex++; 3755 s = s.insert(insertIndex, pattern); 3756 toSearch = s.toString(); 3757 3758 // Make sure that the pattern is found 3759 m.reset(toSearch); 3760 if (!m.find()) 3761 failCount++; 3762 3763 // Make sure that the match text is the pattern 3764 if (!m.group().equals(pattern)) 3765 failCount++; 3766 3767 // Make sure match occured at insertion point 3768 if (m.start() != insertIndex) 3769 failCount++; 3770 } 3771 } 3772 3773 private static void explainFailure(String pattern, String data, 3774 String expected, String actual) { 3775 System.err.println("----------------------------------------"); 3776 System.err.println("Pattern = "+pattern); 3777 System.err.println("Data = "+data); 3778 System.err.println("Expected = " + expected); 3779 System.err.println("Actual = " + actual); 3780 } 3781 3782 private static void explainFailure(String pattern, String data, 3783 Throwable t) { 3784 System.err.println("----------------------------------------"); 3785 System.err.println("Pattern = "+pattern); 3786 System.err.println("Data = "+data); 3787 t.printStackTrace(System.err); 3788 } 3789 3790 // Testing examples from a file 3791 3792 /** 3793 * Goes through the file "TestCases.txt" and creates many patterns 3794 * described in the file, matching the patterns against input lines in 3795 * the file, and comparing the results against the correct results 3796 * also found in the file. The file format is described in comments 3797 * at the head of the file. 3798 */ 3799 private static void processFile(String fileName) throws Exception { 3800 File testCases = new File(System.getProperty("test.src", "."), 3801 fileName); 3802 FileInputStream in = new FileInputStream(testCases); 3803 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3804 3805 // Process next test case. 3806 String aLine; 3807 while((aLine = r.readLine()) != null) { 3808 // Read a line for pattern 3809 String patternString = grabLine(r); 3810 Pattern p = null; 3811 try { 3812 p = compileTestPattern(patternString); 3813 } catch (PatternSyntaxException e) { 3814 String dataString = grabLine(r); 3815 String expectedResult = grabLine(r); 3816 if (expectedResult.startsWith("error")) 3817 continue; 3818 explainFailure(patternString, dataString, e); 3819 failCount++; 3820 continue; 3821 } 3822 3823 // Read a line for input string 3824 String dataString = grabLine(r); 3825 Matcher m = p.matcher(dataString); 3826 StringBuffer result = new StringBuffer(); 3827 3828 // Check for IllegalStateExceptions before a match 3829 failCount += preMatchInvariants(m); 3830 3831 boolean found = m.find(); 3832 3833 if (found) 3834 failCount += postTrueMatchInvariants(m); 3835 else 3836 failCount += postFalseMatchInvariants(m); 3837 3838 if (found) { 3839 result.append("true "); 3840 result.append(m.group(0) + " "); 3841 } else { 3842 result.append("false "); 3843 } 3844 3845 result.append(m.groupCount()); 3846 3847 if (found) { 3848 for (int i=1; i<m.groupCount()+1; i++) 3849 if (m.group(i) != null) 3850 result.append(" " +m.group(i)); 3851 } 3852 3853 // Read a line for the expected result 3854 String expectedResult = grabLine(r); 3855 3856 if (!result.toString().equals(expectedResult)) { 3857 explainFailure(patternString, dataString, expectedResult, result.toString()); 3858 failCount++; 3859 } 3860 } 3861 3862 report(fileName); 3863 } 3864 3865 private static int preMatchInvariants(Matcher m) { 3866 int failCount = 0; 3867 try { 3868 m.start(); 3869 failCount++; 3870 } catch (IllegalStateException ise) {} 3871 try { 3872 m.end(); 3873 failCount++; 3874 } catch (IllegalStateException ise) {} 3875 try { 3876 m.group(); 3877 failCount++; 3878 } catch (IllegalStateException ise) {} 3879 return failCount; 3880 } 3881 3882 private static int postFalseMatchInvariants(Matcher m) { 3883 int failCount = 0; 3884 try { 3885 m.group(); 3886 failCount++; 3887 } catch (IllegalStateException ise) {} 3888 try { 3889 m.start(); 3890 failCount++; 3891 } catch (IllegalStateException ise) {} 3892 try { 3893 m.end(); 3894 failCount++; 3895 } catch (IllegalStateException ise) {} 3896 return failCount; 3897 } 3898 3899 private static int postTrueMatchInvariants(Matcher m) { 3900 int failCount = 0; 3901 //assert(m.start() = m.start(0); 3902 if (m.start() != m.start(0)) 3903 failCount++; 3904 //assert(m.end() = m.end(0); 3905 if (m.start() != m.start(0)) 3906 failCount++; 3907 //assert(m.group() = m.group(0); 3908 if (!m.group().equals(m.group(0))) 3909 failCount++; 3910 try { 3911 m.group(50); 3912 failCount++; 3913 } catch (IndexOutOfBoundsException ise) {} 3914 3915 return failCount; 3916 } 3917 3918 private static Pattern compileTestPattern(String patternString) { 3919 if (!patternString.startsWith("'")) { 3920 return Pattern.compile(patternString); 3921 } 3922 int break1 = patternString.lastIndexOf("'"); 3923 String flagString = patternString.substring( 3924 break1+1, patternString.length()); 3925 patternString = patternString.substring(1, break1); 3926 3927 if (flagString.equals("i")) 3928 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3929 3930 if (flagString.equals("m")) 3931 return Pattern.compile(patternString, Pattern.MULTILINE); 3932 3933 return Pattern.compile(patternString); 3934 } 3935 3936 /** 3937 * Reads a line from the input file. Keeps reading lines until a non 3938 * empty non comment line is read. If the line contains a \n then 3939 * these two characters are replaced by a newline char. If a \\uxxxx 3940 * sequence is read then the sequence is replaced by the unicode char. 3941 */ 3942 private static String grabLine(BufferedReader r) throws Exception { 3943 int index = 0; 3944 String line = r.readLine(); 3945 while (line.startsWith("//") || line.length() < 1) 3946 line = r.readLine(); 3947 while ((index = line.indexOf("\\n")) != -1) { 3948 StringBuffer temp = new StringBuffer(line); 3949 temp.replace(index, index+2, "\n"); 3950 line = temp.toString(); 3951 } 3952 while ((index = line.indexOf("\\u")) != -1) { 3953 StringBuffer temp = new StringBuffer(line); 3954 String value = temp.substring(index+2, index+6); 3955 char aChar = (char)Integer.parseInt(value, 16); 3956 String unicodeChar = "" + aChar; 3957 temp.replace(index, index+6, unicodeChar); 3958 line = temp.toString(); 3959 } 3960 3961 return line; 3962 } 3963 3964 private static void check(Pattern p, String s, String g, String expected) { 3965 Matcher m = p.matcher(s); 3966 m.find(); 3967 if (!m.group(g).equals(expected) || 3968 s.charAt(m.start(g)) != expected.charAt(0) || 3969 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3970 failCount++; 3971 } 3972 3973 private static void checkReplaceFirst(String p, String s, String r, String expected) 3974 { 3975 if (!expected.equals(Pattern.compile(p) 3976 .matcher(s) 3977 .replaceFirst(r))) 3978 failCount++; 3979 } 3980 3981 private static void checkReplaceAll(String p, String s, String r, String expected) 3982 { 3983 if (!expected.equals(Pattern.compile(p) 3984 .matcher(s) 3985 .replaceAll(r))) 3986 failCount++; 3987 } 3988 3989 private static void checkExpectedFail(String p) { 3990 try { 3991 Pattern.compile(p); 3992 } catch (PatternSyntaxException pse) { 3993 //pse.printStackTrace(); 3994 return; 3995 } 3996 failCount++; 3997 } 3998 3999 private static void checkExpectedIAE(Matcher m, String g) { 4000 m.find(); 4001 try { 4002 m.group(g); 4003 } catch (IllegalArgumentException x) { 4004 //iae.printStackTrace(); 4005 try { 4006 m.start(g); 4007 } catch (IllegalArgumentException xx) { 4008 try { 4009 m.start(g); 4010 } catch (IllegalArgumentException xxx) { 4011 return; 4012 } 4013 } 4014 } 4015 failCount++; 4016 } 4017 4018 private static void checkExpectedNPE(Matcher m) { 4019 m.find(); 4020 try { 4021 m.group(null); 4022 } catch (NullPointerException x) { 4023 try { 4024 m.start(null); 4025 } catch (NullPointerException xx) { 4026 try { 4027 m.end(null); 4028 } catch (NullPointerException xxx) { 4029 return; 4030 } 4031 } 4032 } 4033 failCount++; 4034 } 4035 4036 private static void namedGroupCaptureTest() throws Exception { 4037 check(Pattern.compile("x+(?<gname>y+)z+"), 4038 "xxxyyyzzz", 4039 "gname", 4040 "yyy"); 4041 4042 check(Pattern.compile("x+(?<gname8>y+)z+"), 4043 "xxxyyyzzz", 4044 "gname8", 4045 "yyy"); 4046 4047 //backref 4048 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4049 check(pattern, "zzzaabcazzz", true); // found "abca" 4050 4051 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4052 "zzzaabcaazzz", true); 4053 4054 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4055 "abcdefabc", true); 4056 4057 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4058 "abcdefghijkk", true); 4059 4060 // Supplementary character tests 4061 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4062 toSupplementaries("zzzaabcazzz"), true); 4063 4064 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4065 toSupplementaries("zzzaabcaazzz"), true); 4066 4067 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4068 toSupplementaries("abcdefabc"), true); 4069 4070 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4071 "(?<gname>" + 4072 toSupplementaries("k)") + "\\k<gname>"), 4073 toSupplementaries("abcdefghijkk"), true); 4074 4075 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4076 "xxxyyyzzzyyy", 4077 "gname", 4078 "yyy"); 4079 4080 //replaceFirst/All 4081 checkReplaceFirst("(?<gn>ab)(c*)", 4082 "abccczzzabcczzzabccc", 4083 "${gn}", 4084 "abzzzabcczzzabccc"); 4085 4086 checkReplaceAll("(?<gn>ab)(c*)", 4087 "abccczzzabcczzzabccc", 4088 "${gn}", 4089 "abzzzabzzzab"); 4090 4091 4092 checkReplaceFirst("(?<gn>ab)(c*)", 4093 "zzzabccczzzabcczzzabccczzz", 4094 "${gn}", 4095 "zzzabzzzabcczzzabccczzz"); 4096 4097 checkReplaceAll("(?<gn>ab)(c*)", 4098 "zzzabccczzzabcczzzabccczzz", 4099 "${gn}", 4100 "zzzabzzzabzzzabzzz"); 4101 4102 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4103 "zzzabccczzzabcczzzabccczzz", 4104 "${gn2}", 4105 "zzzccczzzabcczzzabccczzz"); 4106 4107 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4108 "zzzabccczzzabcczzzabccczzz", 4109 "${gn2}", 4110 "zzzccczzzcczzzccczzz"); 4111 4112 //toSupplementaries("(ab)(c*)")); 4113 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4114 ")(?<gn2>" + toSupplementaries("c") + "*)", 4115 toSupplementaries("abccczzzabcczzzabccc"), 4116 "${gn1}", 4117 toSupplementaries("abzzzabcczzzabccc")); 4118 4119 4120 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4121 ")(?<gn2>" + toSupplementaries("c") + "*)", 4122 toSupplementaries("abccczzzabcczzzabccc"), 4123 "${gn1}", 4124 toSupplementaries("abzzzabzzzab")); 4125 4126 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4127 ")(?<gn2>" + toSupplementaries("c") + "*)", 4128 toSupplementaries("abccczzzabcczzzabccc"), 4129 "${gn2}", 4130 toSupplementaries("ccczzzabcczzzabccc")); 4131 4132 4133 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4134 ")(?<gn2>" + toSupplementaries("c") + "*)", 4135 toSupplementaries("abccczzzabcczzzabccc"), 4136 "${gn2}", 4137 toSupplementaries("ccczzzcczzzccc")); 4138 4139 checkReplaceFirst("(?<dog>Dog)AndCat", 4140 "zzzDogAndCatzzzDogAndCatzzz", 4141 "${dog}", 4142 "zzzDogzzzDogAndCatzzz"); 4143 4144 4145 checkReplaceAll("(?<dog>Dog)AndCat", 4146 "zzzDogAndCatzzzDogAndCatzzz", 4147 "${dog}", 4148 "zzzDogzzzDogzzz"); 4149 4150 // backref in Matcher & String 4151 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4152 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4153 failCount++; 4154 4155 // negative 4156 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4157 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4158 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4159 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4160 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4161 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4162 "gnameX"); 4163 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4164 report("NamedGroupCapture"); 4165 } 4166 4167 // This is for bug 6919132 4168 private static void nonBmpClassComplementTest() throws Exception { 4169 Pattern p = Pattern.compile("\\P{Lu}"); 4170 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4171 4172 if (m.find() && m.start() == 1) 4173 failCount++; 4174 4175 // from a unicode category 4176 p = Pattern.compile("\\P{Lu}"); 4177 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4178 if (m.find()) 4179 failCount++; 4180 if (!m.hitEnd()) 4181 failCount++; 4182 4183 // block 4184 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4185 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4186 if (m.find() && m.start() == 1) 4187 failCount++; 4188 4189 p = Pattern.compile("\\P{sc=GRANTHA}"); 4190 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4191 if (m.find() && m.start() == 1) 4192 failCount++; 4193 4194 report("NonBmpClassComplement"); 4195 } 4196 4197 private static void unicodePropertiesTest() throws Exception { 4198 // different forms 4199 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4200 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4201 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4202 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4203 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4204 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4205 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4206 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4207 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4208 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4209 failCount++; 4210 4211 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4212 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4213 Matcher lastSM = common; 4214 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4215 4216 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4217 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4218 Matcher lastBM = latin; 4219 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4220 4221 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4222 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4223 continue; // only pick couple code points, they are the same 4224 } 4225 4226 // Unicode Script 4227 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4228 Matcher m; 4229 String str = new String(Character.toChars(cp)); 4230 if (script == lastScript) { 4231 m = lastSM; 4232 m.reset(str); 4233 } else { 4234 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4235 } 4236 if (!m.matches()) { 4237 failCount++; 4238 } 4239 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4240 other.reset(str); 4241 if (other.matches()) { 4242 failCount++; 4243 } 4244 lastSM = m; 4245 lastScript = script; 4246 4247 // Unicode Block 4248 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4249 if (block == null) { 4250 //System.out.printf("Not a Block: cp=%x%n", cp); 4251 continue; 4252 } 4253 if (block == lastBlock) { 4254 m = lastBM; 4255 m.reset(str); 4256 } else { 4257 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4258 } 4259 if (!m.matches()) { 4260 failCount++; 4261 } 4262 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4263 other.reset(str); 4264 if (other.matches()) { 4265 failCount++; 4266 } 4267 lastBM = m; 4268 lastBlock = block; 4269 } 4270 report("unicodeProperties"); 4271 } 4272 4273 private static void unicodeHexNotationTest() throws Exception { 4274 4275 // negative 4276 checkExpectedFail("\\x{-23}"); 4277 checkExpectedFail("\\x{110000}"); 4278 checkExpectedFail("\\x{}"); 4279 checkExpectedFail("\\x{AB[ef]"); 4280 4281 // codepoint 4282 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4283 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4284 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4285 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4286 4287 // in class 4288 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4289 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4290 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4291 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4292 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4293 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4294 4295 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4296 String s = "A" + new String(Character.toChars(cp)) + "B"; 4297 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4298 : String.format("\\u%04x\\u%04x", 4299 (int) Character.toChars(cp)[0], 4300 (int) Character.toChars(cp)[1]); 4301 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4302 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4303 failCount++; 4304 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4305 failCount++; 4306 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4307 failCount++; 4308 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4309 failCount++; 4310 } 4311 report("unicodeHexNotation"); 4312 } 4313 4314 private static void unicodeClassesTest() throws Exception { 4315 4316 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4317 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4318 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4319 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4320 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4321 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4322 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4323 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4324 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4325 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4326 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4327 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4328 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4329 Matcher bound = Pattern.compile("\\b").matcher(""); 4330 Matcher word = Pattern.compile("\\w++").matcher(""); 4331 // UNICODE_CHARACTER_CLASS 4332 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4333 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4334 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4335 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4336 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4337 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4338 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4339 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4340 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4341 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4342 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4343 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4344 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4345 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4346 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4347 // embedded flag (?U) 4348 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4349 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4350 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4351 4352 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4353 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4354 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4355 // properties 4356 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4357 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4358 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4359 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4360 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4361 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4362 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4363 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4364 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4365 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4366 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4367 // javaMethod 4368 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4369 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4370 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4371 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4372 // GC/C 4373 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4374 4375 for (int cp = 1; cp < 0x30000; cp++) { 4376 String str = new String(Character.toChars(cp)); 4377 int type = Character.getType(cp); 4378 if (// lower 4379 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4380 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4381 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4382 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4383 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4384 // upper 4385 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4386 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4387 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4388 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4389 // alpha 4390 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4391 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4392 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4393 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4394 // digit 4395 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4396 Character.isDigit(cp) != digitU.reset(str).matches() || 4397 // alnum 4398 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4399 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4400 // punct 4401 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4402 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4403 // graph 4404 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4405 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4406 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4407 // blank 4408 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4409 != blank.reset(str).matches() || 4410 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4411 // print 4412 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4413 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4414 // cntrl 4415 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4416 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4417 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4418 // hexdigit 4419 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4420 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4421 // space 4422 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4423 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4424 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4425 // word 4426 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4427 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4428 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4429 // bwordb 4430 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4431 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4432 // properties 4433 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4434 Character.isLetter(cp) != letterP.reset(str).matches()|| 4435 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4436 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4437 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4438 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4439 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4440 // gc_C 4441 (Character.CONTROL == type || Character.FORMAT == type || 4442 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4443 Character.UNASSIGNED == type) 4444 != gcC.reset(str).matches()) { 4445 failCount++; 4446 } 4447 } 4448 4449 // bounds/word align 4450 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4451 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4452 failCount++; 4453 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4454 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4455 failCount++; 4456 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4457 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4458 failCount++; 4459 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4460 failCount++; 4461 report("unicodePredefinedClasses"); 4462 } 4463 4464 private static void unicodeCharacterNameTest() throws Exception { 4465 4466 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4467 if (!Character.isValidCodePoint(cp) || 4468 Character.getType(cp) == Character.UNASSIGNED) 4469 continue; 4470 String str = new String(Character.toChars(cp)); 4471 // single 4472 String p = "\\N{" + Character.getName(cp) + "}"; 4473 if (!Pattern.compile(p).matcher(str).matches()) { 4474 failCount++; 4475 } 4476 // class[c] 4477 p = "[\\N{" + Character.getName(cp) + "}]"; 4478 if (!Pattern.compile(p).matcher(str).matches()) { 4479 failCount++; 4480 } 4481 } 4482 4483 // range 4484 for (int i = 0; i < 10; i++) { 4485 int start = generator.nextInt(20); 4486 int end = start + generator.nextInt(200); 4487 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4488 String str; 4489 for (int cp = start; cp < end; cp++) { 4490 str = new String(Character.toChars(cp)); 4491 if (!Pattern.compile(p).matcher(str).matches()) { 4492 failCount++; 4493 } 4494 } 4495 str = new String(Character.toChars(end + 10)); 4496 if (Pattern.compile(p).matcher(str).matches()) { 4497 failCount++; 4498 } 4499 } 4500 4501 // slice 4502 for (int i = 0; i < 10; i++) { 4503 int n = generator.nextInt(256); 4504 int[] buf = new int[n]; 4505 StringBuffer sb = new StringBuffer(1024); 4506 for (int j = 0; j < n; j++) { 4507 int cp = generator.nextInt(1000); 4508 if (!Character.isValidCodePoint(cp) || 4509 Character.getType(cp) == Character.UNASSIGNED) 4510 cp = 0x4e00; // just use 4e00 4511 sb.append("\\N{" + Character.getName(cp) + "}"); 4512 buf[j] = cp; 4513 } 4514 String p = sb.toString(); 4515 String str = new String(buf, 0, buf.length); 4516 if (!Pattern.compile(p).matcher(str).matches()) { 4517 failCount++; 4518 } 4519 } 4520 report("unicodeCharacterName"); 4521 } 4522 4523 private static void horizontalAndVerticalWSTest() throws Exception { 4524 String hws = new String (new char[] { 4525 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4526 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4527 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4528 0x202f, 0x205f, 0x3000 }); 4529 String vws = new String (new char[] { 4530 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4531 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4532 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4533 failCount++; 4534 if (Pattern.compile("\\H").matcher(hws).find() || 4535 Pattern.compile("[\\H]").matcher(hws).find()) 4536 failCount++; 4537 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4538 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4539 failCount++; 4540 if (Pattern.compile("\\V").matcher(vws).find() || 4541 Pattern.compile("[\\V]").matcher(vws).find()) 4542 failCount++; 4543 String prefix = "abcd"; 4544 String suffix = "efgh"; 4545 String ng = "A"; 4546 for (int i = 0; i < hws.length(); i++) { 4547 String c = String.valueOf(hws.charAt(i)); 4548 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4549 if (!m.find() || !c.equals(m.group())) 4550 failCount++; 4551 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4552 if (!m.find() || !c.equals(m.group())) 4553 failCount++; 4554 4555 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4556 if (!m.find() || !ng.equals(m.group())) 4557 failCount++; 4558 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4559 if (!m.find() || !ng.equals(m.group())) 4560 failCount++; 4561 } 4562 for (int i = 0; i < vws.length(); i++) { 4563 String c = String.valueOf(vws.charAt(i)); 4564 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4565 if (!m.find() || !c.equals(m.group())) 4566 failCount++; 4567 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4568 if (!m.find() || !c.equals(m.group())) 4569 failCount++; 4570 4571 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4572 if (!m.find() || !ng.equals(m.group())) 4573 failCount++; 4574 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4575 if (!m.find() || !ng.equals(m.group())) 4576 failCount++; 4577 } 4578 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4579 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4580 failCount++; 4581 report("horizontalAndVerticalWSTest"); 4582 } 4583 4584 private static void linebreakTest() throws Exception { 4585 String linebreaks = new String (new char[] { 4586 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4587 String crnl = "\r\n"; 4588 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4589 Pattern.compile("\\R").matcher(crnl).matches() && 4590 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4591 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4592 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4593 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4594 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4595 failCount++; 4596 } 4597 report("linebreakTest"); 4598 } 4599 4600 // #7189363 4601 private static void branchTest() throws Exception { 4602 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4603 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4604 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4605 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4606 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4607 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4608 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4609 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4610 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4611 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4612 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4613 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4614 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4615 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4616 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4617 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4618 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4619 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4620 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4621 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4622 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4623 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4624 failCount++; 4625 report("branchTest"); 4626 } 4627 4628 // This test is for 8007395 4629 private static void groupCurlyNotFoundSuppTest() throws Exception { 4630 String input = "test this as \ud83d\ude0d"; 4631 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4632 "test(.)*(@[a-zA-Z.]+)", 4633 "test([^B])+(@[a-zA-Z.]+)", 4634 "test([^B])*(@[a-zA-Z.]+)", 4635 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4636 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4637 }) { 4638 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4639 .matcher(input); 4640 try { 4641 if (m.find()) { 4642 failCount++; 4643 } 4644 } catch (Exception x) { 4645 failCount++; 4646 } 4647 } 4648 report("GroupCurly NotFoundSupp"); 4649 } 4650 4651 // This test is for 8023647 4652 private static void groupCurlyBackoffTest() throws Exception { 4653 if (!"abc1c".matches("(\\w)+1\\1") || 4654 "abc11".matches("(\\w)+1\\1")) { 4655 failCount++; 4656 } 4657 report("GroupCurly backoff"); 4658 } 4659 4660 // This test is for 8012646 4661 private static void patternAsPredicate() throws Exception { 4662 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4663 4664 if (p.test("")) { 4665 failCount++; 4666 } 4667 if (!p.test("word")) { 4668 failCount++; 4669 } 4670 if (p.test("1234")) { 4671 failCount++; 4672 } 4673 report("Pattern.asPredicate"); 4674 } 4675 4676 // This test is for 8035975 4677 private static void invalidFlags() throws Exception { 4678 for (int flag = 1; flag != 0; flag <<= 1) { 4679 switch (flag) { 4680 case Pattern.CASE_INSENSITIVE: 4681 case Pattern.MULTILINE: 4682 case Pattern.DOTALL: 4683 case Pattern.UNICODE_CASE: 4684 case Pattern.CANON_EQ: 4685 case Pattern.UNIX_LINES: 4686 case Pattern.LITERAL: 4687 case Pattern.UNICODE_CHARACTER_CLASS: 4688 case Pattern.COMMENTS: 4689 // valid flag, continue 4690 break; 4691 default: 4692 try { 4693 Pattern.compile(".", flag); 4694 failCount++; 4695 } catch (IllegalArgumentException expected) { 4696 } 4697 } 4698 } 4699 report("Invalid compile flags"); 4700 } 4701 4702 // This test is for 8158482 4703 private static void embeddedFlags() throws Exception { 4704 try { 4705 Pattern.compile("(?i).(?-i)."); 4706 Pattern.compile("(?m).(?-m)."); 4707 Pattern.compile("(?s).(?-s)."); 4708 Pattern.compile("(?d).(?-d)."); 4709 Pattern.compile("(?u).(?-u)."); 4710 Pattern.compile("(?c).(?-c)."); 4711 Pattern.compile("(?x).(?-x)."); 4712 Pattern.compile("(?U).(?-U)."); 4713 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4714 } catch (PatternSyntaxException x) { 4715 failCount++; 4716 } 4717 report("Embedded flags"); 4718 } 4719 4720 private static void grapheme() throws Exception { 4721 Files.lines(Paths.get(System.getProperty("test.src", "."), 4722 "GraphemeBreakTest.txt")) 4723 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4724 .forEach( ln -> { 4725 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4726 // System.out.println(str); 4727 String[] strs = ln.split("\u00f7|\u00d7"); 4728 StringBuilder src = new StringBuilder(); 4729 ArrayList<String> graphemes = new ArrayList<>(); 4730 StringBuilder buf = new StringBuilder(); 4731 int offBk = 0; 4732 for (String str : strs) { 4733 if (str.length() == 0) // first empty str 4734 continue; 4735 int cp = Integer.parseInt(str, 16); 4736 src.appendCodePoint(cp); 4737 buf.appendCodePoint(cp); 4738 offBk += (str.length() + 1); 4739 if (ln.charAt(offBk) == '\u00f7') { // DIV 4740 graphemes.add(buf.toString()); 4741 buf = new StringBuilder(); 4742 } 4743 } 4744 Pattern p = Pattern.compile("\\X"); 4745 Matcher m = p.matcher(src.toString()); 4746 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4747 for (String g : graphemes) { 4748 // System.out.printf(" grapheme:=[%s]%n", g); 4749 // (1) test \\X directly 4750 if (!m.find() || !m.group().equals(g)) { 4751 System.out.println("Failed \\X [" + ln + "] : " + g); 4752 failCount++; 4753 } 4754 // (2) test \\b{g} + \\X via Scanner 4755 boolean hasNext = s.hasNext(p); 4756 // if (!s.hasNext() || !s.next().equals(next)) { 4757 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4758 System.out.println("Failed b{g} [" + ln + "] : " + g); 4759 failCount++; 4760 } 4761 } 4762 }); 4763 // some sanity checks 4764 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4765 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4766 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4767 failCount++; 4768 // make sure "\b{n}" still works 4769 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4770 failCount++; 4771 report("Unicode extended grapheme cluster"); 4772 } 4773 4774 // hangup/timeout if go into exponential backtracking 4775 private static void expoBacktracking() throws Exception { 4776 4777 Object[][] patternMatchers = { 4778 // 6328855 4779 { "(.*\n*)*", 4780 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4781 false }, 4782 // 6192895 4783 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4784 "Hello World this is a test this is a test this is a test A", 4785 true }, 4786 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4787 "Hello World this is a test this is a test this is a test \u4e00 ", 4788 false }, 4789 { " *([a-z0-9]+ *)+", 4790 "hello world this is a test this is a test this is a test A", 4791 false }, 4792 // 4771934 [FIXED] #5013651? 4793 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4794 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4795 true }, 4796 // 4866249 [FIXED] 4797 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4798 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4799 true }, 4800 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4801 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4802 false }, 4803 // 6345469 4804 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4805 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4806 true }, // --> matched 4807 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4808 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4809 false }, 4810 // 5026912 4811 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4812 "156580451111112225588087755221111111566969655555555", 4813 false}, 4814 // 6988218 4815 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4816 "'%)) order by ANGEBOT.ID", 4817 false}, // find 4818 // 6693451 4819 { "^(\\s*foo\\s*)*$", 4820 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4821 true }, 4822 { "^(\\s*foo\\s*)*$", 4823 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4824 false 4825 }, 4826 // 7006761 4827 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4828 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4829 // 8140212 4830 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4831 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4832 false 4833 }, 4834 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4835 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4836 4837 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4838 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4839 4840 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4841 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4842 4843 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4844 4845 /* not fixed 4846 //8132141 ---> second level exponential backtracking 4847 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4848 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4849 */ 4850 }; 4851 4852 for (Object[] pm : patternMatchers) { 4853 String p = (String)pm[0]; 4854 String s = (String)pm[1]; 4855 boolean r = (Boolean)pm[2]; 4856 if (r != Pattern.compile(p).matcher(s).matches()) { 4857 failCount++; 4858 } 4859 } 4860 } 4861} 4862