RegExTest.java revision 8729:0242fce0f717
142016Sdillon/*
242016Sdillon * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
342016Sdillon * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
442016Sdillon *
542016Sdillon * This code is free software; you can redistribute it and/or modify it
642016Sdillon * under the terms of the GNU General Public License version 2 only, as
742016Sdillon * published by the Free Software Foundation.
842016Sdillon *
942016Sdillon * This code is distributed in the hope that it will be useful, but WITHOUT
1042016Sdillon * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1142016Sdillon * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1242016Sdillon * version 2 for more details (a copy is included in the LICENSE file that
1342016Sdillon * accompanied this code).
1442016Sdillon *
1542016Sdillon * You should have received a copy of the GNU General Public License version
1642016Sdillon * 2 along with this work; if not, write to the Free Software Foundation,
1742016Sdillon * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1842016Sdillon *
1942016Sdillon * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2042016Sdillon * or visit www.oracle.com if you need additional information or have any
2142016Sdillon * questions.
2242016Sdillon */
2342016Sdillon
2442016Sdillon/**
2542016Sdillon * @test
2642016Sdillon * @summary tests RegExp framework
2742016Sdillon * @author Mike McCloskey
2842016Sdillon * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
2942016Sdillon * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
3042016Sdillon * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
3142016Sdillon * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
3250476Speter * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
3342016Sdillon * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
3442016Sdillon * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647
3542016Sdillon */
3642016Sdillon
3742016Sdillonimport java.util.regex.*;
3887519Sdavidcimport java.util.Random;
3975670Sruimport java.io.*;
4042016Sdillonimport java.util.*;
4142016Sdillonimport java.nio.CharBuffer;
4279727Sschweikhimport java.util.function.Predicate;
43121384Shmp
44121384Shmp/**
4542016Sdillon * This is a test class created to check the operation of
4642016Sdillon * the Pattern and Matcher classes.
47121384Shmp */
48121384Shmppublic class RegExTest {
4952662Sphantom
5052662Sphantom    private static Random generator = new Random();
5142016Sdillon    private static boolean failure = false;
5242016Sdillon    private static int failCount = 0;
5342016Sdillon    private static String firstFailure = null;
54121384Shmp
55121384Shmp    /**
56121384Shmp     * Main to interpret arguments and run several tests.
57121384Shmp     *
5842016Sdillon     */
59121384Shmp    public static void main(String[] args) throws Exception {
60121384Shmp        // Most of the tests are in a file
6142016Sdillon        processFile("TestCases.txt");
62121384Shmp        //processFile("PerlCases.txt");
63121384Shmp        processFile("BMPTestCases.txt");
64121384Shmp        processFile("SupplementaryTestCases.txt");
65121384Shmp
6642016Sdillon        // These test many randomly generated char patterns
67121384Shmp        bm();
68121384Shmp        slice();
69121384Shmp
70121384Shmp        // These are hard to put into the file
71131530Sru        escapes();
7242016Sdillon        blankInput();
7342016Sdillon
7442016Sdillon        // Substitition tests on randomly generated sequences
75121384Shmp        globalSubstitute();
76141946Sru        stringbufferSubstitute();
7779727Sschweikh        substitutionBasher();
78121384Shmp
79121384Shmp        // Canonical Equivalence
80121384Shmp        ceTest();
81121384Shmp
8242016Sdillon        // Anchors
83121384Shmp        anchorTest();
84121384Shmp
8579727Sschweikh        // boolean match calls
8642016Sdillon        matchesTest();
8742016Sdillon        lookingAtTest();
8842016Sdillon
8942016Sdillon        // Pattern API
90121384Shmp        patternMatchesTest();
91121384Shmp
9279727Sschweikh        // Misc
9342016Sdillon        lookbehindTest();
94121384Shmp        nullArgumentTest();
95121384Shmp        backRefTest();
9642016Sdillon        groupCaptureTest();
97121384Shmp        caretTest();
98121384Shmp        charClassTest();
9942016Sdillon        emptyPatternTest();
100121384Shmp        findIntTest();
101121384Shmp        group0Test();
102107788Sru        longPatternTest();
10389124Smpp        octalTest();
10442016Sdillon        ampersandTest();
105121384Shmp        negationTest();
106121384Shmp        splitTest();
10742016Sdillon        appendTest();
10842016Sdillon        caseFoldingTest();
109121384Shmp        commentsTest();
110121384Shmp        unixLinesTest();
111107788Sru        replaceFirstTest();
112121384Shmp        gTest();
113121384Shmp        zTest();
114131530Sru        serializeTest();
115131530Sru        reluctantRepetitionTest();
11652662Sphantom        multilineDollarTest();
117131530Sru        dollarAtEndTest();
118121384Shmp        caretBetweenTerminatorsTest();
119121384Shmp        // This RFE rejected in Tiger numOccurrencesTest();
120121384Shmp        javaCharClassTest();
121121384Shmp        nonCaptureRepetitionTest();
122121384Shmp        notCapturedGroupCurlyMatchTest();
12342016Sdillon        escapedSegmentTest();
12442016Sdillon        literalPatternTest();
125121384Shmp        literalReplacementTest();
126121384Shmp        regionTest();
12779727Sschweikh        toStringTest();
12842016Sdillon        negatedCharClassTest();
129128951Shmp        findFromTest();
130130582Sru        boundsTest();
131130582Sru        unicodeWordBoundsTest();
13242016Sdillon        caretAtEndTest();
13342016Sdillon        wordSearchTest();
134121384Shmp        hitEndTest();
135121384Shmp        toMatchResultTest();
13679727Sschweikh        surrogatesInClassTest();
13742016Sdillon        removeQEQuotingTest();
138121384Shmp        namedGroupCaptureTest();
139121384Shmp        nonBmpClassComplementTest();
14042016Sdillon        unicodePropertiesTest();
14148264Smpp        unicodeHexNotationTest();
14248264Smpp        unicodeClassesTest();
14342016Sdillon        horizontalAndVerticalWSTest();
14442016Sdillon        linebreakTest();
14542016Sdillon        branchTest();
14652662Sphantom        groupCurlyNotFoundSuppTest();
14752662Sphantom        groupCurlyBackoffTest();
14852662Sphantom        patternAsPredicate();
14952662Sphantom        if (failure) {
15048264Smpp            throw new
151                RuntimeException("RegExTest failed, 1st failure: " +
152                                 firstFailure);
153        } else {
154            System.err.println("OKAY: All tests passed.");
155        }
156    }
157
158    // Utility functions
159
160    private static String getRandomAlphaString(int length) {
161        StringBuffer buf = new StringBuffer(length);
162        for (int i=0; i<length; i++) {
163            char randChar = (char)(97 + generator.nextInt(26));
164            buf.append(randChar);
165        }
166        return buf.toString();
167    }
168
169    private static void check(Matcher m, String expected) {
170        m.find();
171        if (!m.group().equals(expected))
172            failCount++;
173    }
174
175    private static void check(Matcher m, String result, boolean expected) {
176        m.find();
177        if (m.group().equals(result) != expected)
178            failCount++;
179    }
180
181    private static void check(Pattern p, String s, boolean expected) {
182        if (p.matcher(s).find() != expected)
183            failCount++;
184    }
185
186    private static void check(String p, String s, boolean expected) {
187        Matcher matcher = Pattern.compile(p).matcher(s);
188        if (matcher.find() != expected)
189            failCount++;
190    }
191
192    private static void check(String p, char c, boolean expected) {
193        String propertyPattern = expected ? "\\p" + p : "\\P" + p;
194        Pattern pattern = Pattern.compile(propertyPattern);
195        char[] ca = new char[1]; ca[0] = c;
196        Matcher matcher = pattern.matcher(new String(ca));
197        if (!matcher.find())
198            failCount++;
199    }
200
201    private static void check(String p, int codePoint, boolean expected) {
202        String propertyPattern = expected ? "\\p" + p : "\\P" + p;
203        Pattern pattern = Pattern.compile(propertyPattern);
204        char[] ca = Character.toChars(codePoint);
205        Matcher matcher = pattern.matcher(new String(ca));
206        if (!matcher.find())
207            failCount++;
208    }
209
210    private static void check(String p, int flag, String input, String s,
211                              boolean expected)
212    {
213        Pattern pattern = Pattern.compile(p, flag);
214        Matcher matcher = pattern.matcher(input);
215        if (expected)
216            check(matcher, s, expected);
217        else
218            check(pattern, input, false);
219    }
220
221    private static void report(String testName) {
222        int spacesToAdd = 30 - testName.length();
223        StringBuffer paddedNameBuffer = new StringBuffer(testName);
224        for (int i=0; i<spacesToAdd; i++)
225            paddedNameBuffer.append(" ");
226        String paddedName = paddedNameBuffer.toString();
227        System.err.println(paddedName + ": " +
228                           (failCount==0 ? "Passed":"Failed("+failCount+")"));
229        if (failCount > 0) {
230            failure = true;
231
232            if (firstFailure == null) {
233                firstFailure = testName;
234            }
235        }
236
237        failCount = 0;
238    }
239
240    /**
241     * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
242     * supplementary characters. This method does NOT fully take care
243     * of the regex syntax.
244     */
245    private static String toSupplementaries(String s) {
246        int length = s.length();
247        StringBuffer sb = new StringBuffer(length * 2);
248
249        for (int i = 0; i < length; ) {
250            char c = s.charAt(i++);
251            if (c == '\\') {
252                sb.append(c);
253                if (i < length) {
254                    c = s.charAt(i++);
255                    sb.append(c);
256                    if (c == 'u') {
257                        // assume no syntax error
258                        sb.append(s.charAt(i++));
259                        sb.append(s.charAt(i++));
260                        sb.append(s.charAt(i++));
261                        sb.append(s.charAt(i++));
262                    }
263                }
264            } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
265                sb.append('\ud800').append((char)('\udc00'+c));
266            } else {
267                sb.append(c);
268            }
269        }
270        return sb.toString();
271    }
272
273    // Regular expression tests
274
275    // This is for bug 6178785
276    // Test if an expected NPE gets thrown when passing in a null argument
277    private static boolean check(Runnable test) {
278        try {
279            test.run();
280            failCount++;
281            return false;
282        } catch (NullPointerException npe) {
283            return true;
284        }
285    }
286
287    private static void nullArgumentTest() {
288        check(new Runnable() { public void run() { Pattern.compile(null); }});
289        check(new Runnable() { public void run() { Pattern.matches(null, null); }});
290        check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
291        check(new Runnable() { public void run() { Pattern.quote(null);}});
292        check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
293        check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
294
295        final Matcher m = Pattern.compile("xyz").matcher("xyz");
296        m.matches();
297        check(new Runnable() { public void run() { m.appendTail(null);}});
298        check(new Runnable() { public void run() { m.replaceAll(null);}});
299        check(new Runnable() { public void run() { m.replaceFirst(null);}});
300        check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
301        check(new Runnable() { public void run() { m.reset(null);}});
302        check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
303        //check(new Runnable() { public void run() { m.usePattern(null);}});
304
305        report("Null Argument");
306    }
307
    // This is for bug 6635133
    // Test that a surrogate pair written with Unicode escapes is handled correctly.
310    private static void surrogatesInClassTest() throws Exception {
311        Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
312        Matcher matcher = pattern.matcher("\ud834\udd22");
313        if (!matcher.find())
314            failCount++;
315
316        report("Surrogate pair in Unicode escape");
317    }
318
    // This is for bug 6990617
    // Test that Pattern.RemoveQEQuoting works correctly when the octal
    // character escape has only 2 or 3 digits instead of 4 and the first
    // quoted character is an octal digit.
323    private static void removeQEQuotingTest() throws Exception {
324        Pattern pattern =
325            Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
326        Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
327        if (!matcher.find())
328            failCount++;
329
330        report("Remove Q/E Quoting");
331    }
332
333    // This is for bug 4988891
334    // Test toMatchResult to see that it is a copy of the Matcher
335    // that is not affected by subsequent operations on the original
336    private static void toMatchResultTest() throws Exception {
337        Pattern pattern = Pattern.compile("squid");
338        Matcher matcher = pattern.matcher(
339            "agiantsquidofdestinyasmallsquidoffate");
340        matcher.find();
341        int matcherStart1 = matcher.start();
342        MatchResult mr = matcher.toMatchResult();
343        if (mr == matcher)
344            failCount++;
345        int resultStart1 = mr.start();
346        if (matcherStart1 != resultStart1)
347            failCount++;
348        matcher.find();
349        int matcherStart2 = matcher.start();
350        int resultStart2 = mr.start();
351        if (matcherStart2 == resultStart2)
352            failCount++;
353        if (resultStart1 != resultStart2)
354            failCount++;
355        MatchResult mr2 = matcher.toMatchResult();
356        if (mr == mr2)
357            failCount++;
358        if (mr2.start() != matcherStart2)
359            failCount++;
360        report("toMatchResult is a copy");
361    }
362
363    // This is for bug 5013885
364    // Must test a slice to see if it reports hitEnd correctly
365    private static void hitEndTest() throws Exception {
366        // Basic test of Slice node
367        Pattern p = Pattern.compile("^squidattack");
368        Matcher m = p.matcher("squack");
369        m.find();
370        if (m.hitEnd())
371            failCount++;
372        m.reset("squid");
373        m.find();
374        if (!m.hitEnd())
375            failCount++;
376
377        // Test Slice, SliceA and SliceU nodes
378        for (int i=0; i<3; i++) {
379            int flags = 0;
380            if (i==1) flags = Pattern.CASE_INSENSITIVE;
381            if (i==2) flags = Pattern.UNICODE_CASE;
382            p = Pattern.compile("^abc", flags);
383            m = p.matcher("ad");
384            m.find();
385            if (m.hitEnd())
386                failCount++;
387            m.reset("ab");
388            m.find();
389            if (!m.hitEnd())
390                failCount++;
391        }
392
393        // Test Boyer-Moore node
394        p = Pattern.compile("catattack");
395        m = p.matcher("attack");
396        m.find();
397        if (!m.hitEnd())
398            failCount++;
399
400        p = Pattern.compile("catattack");
401        m = p.matcher("attackattackattackcatatta");
402        m.find();
403        if (!m.hitEnd())
404            failCount++;
405        report("hitEnd from a Slice");
406    }
407
408    // This is for bug 4997476
409    // It is weird code submitted by customer demonstrating a regression
410    private static void wordSearchTest() throws Exception {
411        String testString = new String("word1 word2 word3");
412        Pattern p = Pattern.compile("\\b");
413        Matcher m = p.matcher(testString);
414        int position = 0;
415        int start = 0;
416        while (m.find(position)) {
417            start = m.start();
418            if (start == testString.length())
419                break;
420            if (m.find(start+1)) {
421                position = m.start();
422            } else {
423                position = testString.length();
424            }
425            if (testString.substring(start, position).equals(" "))
426                continue;
427            if (!testString.substring(start, position-1).startsWith("word"))
428                failCount++;
429        }
430        report("Customer word search");
431    }
432
433    // This is for bug 4994840
434    private static void caretAtEndTest() throws Exception {
435        // Problem only occurs with multiline patterns
436        // containing a beginning-of-line caret "^" followed
437        // by an expression that also matches the empty string.
438        Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
439        Matcher matcher = pattern.matcher("\r");
440        matcher.find();
441        matcher.find();
442        report("Caret at end");
443    }
444
445    // This test is for 4979006
446    // Check to see if word boundary construct properly handles unicode
447    // non spacing marks
448    private static void unicodeWordBoundsTest() throws Exception {
449        String spaces = "  ";
450        String wordChar = "a";
451        String nsm = "\u030a";
452
453        assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
454
455        Pattern pattern = Pattern.compile("\\b");
456        Matcher matcher = pattern.matcher("");
457        // S=other B=word character N=non spacing mark .=word boundary
458        // SS.BB.SS
459        String input = spaces + wordChar + wordChar + spaces;
460        twoFindIndexes(input, matcher, 2, 4);
461        // SS.BBN.SS
462        input = spaces + wordChar +wordChar + nsm + spaces;
463        twoFindIndexes(input, matcher, 2, 5);
464        // SS.BN.SS
465        input = spaces + wordChar + nsm + spaces;
466        twoFindIndexes(input, matcher, 2, 4);
467        // SS.BNN.SS
468        input = spaces + wordChar + nsm + nsm + spaces;
469        twoFindIndexes(input, matcher, 2, 5);
470        // SSN.BB.SS
471        input = spaces + nsm + wordChar + wordChar + spaces;
472        twoFindIndexes(input, matcher, 3, 5);
473        // SS.BNB.SS
474        input = spaces + wordChar + nsm + wordChar + spaces;
475        twoFindIndexes(input, matcher, 2, 5);
476        // SSNNSS
477        input = spaces + nsm + nsm + spaces;
478        matcher.reset(input);
479        if (matcher.find())
480            failCount++;
481        // SSN.BBN.SS
482        input = spaces + nsm + wordChar + wordChar + nsm + spaces;
483        twoFindIndexes(input, matcher, 3, 6);
484
485        report("Unicode word boundary");
486    }
487
488    private static void twoFindIndexes(String input, Matcher matcher, int a,
489                                       int b) throws Exception
490    {
491        matcher.reset(input);
492        matcher.find();
493        if (matcher.start() != a)
494            failCount++;
495        matcher.find();
496        if (matcher.start() != b)
497            failCount++;
498    }
499
500    // This test is for 6284152
501    static void check(String regex, String input, String[] expected) {
502        List<String> result = new ArrayList<String>();
503        Pattern p = Pattern.compile(regex);
504        Matcher m = p.matcher(input);
505        while (m.find()) {
506            result.add(m.group());
507        }
508        if (!Arrays.asList(expected).equals(result))
509            failCount++;
510    }
511
512    private static void lookbehindTest() throws Exception {
513        //Positive
514        check("(?<=%.{0,5})foo\\d",
515              "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
516              new String[]{"foo1", "foo2", "foo3"});
517
518        //boundary at end of the lookbehind sub-regex should work consistently
519        //with the boundary just after the lookbehind sub-regex
520        check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
521        check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
522        check("(?<!abc )\\bfoo", "abc foo", new String[0]);
523        check("(?<!abc \\b)foo", "abc foo", new String[0]);
524
525        //Negative
526        check("(?<!%.{0,5})foo\\d",
527              "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
528              new String[] {"foo4", "foo5"});
529
530        //Positive greedy
531        check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
532
533        //Positive reluctant
534        check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
535
536        //supplementary
537        check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
538              new String[] {"fo\ud800\udc00o"});
539        check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
540              new String[] {"fo\ud800\udc00o"});
541        check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
542              new String[] {"fo\ud800\udc00o"});
543        check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
544              new String[] {"fo\ud800\udc00o"});
545        report("Lookbehind");
546    }
547
548    // This test is for 4938995
549    // Check to see if weak region boundaries are transparent to
550    // lookahead and lookbehind constructs
551    private static void boundsTest() throws Exception {
552        String fullMessage = "catdogcat";
553        Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
554        Matcher matcher = pattern.matcher("catdogca");
555        matcher.useTransparentBounds(true);
556        if (matcher.find())
557            failCount++;
558        matcher.reset("atdogcat");
559        if (matcher.find())
560            failCount++;
561        matcher.reset(fullMessage);
562        if (!matcher.find())
563            failCount++;
564        matcher.reset(fullMessage);
565        matcher.region(0,9);
566        if (!matcher.find())
567            failCount++;
568        matcher.reset(fullMessage);
569        matcher.region(0,6);
570        if (!matcher.find())
571            failCount++;
572        matcher.reset(fullMessage);
573        matcher.region(3,6);
574        if (!matcher.find())
575            failCount++;
576        matcher.useTransparentBounds(false);
577        if (matcher.find())
578            failCount++;
579
580        // Negative lookahead/lookbehind
581        pattern = Pattern.compile("(?<!cat)dog(?!cat)");
582        matcher = pattern.matcher("dogcat");
583        matcher.useTransparentBounds(true);
584        matcher.region(0,3);
585        if (matcher.find())
586            failCount++;
587        matcher.reset("catdog");
588        matcher.region(3,6);
589        if (matcher.find())
590            failCount++;
591        matcher.useTransparentBounds(false);
592        matcher.reset("dogcat");
593        matcher.region(0,3);
594        if (!matcher.find())
595            failCount++;
596        matcher.reset("catdog");
597        matcher.region(3,6);
598        if (!matcher.find())
599            failCount++;
600
601        report("Region bounds transparency");
602    }
603
604    // This test is for 4945394
605    private static void findFromTest() throws Exception {
606        String message = "This is 40 $0 message.";
607        Pattern pat = Pattern.compile("\\$0");
608        Matcher match = pat.matcher(message);
609        if (!match.find())
610            failCount++;
611        if (match.find())
612            failCount++;
613        if (match.find())
614            failCount++;
615        report("Check for alternating find");
616    }
617
618    // This test is for 4872664 and 4892980
619    private static void negatedCharClassTest() throws Exception {
620        Pattern pattern = Pattern.compile("[^>]");
621        Matcher matcher = pattern.matcher("\u203A");
622        if (!matcher.matches())
623            failCount++;
624        pattern = Pattern.compile("[^fr]");
625        matcher = pattern.matcher("a");
626        if (!matcher.find())
627            failCount++;
628        matcher.reset("\u203A");
629        if (!matcher.find())
630            failCount++;
631        String s = "for";
632        String result[] = s.split("[^fr]");
633        if (!result[0].equals("f"))
634            failCount++;
635        if (!result[1].equals("r"))
636            failCount++;
637        s = "f\u203Ar";
638        result = s.split("[^fr]");
639        if (!result[0].equals("f"))
640            failCount++;
641        if (!result[1].equals("r"))
642            failCount++;
643
644        // Test adding to bits, subtracting a node, then adding to bits again
645        pattern = Pattern.compile("[^f\u203Ar]");
646        matcher = pattern.matcher("a");
647        if (!matcher.find())
648            failCount++;
649        matcher.reset("f");
650        if (matcher.find())
651            failCount++;
652        matcher.reset("\u203A");
653        if (matcher.find())
654            failCount++;
655        matcher.reset("r");
656        if (matcher.find())
657            failCount++;
658        matcher.reset("\u203B");
659        if (!matcher.find())
660            failCount++;
661
662        // Test subtracting a node, adding to bits, subtracting again
663        pattern = Pattern.compile("[^\u203Ar\u203B]");
664        matcher = pattern.matcher("a");
665        if (!matcher.find())
666            failCount++;
667        matcher.reset("\u203A");
668        if (matcher.find())
669            failCount++;
670        matcher.reset("r");
671        if (matcher.find())
672            failCount++;
673        matcher.reset("\u203B");
674        if (matcher.find())
675            failCount++;
676        matcher.reset("\u203C");
677        if (!matcher.find())
678            failCount++;
679
680        report("Negated Character Class");
681    }
682
683    // This test is for 4628291
684    private static void toStringTest() throws Exception {
685        Pattern pattern = Pattern.compile("b+");
686        if (pattern.toString() != "b+")
687            failCount++;
688        Matcher matcher = pattern.matcher("aaabbbccc");
689        String matcherString = matcher.toString(); // unspecified
690        matcher.find();
691        matcherString = matcher.toString(); // unspecified
692        matcher.region(0,3);
693        matcherString = matcher.toString(); // unspecified
694        matcher.reset();
695        matcherString = matcher.toString(); // unspecified
696        report("toString");
697    }
698
699    // This test is for 4808962
700    private static void literalPatternTest() throws Exception {
701        int flags = Pattern.LITERAL;
702
703        Pattern pattern = Pattern.compile("abc\\t$^", flags);
704        check(pattern, "abc\\t$^", true);
705
706        pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
707        check(pattern, "abc\\t$^", true);
708
709        pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
710        check(pattern, "\\Qa^$bcabc\\E", true);
711        check(pattern, "a^$bcabc", false);
712
713        pattern = Pattern.compile("\\\\Q\\\\E");
714        check(pattern, "\\Q\\E", true);
715
716        pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
717        check(pattern, "abcefg\\Q\\Ehij", true);
718
719        pattern = Pattern.compile("\\\\\\Q\\\\E");
720        check(pattern, "\\\\\\\\", true);
721
722        pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
723        check(pattern, "\\Qa^$bcabc\\E", true);
724        check(pattern, "a^$bcabc", false);
725
726        pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
727        check(pattern, "\\Qabc\\Edef", true);
728        check(pattern, "abcdef", false);
729
730        pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
731        check(pattern, "abc\\Edef", true);
732        check(pattern, "abcdef", false);
733
734        pattern = Pattern.compile(Pattern.quote("\\E"));
735        check(pattern, "\\E", true);
736
737        pattern = Pattern.compile("((((abc.+?:)", flags);
738        check(pattern, "((((abc.+?:)", true);
739
740        flags |= Pattern.MULTILINE;
741
742        pattern = Pattern.compile("^cat$", flags);
743        check(pattern, "abc^cat$def", true);
744        check(pattern, "cat", false);
745
746        flags |= Pattern.CASE_INSENSITIVE;
747
748        pattern = Pattern.compile("abcdef", flags);
749        check(pattern, "ABCDEF", true);
750        check(pattern, "AbCdEf", true);
751
752        flags |= Pattern.DOTALL;
753
754        pattern = Pattern.compile("a...b", flags);
755        check(pattern, "A...b", true);
756        check(pattern, "Axxxb", false);
757
758        flags |= Pattern.CANON_EQ;
759
760        Pattern p = Pattern.compile("testa\u030a", flags);
761        check(pattern, "testa\u030a", false);
762        check(pattern, "test\u00e5", false);
763
764        // Supplementary character test
765        flags = Pattern.LITERAL;
766
767        pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
768        check(pattern, toSupplementaries("abc\\t$^"), true);
769
770        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
771        check(pattern, toSupplementaries("abc\\t$^"), true);
772
773        pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
774        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
775        check(pattern, toSupplementaries("a^$bcabc"), false);
776
777        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
778        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
779        check(pattern, toSupplementaries("a^$bcabc"), false);
780
781        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
782        check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
783        check(pattern, toSupplementaries("abcdef"), false);
784
785        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
786        check(pattern, toSupplementaries("abc\\Edef"), true);
787        check(pattern, toSupplementaries("abcdef"), false);
788
789        pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
790        check(pattern, toSupplementaries("((((abc.+?:)"), true);
791
792        flags |= Pattern.MULTILINE;
793
794        pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
795        check(pattern, toSupplementaries("abc^cat$def"), true);
796        check(pattern, toSupplementaries("cat"), false);
797
798        flags |= Pattern.DOTALL;
799
800        // note: this is case-sensitive.
801        pattern = Pattern.compile(toSupplementaries("a...b"), flags);
802        check(pattern, toSupplementaries("a...b"), true);
803        check(pattern, toSupplementaries("axxxb"), false);
804
805        flags |= Pattern.CANON_EQ;
806
807        String t = toSupplementaries("test");
808        p = Pattern.compile(t + "a\u030a", flags);
809        check(pattern, t + "a\u030a", false);
810        check(pattern, t + "\u00e5", false);
811
812        report("Literal pattern");
813    }
814
815    // This test is for 4803179
816    // This test is also for 4808962, replacement parts
817    private static void literalReplacementTest() throws Exception {
818        int flags = Pattern.LITERAL;
819
820        Pattern pattern = Pattern.compile("abc", flags);
821        Matcher matcher = pattern.matcher("zzzabczzz");
822        String replaceTest = "$0";
823        String result = matcher.replaceAll(replaceTest);
824        if (!result.equals("zzzabczzz"))
825            failCount++;
826
827        matcher.reset();
828        String literalReplacement = matcher.quoteReplacement(replaceTest);
829        result = matcher.replaceAll(literalReplacement);
830        if (!result.equals("zzz$0zzz"))
831            failCount++;
832
833        matcher.reset();
834        replaceTest = "\\t$\\$";
835        literalReplacement = matcher.quoteReplacement(replaceTest);
836        result = matcher.replaceAll(literalReplacement);
837        if (!result.equals("zzz\\t$\\$zzz"))
838            failCount++;
839
840        // Supplementary character test
841        pattern = Pattern.compile(toSupplementaries("abc"), flags);
842        matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
843        replaceTest = "$0";
844        result = matcher.replaceAll(replaceTest);
845        if (!result.equals(toSupplementaries("zzzabczzz")))
846            failCount++;
847
848        matcher.reset();
849        literalReplacement = matcher.quoteReplacement(replaceTest);
850        result = matcher.replaceAll(literalReplacement);
851        if (!result.equals(toSupplementaries("zzz$0zzz")))
852            failCount++;
853
854        matcher.reset();
855        replaceTest = "\\t$\\$";
856        literalReplacement = matcher.quoteReplacement(replaceTest);
857        result = matcher.replaceAll(literalReplacement);
858        if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
859            failCount++;
860
861        // IAE should be thrown if backslash or '$' is the last character
862        // in replacement string
863        try {
864            "\uac00".replaceAll("\uac00", "$");
865            failCount++;
866        } catch (IllegalArgumentException iie) {
867        } catch (Exception e) {
868            failCount++;
869        }
870        try {
871            "\uac00".replaceAll("\uac00", "\\");
872            failCount++;
873        } catch (IllegalArgumentException iie) {
874        } catch (Exception e) {
875            failCount++;
876        }
877        report("Literal replacement");
878    }
879
880    // This test is for 4757029
881    private static void regionTest() throws Exception {
882        Pattern pattern = Pattern.compile("abc");
883        Matcher matcher = pattern.matcher("abcdefabc");
884
885        matcher.region(0,9);
886        if (!matcher.find())
887            failCount++;
888        if (!matcher.find())
889            failCount++;
890        matcher.region(0,3);
891        if (!matcher.find())
892           failCount++;
893        matcher.region(3,6);
894        if (matcher.find())
895           failCount++;
896        matcher.region(0,2);
897        if (matcher.find())
898           failCount++;
899
900        expectRegionFail(matcher, 1, -1);
901        expectRegionFail(matcher, -1, -1);
902        expectRegionFail(matcher, -1, 1);
903        expectRegionFail(matcher, 5, 3);
904        expectRegionFail(matcher, 5, 12);
905        expectRegionFail(matcher, 12, 12);
906
907        pattern = Pattern.compile("^abc$");
908        matcher = pattern.matcher("zzzabczzz");
909        matcher.region(0,9);
910        if (matcher.find())
911            failCount++;
912        matcher.region(3,6);
913        if (!matcher.find())
914           failCount++;
915        matcher.region(3,6);
916        matcher.useAnchoringBounds(false);
917        if (matcher.find())
918           failCount++;
919
920        // Supplementary character test
921        pattern = Pattern.compile(toSupplementaries("abc"));
922        matcher = pattern.matcher(toSupplementaries("abcdefabc"));
923        matcher.region(0,9*2);
924        if (!matcher.find())
925            failCount++;
926        if (!matcher.find())
927            failCount++;
928        matcher.region(0,3*2);
929        if (!matcher.find())
930           failCount++;
931        matcher.region(1,3*2);
932        if (matcher.find())
933           failCount++;
934        matcher.region(3*2,6*2);
935        if (matcher.find())
936           failCount++;
937        matcher.region(0,2*2);
938        if (matcher.find())
939           failCount++;
940        matcher.region(0,2*2+1);
941        if (matcher.find())
942           failCount++;
943
944        expectRegionFail(matcher, 1*2, -1);
945        expectRegionFail(matcher, -1, -1);
946        expectRegionFail(matcher, -1, 1*2);
947        expectRegionFail(matcher, 5*2, 3*2);
948        expectRegionFail(matcher, 5*2, 12*2);
949        expectRegionFail(matcher, 12*2, 12*2);
950
951        pattern = Pattern.compile(toSupplementaries("^abc$"));
952        matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
953        matcher.region(0,9*2);
954        if (matcher.find())
955            failCount++;
956        matcher.region(3*2,6*2);
957        if (!matcher.find())
958           failCount++;
959        matcher.region(3*2+1,6*2);
960        if (matcher.find())
961           failCount++;
962        matcher.region(3*2,6*2-1);
963        if (matcher.find())
964           failCount++;
965        matcher.region(3*2,6*2);
966        matcher.useAnchoringBounds(false);
967        if (matcher.find())
968           failCount++;
969        report("Regions");
970    }
971
972    private static void expectRegionFail(Matcher matcher, int index1,
973                                         int index2)
974    {
975        try {
976            matcher.region(index1, index2);
977            failCount++;
978        } catch (IndexOutOfBoundsException ioobe) {
979            // Correct result
980        } catch (IllegalStateException ise) {
981            // Correct result
982        }
983    }
984
985    // This test is for 4803197
986    private static void escapedSegmentTest() throws Exception {
987
988        Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
989        check(pattern, "dir1\\dir2", true);
990
991        pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
992        check(pattern, "dir1\\dir2\\", true);
993
994        pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
995        check(pattern, "dir1\\dir2\\", true);
996
997        // Supplementary character test
998        pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
999        check(pattern, toSupplementaries("dir1\\dir2"), true);
1000
1001        pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1002        check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1003
1004        pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1005        check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1006
1007        report("Escaped segment");
1008    }
1009
1010    // This test is for 4792284
1011    private static void nonCaptureRepetitionTest() throws Exception {
1012        String input = "abcdefgh;";
1013
1014        String[] patterns = new String[] {
1015            "(?:\\w{4})+;",
1016            "(?:\\w{8})*;",
1017            "(?:\\w{2}){2,4};",
1018            "(?:\\w{4}){2,};",   // only matches the
1019            ".*?(?:\\w{5})+;",   //     specified minimum
1020            ".*?(?:\\w{9})*;",   //     number of reps - OK
1021            "(?:\\w{4})+?;",     // lazy repetition - OK
1022            "(?:\\w{4})++;",     // possessive repetition - OK
1023            "(?:\\w{2,}?)+;",    // non-deterministic - OK
1024            "(\\w{4})+;",        // capturing group - OK
1025        };
1026
1027        for (int i = 0; i < patterns.length; i++) {
1028            // Check find()
1029            check(patterns[i], 0, input, input, true);
1030            // Check matches()
1031            Pattern p = Pattern.compile(patterns[i]);
1032            Matcher m = p.matcher(input);
1033
1034            if (m.matches()) {
1035                if (!m.group(0).equals(input))
1036                    failCount++;
1037            } else {
1038                failCount++;
1039            }
1040        }
1041
1042        report("Non capturing repetition");
1043    }
1044
1045    // This test is for 6358731
1046    private static void notCapturedGroupCurlyMatchTest() throws Exception {
1047        Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1048        Matcher matcher = pattern.matcher("abcd");
1049        if (!matcher.matches() ||
1050             matcher.group(1) != null ||
1051             !matcher.group(2).equals("abcd")) {
1052            failCount++;
1053        }
1054        report("Not captured GroupCurly");
1055    }
1056
1057    // This test is for 4706545
1058    private static void javaCharClassTest() throws Exception {
1059        for (int i=0; i<1000; i++) {
1060            char c = (char)generator.nextInt();
1061            check("{javaLowerCase}", c, Character.isLowerCase(c));
1062            check("{javaUpperCase}", c, Character.isUpperCase(c));
1063            check("{javaUpperCase}+", c, Character.isUpperCase(c));
1064            check("{javaTitleCase}", c, Character.isTitleCase(c));
1065            check("{javaDigit}", c, Character.isDigit(c));
1066            check("{javaDefined}", c, Character.isDefined(c));
1067            check("{javaLetter}", c, Character.isLetter(c));
1068            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1069            check("{javaJavaIdentifierStart}", c,
1070                  Character.isJavaIdentifierStart(c));
1071            check("{javaJavaIdentifierPart}", c,
1072                  Character.isJavaIdentifierPart(c));
1073            check("{javaUnicodeIdentifierStart}", c,
1074                  Character.isUnicodeIdentifierStart(c));
1075            check("{javaUnicodeIdentifierPart}", c,
1076                  Character.isUnicodeIdentifierPart(c));
1077            check("{javaIdentifierIgnorable}", c,
1078                  Character.isIdentifierIgnorable(c));
1079            check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1080            check("{javaWhitespace}", c, Character.isWhitespace(c));
1081            check("{javaISOControl}", c, Character.isISOControl(c));
1082            check("{javaMirrored}", c, Character.isMirrored(c));
1083
1084        }
1085
1086        // Supplementary character test
1087        for (int i=0; i<1000; i++) {
1088            int c = generator.nextInt(Character.MAX_CODE_POINT
1089                                      - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1090                        + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1091            check("{javaLowerCase}", c, Character.isLowerCase(c));
1092            check("{javaUpperCase}", c, Character.isUpperCase(c));
1093            check("{javaUpperCase}+", c, Character.isUpperCase(c));
1094            check("{javaTitleCase}", c, Character.isTitleCase(c));
1095            check("{javaDigit}", c, Character.isDigit(c));
1096            check("{javaDefined}", c, Character.isDefined(c));
1097            check("{javaLetter}", c, Character.isLetter(c));
1098            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1099            check("{javaJavaIdentifierStart}", c,
1100                  Character.isJavaIdentifierStart(c));
1101            check("{javaJavaIdentifierPart}", c,
1102                  Character.isJavaIdentifierPart(c));
1103            check("{javaUnicodeIdentifierStart}", c,
1104                  Character.isUnicodeIdentifierStart(c));
1105            check("{javaUnicodeIdentifierPart}", c,
1106                  Character.isUnicodeIdentifierPart(c));
1107            check("{javaIdentifierIgnorable}", c,
1108                  Character.isIdentifierIgnorable(c));
1109            check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1110            check("{javaWhitespace}", c, Character.isWhitespace(c));
1111            check("{javaISOControl}", c, Character.isISOControl(c));
1112            check("{javaMirrored}", c, Character.isMirrored(c));
1113        }
1114
1115        report("Java character classes");
1116    }
1117
1118    // This test is for 4523620
1119    /*
1120    private static void numOccurrencesTest() throws Exception {
1121        Pattern pattern = Pattern.compile("aaa");
1122
1123        if (pattern.numOccurrences("aaaaaa", false) != 2)
1124            failCount++;
1125        if (pattern.numOccurrences("aaaaaa", true) != 4)
1126            failCount++;
1127
1128        pattern = Pattern.compile("^");
1129        if (pattern.numOccurrences("aaaaaa", false) != 1)
1130            failCount++;
1131        if (pattern.numOccurrences("aaaaaa", true) != 1)
1132            failCount++;
1133
1134        report("Number of Occurrences");
1135    }
1136    */
1137
1138    // This test is for 4776374
1139    private static void caretBetweenTerminatorsTest() throws Exception {
1140        int flags1 = Pattern.DOTALL;
1141        int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1142        int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1143        int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1144
1145        check("^....", flags1, "test\ntest", "test", true);
1146        check(".....^", flags1, "test\ntest", "test", false);
1147        check(".....^", flags1, "test\n", "test", false);
1148        check("....^", flags1, "test\r\n", "test", false);
1149
1150        check("^....", flags2, "test\ntest", "test", true);
1151        check("....^", flags2, "test\ntest", "test", false);
1152        check(".....^", flags2, "test\n", "test", false);
1153        check("....^", flags2, "test\r\n", "test", false);
1154
1155        check("^....", flags3, "test\ntest", "test", true);
1156        check(".....^", flags3, "test\ntest", "test\n", true);
1157        check(".....^", flags3, "test\u0085test", "test\u0085", false);
1158        check(".....^", flags3, "test\n", "test", false);
1159        check(".....^", flags3, "test\r\n", "test", false);
1160        check("......^", flags3, "test\r\ntest", "test\r\n", true);
1161
1162        check("^....", flags4, "test\ntest", "test", true);
1163        check(".....^", flags3, "test\ntest", "test\n", true);
1164        check(".....^", flags4, "test\u0085test", "test\u0085", true);
1165        check(".....^", flags4, "test\n", "test\n", false);
1166        check(".....^", flags4, "test\r\n", "test\r", false);
1167
1168        // Supplementary character test
1169        String t = toSupplementaries("test");
1170        check("^....", flags1, t+"\n"+t, t, true);
1171        check(".....^", flags1, t+"\n"+t, t, false);
1172        check(".....^", flags1, t+"\n", t, false);
1173        check("....^", flags1, t+"\r\n", t, false);
1174
1175        check("^....", flags2, t+"\n"+t, t, true);
1176        check("....^", flags2, t+"\n"+t, t, false);
1177        check(".....^", flags2, t+"\n", t, false);
1178        check("....^", flags2, t+"\r\n", t, false);
1179
1180        check("^....", flags3, t+"\n"+t, t, true);
1181        check(".....^", flags3, t+"\n"+t, t+"\n", true);
1182        check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1183        check(".....^", flags3, t+"\n", t, false);
1184        check(".....^", flags3, t+"\r\n", t, false);
1185        check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1186
1187        check("^....", flags4, t+"\n"+t, t, true);
1188        check(".....^", flags3, t+"\n"+t, t+"\n", true);
1189        check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1190        check(".....^", flags4, t+"\n", t+"\n", false);
1191        check(".....^", flags4, t+"\r\n", t+"\r", false);
1192
1193        report("Caret between terminators");
1194    }
1195
1196    // This test is for 4727935
1197    private static void dollarAtEndTest() throws Exception {
1198        int flags1 = Pattern.DOTALL;
1199        int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1200        int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1201
1202        check("....$", flags1, "test\n", "test", true);
1203        check("....$", flags1, "test\r\n", "test", true);
1204        check(".....$", flags1, "test\n", "test\n", true);
1205        check(".....$", flags1, "test\u0085", "test\u0085", true);
1206        check("....$", flags1, "test\u0085", "test", true);
1207
1208        check("....$", flags2, "test\n", "test", true);
1209        check(".....$", flags2, "test\n", "test\n", true);
1210        check(".....$", flags2, "test\u0085", "test\u0085", true);
1211        check("....$", flags2, "test\u0085", "est\u0085", true);
1212
1213        check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1214        check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1215        check("....$blah", flags3, "test\nblah", "!!!!", false);
1216        check(".....$blah", flags3, "test\nblah", "!!!!", false);
1217
1218        // Supplementary character test
1219        String t = toSupplementaries("test");
1220        String b = toSupplementaries("blah");
1221        check("....$", flags1, t+"\n", t, true);
1222        check("....$", flags1, t+"\r\n", t, true);
1223        check(".....$", flags1, t+"\n", t+"\n", true);
1224        check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1225        check("....$", flags1, t+"\u0085", t, true);
1226
1227        check("....$", flags2, t+"\n", t, true);
1228        check(".....$", flags2, t+"\n", t+"\n", true);
1229        check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1230        check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1231
1232        check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1233        check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1234        check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1235        check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1236
1237        report("Dollar at End");
1238    }
1239
1240    // This test is for 4711773
1241    private static void multilineDollarTest() throws Exception {
1242        Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1243        Matcher matcher = findCR.matcher("first bit\nsecond bit");
1244        matcher.find();
1245        if (matcher.start(0) != 9)
1246            failCount++;
1247        matcher.find();
1248        if (matcher.start(0) != 20)
1249            failCount++;
1250
1251        // Supplementary character test
1252        matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1253        matcher.find();
1254        if (matcher.start(0) != 9*2)
1255            failCount++;
1256        matcher.find();
1257        if (matcher.start(0) != 20*2)
1258            failCount++;
1259
1260        report("Multiline Dollar");
1261    }
1262
1263    private static void reluctantRepetitionTest() throws Exception {
1264        Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1265        check(p, "1 word word word 2", true);
1266        check(p, "1 wor wo w 2", true);
1267        check(p, "1 word word 2", true);
1268        check(p, "1 word 2", true);
1269        check(p, "1 wo w w 2", true);
1270        check(p, "1 wo w 2", true);
1271        check(p, "1 wor w 2", true);
1272
1273        p = Pattern.compile("([a-z])+?c");
1274        Matcher m = p.matcher("ababcdefdec");
1275        check(m, "ababc");
1276
1277        // Supplementary character test
1278        p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1279        m = p.matcher(toSupplementaries("ababcdefdec"));
1280        check(m, toSupplementaries("ababc"));
1281
1282        report("Reluctant Repetition");
1283    }
1284
1285    private static void serializeTest() throws Exception {
1286        String patternStr = "(b)";
1287        String matchStr = "b";
1288        Pattern pattern = Pattern.compile(patternStr);
1289        ByteArrayOutputStream baos = new ByteArrayOutputStream();
1290        ObjectOutputStream oos = new ObjectOutputStream(baos);
1291        oos.writeObject(pattern);
1292        oos.close();
1293        ObjectInputStream ois = new ObjectInputStream(
1294            new ByteArrayInputStream(baos.toByteArray()));
1295        Pattern serializedPattern = (Pattern)ois.readObject();
1296        ois.close();
1297        Matcher matcher = serializedPattern.matcher(matchStr);
1298        if (!matcher.matches())
1299            failCount++;
1300        if (matcher.groupCount() != 1)
1301            failCount++;
1302
1303        report("Serialization");
1304    }
1305
1306    private static void gTest() {
1307        Pattern pattern = Pattern.compile("\\G\\w");
1308        Matcher matcher = pattern.matcher("abc#x#x");
1309        matcher.find();
1310        matcher.find();
1311        matcher.find();
1312        if (matcher.find())
1313            failCount++;
1314
1315        pattern = Pattern.compile("\\GA*");
1316        matcher = pattern.matcher("1A2AA3");
1317        matcher.find();
1318        if (matcher.find())
1319            failCount++;
1320
1321        pattern = Pattern.compile("\\GA*");
1322        matcher = pattern.matcher("1A2AA3");
1323        if (!matcher.find(1))
1324            failCount++;
1325        matcher.find();
1326        if (matcher.find())
1327            failCount++;
1328
1329        report("\\G");
1330    }
1331
1332    private static void zTest() {
1333        Pattern pattern = Pattern.compile("foo\\Z");
1334        // Positives
1335        check(pattern, "foo\u0085", true);
1336        check(pattern, "foo\u2028", true);
1337        check(pattern, "foo\u2029", true);
1338        check(pattern, "foo\n", true);
1339        check(pattern, "foo\r", true);
1340        check(pattern, "foo\r\n", true);
1341        // Negatives
1342        check(pattern, "fooo", false);
1343        check(pattern, "foo\n\r", false);
1344
1345        pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1346        // Positives
1347        check(pattern, "foo", true);
1348        check(pattern, "foo\n", true);
1349        // Negatives
1350        check(pattern, "foo\r", false);
1351        check(pattern, "foo\u0085", false);
1352        check(pattern, "foo\u2028", false);
1353        check(pattern, "foo\u2029", false);
1354
1355        report("\\Z");
1356    }
1357
1358    private static void replaceFirstTest() {
1359        Pattern pattern = Pattern.compile("(ab)(c*)");
1360        Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1361        if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1362            failCount++;
1363
1364        matcher.reset("zzzabccczzzabcczzzabccczzz");
1365        if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1366            failCount++;
1367
1368        matcher.reset("zzzabccczzzabcczzzabccczzz");
1369        String result = matcher.replaceFirst("$1");
1370        if (!result.equals("zzzabzzzabcczzzabccczzz"))
1371            failCount++;
1372
1373        matcher.reset("zzzabccczzzabcczzzabccczzz");
1374        result = matcher.replaceFirst("$2");
1375        if (!result.equals("zzzccczzzabcczzzabccczzz"))
1376            failCount++;
1377
1378        pattern = Pattern.compile("a*");
1379        matcher = pattern.matcher("aaaaaaaaaa");
1380        if (!matcher.replaceFirst("test").equals("test"))
1381            failCount++;
1382
1383        pattern = Pattern.compile("a+");
1384        matcher = pattern.matcher("zzzaaaaaaaaaa");
1385        if (!matcher.replaceFirst("test").equals("zzztest"))
1386            failCount++;
1387
1388        // Supplementary character test
1389        pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1390        matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1391        if (!matcher.replaceFirst(toSupplementaries("test"))
1392                .equals(toSupplementaries("testzzzabcczzzabccc")))
1393            failCount++;
1394
1395        matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1396        if (!matcher.replaceFirst(toSupplementaries("test")).
1397            equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1398            failCount++;
1399
1400        matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1401        result = matcher.replaceFirst("$1");
1402        if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1403            failCount++;
1404
1405        matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1406        result = matcher.replaceFirst("$2");
1407        if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1408            failCount++;
1409
1410        pattern = Pattern.compile(toSupplementaries("a*"));
1411        matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1412        if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1413            failCount++;
1414
1415        pattern = Pattern.compile(toSupplementaries("a+"));
1416        matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1417        if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1418            failCount++;
1419
1420        report("Replace First");
1421    }
1422
1423    private static void unixLinesTest() {
1424        Pattern pattern = Pattern.compile(".*");
1425        Matcher matcher = pattern.matcher("aa\u2028blah");
1426        matcher.find();
1427        if (!matcher.group(0).equals("aa"))
1428            failCount++;
1429
1430        pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1431        matcher = pattern.matcher("aa\u2028blah");
1432        matcher.find();
1433        if (!matcher.group(0).equals("aa\u2028blah"))
1434            failCount++;
1435
1436        pattern = Pattern.compile("[az]$",
1437                                  Pattern.MULTILINE | Pattern.UNIX_LINES);
1438        matcher = pattern.matcher("aa\u2028zz");
1439        check(matcher, "a\u2028", false);
1440
1441        // Supplementary character test
1442        pattern = Pattern.compile(".*");
1443        matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1444        matcher.find();
1445        if (!matcher.group(0).equals(toSupplementaries("aa")))
1446            failCount++;
1447
1448        pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1449        matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1450        matcher.find();
1451        if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1452            failCount++;
1453
1454        pattern = Pattern.compile(toSupplementaries("[az]$"),
1455                                  Pattern.MULTILINE | Pattern.UNIX_LINES);
1456        matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1457        check(matcher, toSupplementaries("a\u2028"), false);
1458
1459        report("Unix Lines");
1460    }
1461
1462    private static void commentsTest() {
1463        int flags = Pattern.COMMENTS;
1464
1465        Pattern pattern = Pattern.compile("aa \\# aa", flags);
1466        Matcher matcher = pattern.matcher("aa#aa");
1467        if (!matcher.matches())
1468            failCount++;
1469
1470        pattern = Pattern.compile("aa  # blah", flags);
1471        matcher = pattern.matcher("aa");
1472        if (!matcher.matches())
1473            failCount++;
1474
1475        pattern = Pattern.compile("aa blah", flags);
1476        matcher = pattern.matcher("aablah");
1477        if (!matcher.matches())
1478             failCount++;
1479
1480        pattern = Pattern.compile("aa  # blah blech  ", flags);
1481        matcher = pattern.matcher("aa");
1482        if (!matcher.matches())
1483            failCount++;
1484
1485        pattern = Pattern.compile("aa  # blah\n  ", flags);
1486        matcher = pattern.matcher("aa");
1487        if (!matcher.matches())
1488            failCount++;
1489
1490        pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1491        matcher = pattern.matcher("aabc");
1492        if (!matcher.matches())
1493             failCount++;
1494
1495        pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1496        matcher = pattern.matcher("aabc");
1497        if (!matcher.matches())
1498             failCount++;
1499
1500        pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1501        matcher = pattern.matcher("aabc#blech");
1502        if (!matcher.matches())
1503             failCount++;
1504
1505        // Supplementary character test
1506        pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1507        matcher = pattern.matcher(toSupplementaries("aa#aa"));
1508        if (!matcher.matches())
1509            failCount++;
1510
1511        pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1512        matcher = pattern.matcher(toSupplementaries("aa"));
1513        if (!matcher.matches())
1514            failCount++;
1515
1516        pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1517        matcher = pattern.matcher(toSupplementaries("aablah"));
1518        if (!matcher.matches())
1519             failCount++;
1520
1521        pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1522        matcher = pattern.matcher(toSupplementaries("aa"));
1523        if (!matcher.matches())
1524            failCount++;
1525
1526        pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1527        matcher = pattern.matcher(toSupplementaries("aa"));
1528        if (!matcher.matches())
1529            failCount++;
1530
1531        pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1532        matcher = pattern.matcher(toSupplementaries("aabc"));
1533        if (!matcher.matches())
1534             failCount++;
1535
1536        pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1537        matcher = pattern.matcher(toSupplementaries("aabc"));
1538        if (!matcher.matches())
1539             failCount++;
1540
1541        pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1542        matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1543        if (!matcher.matches())
1544             failCount++;
1545
1546        report("Comments");
1547    }
1548
1549    private static void caseFoldingTest() { // bug 4504687
1550        int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1551        Pattern pattern = Pattern.compile("aa", flags);
1552        Matcher matcher = pattern.matcher("ab");
1553        if (matcher.matches())
1554            failCount++;
1555
1556        pattern = Pattern.compile("aA", flags);
1557        matcher = pattern.matcher("ab");
1558        if (matcher.matches())
1559            failCount++;
1560
1561        pattern = Pattern.compile("aa", flags);
1562        matcher = pattern.matcher("aB");
1563        if (matcher.matches())
1564            failCount++;
1565        matcher = pattern.matcher("Ab");
1566        if (matcher.matches())
1567            failCount++;
1568
1569        // ASCII               "a"
1570        // Latin-1 Supplement  "a" + grave
1571        // Cyrillic            "a"
1572        String[] patterns = new String[] {
1573            //single
1574            "a", "\u00e0", "\u0430",
1575            //slice
1576            "ab", "\u00e0\u00e1", "\u0430\u0431",
1577            //class single
1578            "[a]", "[\u00e0]", "[\u0430]",
1579            //class range
1580            "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1581            //back reference
1582            "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1583        };
1584
1585        String[] texts = new String[] {
1586            "A", "\u00c0", "\u0410",
1587            "AB", "\u00c0\u00c1", "\u0410\u0411",
1588            "A", "\u00c0", "\u0410",
1589            "B", "\u00c2", "\u0411",
1590            "aA", "\u00e0\u00c0", "\u0430\u0410"
1591        };
1592
1593        boolean[] expected = new boolean[] {
1594            true, false, false,
1595            true, false, false,
1596            true, false, false,
1597            true, false, false,
1598            true, false, false
1599        };
1600
1601        flags = Pattern.CASE_INSENSITIVE;
1602        for (int i = 0; i < patterns.length; i++) {
1603            pattern = Pattern.compile(patterns[i], flags);
1604            matcher = pattern.matcher(texts[i]);
1605            if (matcher.matches() != expected[i]) {
1606                System.out.println("<1> Failed at " + i);
1607                failCount++;
1608            }
1609        }
1610
1611        flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1612        for (int i = 0; i < patterns.length; i++) {
1613            pattern = Pattern.compile(patterns[i], flags);
1614            matcher = pattern.matcher(texts[i]);
1615            if (!matcher.matches()) {
1616                System.out.println("<2> Failed at " + i);
1617                failCount++;
1618            }
1619        }
1620        // flag unicode_case alone should do nothing
1621        flags = Pattern.UNICODE_CASE;
1622        for (int i = 0; i < patterns.length; i++) {
1623            pattern = Pattern.compile(patterns[i], flags);
1624            matcher = pattern.matcher(texts[i]);
1625            if (matcher.matches()) {
1626                System.out.println("<3> Failed at " + i);
1627                failCount++;
1628            }
1629        }
1630
1631        // Special cases: i, I, u+0131 and u+0130
1632        flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1633        pattern = Pattern.compile("[h-j]+", flags);
1634        if (!pattern.matcher("\u0131\u0130").matches())
1635            failCount++;
1636        report("Case Folding");
1637    }
1638
1639    private static void appendTest() {
1640        Pattern pattern = Pattern.compile("(ab)(cd)");
1641        Matcher matcher = pattern.matcher("abcd");
1642        String result = matcher.replaceAll("$2$1");
1643        if (!result.equals("cdab"))
1644            failCount++;
1645
1646        String  s1 = "Swap all: first = 123, second = 456";
1647        String  s2 = "Swap one: first = 123, second = 456";
1648        String  r  = "$3$2$1";
1649        pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1650        matcher = pattern.matcher(s1);
1651
1652        result = matcher.replaceAll(r);
1653        if (!result.equals("Swap all: 123 = first, 456 = second"))
1654            failCount++;
1655
1656        matcher = pattern.matcher(s2);
1657
1658        if (matcher.find()) {
1659            StringBuffer sb = new StringBuffer();
1660            matcher.appendReplacement(sb, r);
1661            matcher.appendTail(sb);
1662            result = sb.toString();
1663            if (!result.equals("Swap one: 123 = first, second = 456"))
1664                failCount++;
1665        }
1666
1667        // Supplementary character test
1668        pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1669        matcher = pattern.matcher(toSupplementaries("abcd"));
1670        result = matcher.replaceAll("$2$1");
1671        if (!result.equals(toSupplementaries("cdab")))
1672            failCount++;
1673
1674        s1 = toSupplementaries("Swap all: first = 123, second = 456");
1675        s2 = toSupplementaries("Swap one: first = 123, second = 456");
1676        r  = toSupplementaries("$3$2$1");
1677        pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1678        matcher = pattern.matcher(s1);
1679
1680        result = matcher.replaceAll(r);
1681        if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1682            failCount++;
1683
1684        matcher = pattern.matcher(s2);
1685
1686        if (matcher.find()) {
1687            StringBuffer sb = new StringBuffer();
1688            matcher.appendReplacement(sb, r);
1689            matcher.appendTail(sb);
1690            result = sb.toString();
1691            if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1692                failCount++;
1693        }
1694        report("Append");
1695    }
1696
1697    private static void splitTest() {
1698        Pattern pattern = Pattern.compile(":");
1699        String[] result = pattern.split("foo:and:boo", 2);
1700        if (!result[0].equals("foo"))
1701            failCount++;
1702        if (!result[1].equals("and:boo"))
1703            failCount++;
1704        // Supplementary character test
1705        Pattern patternX = Pattern.compile(toSupplementaries("X"));
1706        result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1707        if (!result[0].equals(toSupplementaries("foo")))
1708            failCount++;
1709        if (!result[1].equals(toSupplementaries("andXboo")))
1710            failCount++;
1711
1712        CharBuffer cb = CharBuffer.allocate(100);
1713        cb.put("foo:and:boo");
1714        cb.flip();
1715        result = pattern.split(cb);
1716        if (!result[0].equals("foo"))
1717            failCount++;
1718        if (!result[1].equals("and"))
1719            failCount++;
1720        if (!result[2].equals("boo"))
1721            failCount++;
1722
1723        // Supplementary character test
1724        CharBuffer cbs = CharBuffer.allocate(100);
1725        cbs.put(toSupplementaries("fooXandXboo"));
1726        cbs.flip();
1727        result = patternX.split(cbs);
1728        if (!result[0].equals(toSupplementaries("foo")))
1729            failCount++;
1730        if (!result[1].equals(toSupplementaries("and")))
1731            failCount++;
1732        if (!result[2].equals(toSupplementaries("boo")))
1733            failCount++;
1734
1735        String source = "0123456789";
1736        for (int limit=-2; limit<3; limit++) {
1737            for (int x=0; x<10; x++) {
1738                result = source.split(Integer.toString(x), limit);
1739                int expectedLength = limit < 1 ? 2 : limit;
1740
1741                if ((limit == 0) && (x == 9)) {
1742                    // expected dropping of ""
1743                    if (result.length != 1)
1744                        failCount++;
1745                    if (!result[0].equals("012345678")) {
1746                        failCount++;
1747                    }
1748                } else {
1749                    if (result.length != expectedLength) {
1750                        failCount++;
1751                    }
1752                    if (!result[0].equals(source.substring(0,x))) {
1753                        if (limit != 1) {
1754                            failCount++;
1755                        } else {
1756                            if (!result[0].equals(source.substring(0,10))) {
1757                                failCount++;
1758                            }
1759                        }
1760                    }
1761                    if (expectedLength > 1) { // Check segment 2
1762                        if (!result[1].equals(source.substring(x+1,10)))
1763                            failCount++;
1764                    }
1765                }
1766            }
1767        }
1768        // Check the case for no match found
1769        for (int limit=-2; limit<3; limit++) {
1770            result = source.split("e", limit);
1771            if (result.length != 1)
1772                failCount++;
1773            if (!result[0].equals(source))
1774                failCount++;
1775        }
1776        // Check the case for limit == 0, source = "";
1777        source = "";
1778        result = source.split("e", 0);
1779        if (result.length != 1)
1780            failCount++;
1781        if (!result[0].equals(source))
1782            failCount++;
1783
1784        report("Split");
1785    }
1786
1787    private static void negationTest() {
1788        Pattern pattern = Pattern.compile("[\\[@^]+");
1789        Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1790        if (!matcher.find())
1791            failCount++;
1792        if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1793            failCount++;
1794        pattern = Pattern.compile("[@\\[^]+");
1795        matcher = pattern.matcher("@@@@[[[[^^^^");
1796        if (!matcher.find())
1797            failCount++;
1798        if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1799            failCount++;
1800        pattern = Pattern.compile("[@\\[^@]+");
1801        matcher = pattern.matcher("@@@@[[[[^^^^");
1802        if (!matcher.find())
1803            failCount++;
1804        if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1805            failCount++;
1806
1807        pattern = Pattern.compile("\\)");
1808        matcher = pattern.matcher("xxx)xxx");
1809        if (!matcher.find())
1810            failCount++;
1811
1812        report("Negation");
1813    }
1814
1815    private static void ampersandTest() {
1816        Pattern pattern = Pattern.compile("[&@]+");
1817        check(pattern, "@@@@&&&&", true);
1818
1819        pattern = Pattern.compile("[@&]+");
1820        check(pattern, "@@@@&&&&", true);
1821
1822        pattern = Pattern.compile("[@\\&]+");
1823        check(pattern, "@@@@&&&&", true);
1824
1825        report("Ampersand");
1826    }
1827
1828    private static void octalTest() throws Exception {
1829        Pattern pattern = Pattern.compile("\\u0007");
1830        Matcher matcher = pattern.matcher("\u0007");
1831        if (!matcher.matches())
1832            failCount++;
1833        pattern = Pattern.compile("\\07");
1834        matcher = pattern.matcher("\u0007");
1835        if (!matcher.matches())
1836            failCount++;
1837        pattern = Pattern.compile("\\007");
1838        matcher = pattern.matcher("\u0007");
1839        if (!matcher.matches())
1840            failCount++;
1841        pattern = Pattern.compile("\\0007");
1842        matcher = pattern.matcher("\u0007");
1843        if (!matcher.matches())
1844            failCount++;
1845        pattern = Pattern.compile("\\040");
1846        matcher = pattern.matcher("\u0020");
1847        if (!matcher.matches())
1848            failCount++;
1849        pattern = Pattern.compile("\\0403");
1850        matcher = pattern.matcher("\u00203");
1851        if (!matcher.matches())
1852            failCount++;
1853        pattern = Pattern.compile("\\0103");
1854        matcher = pattern.matcher("\u0043");
1855        if (!matcher.matches())
1856            failCount++;
1857
1858        report("Octal");
1859    }
1860
1861    private static void longPatternTest() throws Exception {
1862        try {
1863            Pattern pattern = Pattern.compile(
1864                "a 32-character-long pattern xxxx");
1865            pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1866            pattern = Pattern.compile("a thirty four character long regex");
1867            StringBuffer patternToBe = new StringBuffer(101);
1868            for (int i=0; i<100; i++)
1869                patternToBe.append((char)(97 + i%26));
1870            pattern = Pattern.compile(patternToBe.toString());
1871        } catch (PatternSyntaxException e) {
1872            failCount++;
1873        }
1874
1875        // Supplementary character test
1876        try {
1877            Pattern pattern = Pattern.compile(
1878                toSupplementaries("a 32-character-long pattern xxxx"));
1879            pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1880            pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1881            StringBuffer patternToBe = new StringBuffer(101*2);
1882            for (int i=0; i<100; i++)
1883                patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1884                                                     + 97 + i%26));
1885            pattern = Pattern.compile(patternToBe.toString());
1886        } catch (PatternSyntaxException e) {
1887            failCount++;
1888        }
1889        report("LongPattern");
1890    }
1891
1892    private static void group0Test() throws Exception {
1893        Pattern pattern = Pattern.compile("(tes)ting");
1894        Matcher matcher = pattern.matcher("testing");
1895        check(matcher, "testing");
1896
1897        matcher.reset("testing");
1898        if (matcher.lookingAt()) {
1899            if (!matcher.group(0).equals("testing"))
1900                failCount++;
1901        } else {
1902            failCount++;
1903        }
1904
1905        matcher.reset("testing");
1906        if (matcher.matches()) {
1907            if (!matcher.group(0).equals("testing"))
1908                failCount++;
1909        } else {
1910            failCount++;
1911        }
1912
1913        pattern = Pattern.compile("(tes)ting");
1914        matcher = pattern.matcher("testing");
1915        if (matcher.lookingAt()) {
1916            if (!matcher.group(0).equals("testing"))
1917                failCount++;
1918        } else {
1919            failCount++;
1920        }
1921
1922        pattern = Pattern.compile("^(tes)ting");
1923        matcher = pattern.matcher("testing");
1924        if (matcher.matches()) {
1925            if (!matcher.group(0).equals("testing"))
1926                failCount++;
1927        } else {
1928            failCount++;
1929        }
1930
1931        // Supplementary character test
1932        pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1933        matcher = pattern.matcher(toSupplementaries("testing"));
1934        check(matcher, toSupplementaries("testing"));
1935
1936        matcher.reset(toSupplementaries("testing"));
1937        if (matcher.lookingAt()) {
1938            if (!matcher.group(0).equals(toSupplementaries("testing")))
1939                failCount++;
1940        } else {
1941            failCount++;
1942        }
1943
1944        matcher.reset(toSupplementaries("testing"));
1945        if (matcher.matches()) {
1946            if (!matcher.group(0).equals(toSupplementaries("testing")))
1947                failCount++;
1948        } else {
1949            failCount++;
1950        }
1951
1952        pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1953        matcher = pattern.matcher(toSupplementaries("testing"));
1954        if (matcher.lookingAt()) {
1955            if (!matcher.group(0).equals(toSupplementaries("testing")))
1956                failCount++;
1957        } else {
1958            failCount++;
1959        }
1960
1961        pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1962        matcher = pattern.matcher(toSupplementaries("testing"));
1963        if (matcher.matches()) {
1964            if (!matcher.group(0).equals(toSupplementaries("testing")))
1965                failCount++;
1966        } else {
1967            failCount++;
1968        }
1969
1970        report("Group0");
1971    }
1972
1973    private static void findIntTest() throws Exception {
1974        Pattern p = Pattern.compile("blah");
1975        Matcher m = p.matcher("zzzzblahzzzzzblah");
1976        boolean result = m.find(2);
1977        if (!result)
1978            failCount++;
1979
1980        p = Pattern.compile("$");
1981        m = p.matcher("1234567890");
1982        result = m.find(10);
1983        if (!result)
1984            failCount++;
1985        try {
1986            result = m.find(11);
1987            failCount++;
1988        } catch (IndexOutOfBoundsException e) {
1989            // correct result
1990        }
1991
1992        // Supplementary character test
1993        p = Pattern.compile(toSupplementaries("blah"));
1994        m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1995        result = m.find(2);
1996        if (!result)
1997            failCount++;
1998
1999        report("FindInt");
2000    }
2001
2002    private static void emptyPatternTest() throws Exception {
2003        Pattern p = Pattern.compile("");
2004        Matcher m = p.matcher("foo");
2005
2006        // Should find empty pattern at beginning of input
2007        boolean result = m.find();
2008        if (result != true)
2009            failCount++;
2010        if (m.start() != 0)
2011            failCount++;
2012
2013        // Should not match entire input if input is not empty
2014        m.reset();
2015        result = m.matches();
2016        if (result == true)
2017            failCount++;
2018
2019        try {
2020            m.start(0);
2021            failCount++;
2022        } catch (IllegalStateException e) {
2023            // Correct result
2024        }
2025
2026        // Should match entire input if input is empty
2027        m.reset("");
2028        result = m.matches();
2029        if (result != true)
2030            failCount++;
2031
2032        result = Pattern.matches("", "");
2033        if (result != true)
2034            failCount++;
2035
2036        result = Pattern.matches("", "foo");
2037        if (result == true)
2038            failCount++;
2039        report("EmptyPattern");
2040    }
2041
2042    private static void charClassTest() throws Exception {
2043        Pattern pattern = Pattern.compile("blah[ab]]blech");
2044        check(pattern, "blahb]blech", true);
2045
2046        pattern = Pattern.compile("[abc[def]]");
2047        check(pattern, "b", true);
2048
2049        // Supplementary character tests
2050        pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2051        check(pattern, toSupplementaries("blahb]blech"), true);
2052
2053        pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2054        check(pattern, toSupplementaries("b"), true);
2055
2056        try {
2057            // u00ff when UNICODE_CASE
2058            pattern = Pattern.compile("[ab\u00ffcd]",
2059                                      Pattern.CASE_INSENSITIVE|
2060                                      Pattern.UNICODE_CASE);
2061            check(pattern, "ab\u00ffcd", true);
2062            check(pattern, "Ab\u0178Cd", true);
2063
2064            // u00b5 when UNICODE_CASE
2065            pattern = Pattern.compile("[ab\u00b5cd]",
2066                                      Pattern.CASE_INSENSITIVE|
2067                                      Pattern.UNICODE_CASE);
2068            check(pattern, "ab\u00b5cd", true);
2069            check(pattern, "Ab\u039cCd", true);
2070        } catch (Exception e) { failCount++; }
2071
2072        /* Special cases
2073           (1)LatinSmallLetterLongS u+017f
2074           (2)LatinSmallLetterDotlessI u+0131
2075           (3)LatineCapitalLetterIWithDotAbove u+0130
2076           (4)KelvinSign u+212a
2077           (5)AngstromSign u+212b
2078        */
2079        int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2080        pattern = Pattern.compile("[sik\u00c5]+", flags);
2081        if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2082            failCount++;
2083
2084        report("CharClass");
2085    }
2086
2087    private static void caretTest() throws Exception {
2088        Pattern pattern = Pattern.compile("\\w*");
2089        Matcher matcher = pattern.matcher("a#bc#def##g");
2090        check(matcher, "a");
2091        check(matcher, "");
2092        check(matcher, "bc");
2093        check(matcher, "");
2094        check(matcher, "def");
2095        check(matcher, "");
2096        check(matcher, "");
2097        check(matcher, "g");
2098        check(matcher, "");
2099        if (matcher.find())
2100            failCount++;
2101
2102        pattern = Pattern.compile("^\\w*");
2103        matcher = pattern.matcher("a#bc#def##g");
2104        check(matcher, "a");
2105        if (matcher.find())
2106            failCount++;
2107
2108        pattern = Pattern.compile("\\w");
2109        matcher = pattern.matcher("abc##x");
2110        check(matcher, "a");
2111        check(matcher, "b");
2112        check(matcher, "c");
2113        check(matcher, "x");
2114        if (matcher.find())
2115            failCount++;
2116
2117        pattern = Pattern.compile("^\\w");
2118        matcher = pattern.matcher("abc##x");
2119        check(matcher, "a");
2120        if (matcher.find())
2121            failCount++;
2122
2123        pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2124        matcher = pattern.matcher("abcdef-ghi\njklmno");
2125        check(matcher, "abc");
2126        if (matcher.find())
2127            failCount++;
2128
2129        pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2130        matcher = pattern.matcher("abcdef-ghi\njklmno");
2131        check(matcher, "abc");
2132        check(matcher, "jkl");
2133        if (matcher.find())
2134            failCount++;
2135
2136        pattern = Pattern.compile("^", Pattern.MULTILINE);
2137        matcher = pattern.matcher("this is some text");
2138        String result = matcher.replaceAll("X");
2139        if (!result.equals("Xthis is some text"))
2140            failCount++;
2141
2142        pattern = Pattern.compile("^");
2143        matcher = pattern.matcher("this is some text");
2144        result = matcher.replaceAll("X");
2145        if (!result.equals("Xthis is some text"))
2146            failCount++;
2147
2148        pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2149        matcher = pattern.matcher("this is some text\n");
2150        result = matcher.replaceAll("X");
2151        if (!result.equals("Xthis is some text\n"))
2152            failCount++;
2153
2154        report("Caret");
2155    }
2156
2157    private static void groupCaptureTest() throws Exception {
2158        // Independent group
2159        Pattern pattern = Pattern.compile("x+(?>y+)z+");
2160        Matcher matcher = pattern.matcher("xxxyyyzzz");
2161        matcher.find();
2162        try {
2163            String blah = matcher.group(1);
2164            failCount++;
2165        } catch (IndexOutOfBoundsException ioobe) {
2166            // Good result
2167        }
2168        // Pure group
2169        pattern = Pattern.compile("x+(?:y+)z+");
2170        matcher = pattern.matcher("xxxyyyzzz");
2171        matcher.find();
2172        try {
2173            String blah = matcher.group(1);
2174            failCount++;
2175        } catch (IndexOutOfBoundsException ioobe) {
2176            // Good result
2177        }
2178
2179        // Supplementary character tests
2180        // Independent group
2181        pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2182        matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2183        matcher.find();
2184        try {
2185            String blah = matcher.group(1);
2186            failCount++;
2187        } catch (IndexOutOfBoundsException ioobe) {
2188            // Good result
2189        }
2190        // Pure group
2191        pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2192        matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2193        matcher.find();
2194        try {
2195            String blah = matcher.group(1);
2196            failCount++;
2197        } catch (IndexOutOfBoundsException ioobe) {
2198            // Good result
2199        }
2200
2201        report("GroupCapture");
2202    }
2203
2204    private static void backRefTest() throws Exception {
2205        Pattern pattern = Pattern.compile("(a*)bc\\1");
2206        check(pattern, "zzzaabcazzz", true);
2207
2208        pattern = Pattern.compile("(a*)bc\\1");
2209        check(pattern, "zzzaabcaazzz", true);
2210
2211        pattern = Pattern.compile("(abc)(def)\\1");
2212        check(pattern, "abcdefabc", true);
2213
2214        pattern = Pattern.compile("(abc)(def)\\3");
2215        check(pattern, "abcdefabc", false);
2216
2217        try {
2218            for (int i = 1; i < 10; i++) {
2219                // Make sure backref 1-9 are always accepted
2220                pattern = Pattern.compile("abcdef\\" + i);
2221                // and fail to match if the target group does not exit
2222                check(pattern, "abcdef", false);
2223            }
2224        } catch(PatternSyntaxException e) {
2225            failCount++;
2226        }
2227
2228        pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2229        check(pattern, "abcdefghija", false);
2230        check(pattern, "abcdefghija1", true);
2231
2232        pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2233        check(pattern, "abcdefghijkk", true);
2234
2235        pattern = Pattern.compile("(a)bcdefghij\\11");
2236        check(pattern, "abcdefghija1", true);
2237
2238        // Supplementary character tests
2239        pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2240        check(pattern, toSupplementaries("zzzaabcazzz"), true);
2241
2242        pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2243        check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2244
2245        pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2246        check(pattern, toSupplementaries("abcdefabc"), true);
2247
2248        pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2249        check(pattern, toSupplementaries("abcdefabc"), false);
2250
2251        pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2252        check(pattern, toSupplementaries("abcdefghija"), false);
2253        check(pattern, toSupplementaries("abcdefghija1"), true);
2254
2255        pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2256        check(pattern, toSupplementaries("abcdefghijkk"), true);
2257
2258        report("BackRef");
2259    }
2260
2261    /**
2262     * Unicode Technical Report #18, section 2.6 End of Line
2263     * There is no empty line to be matched in the sequence \u000D\u000A
2264     * but there is an empty line in the sequence \u000A\u000D.
2265     */
2266    private static void anchorTest() throws Exception {
2267        Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2268        Matcher m = p.matcher("blah1\r\nblah2");
2269        m.find();
2270        m.find();
2271        if (!m.group().equals("blah2"))
2272            failCount++;
2273
2274        m.reset("blah1\n\rblah2");
2275        m.find();
2276        m.find();
2277        m.find();
2278        if (!m.group().equals("blah2"))
2279            failCount++;
2280
2281        // Test behavior of $ with \r\n at end of input
2282        p = Pattern.compile(".+$");
2283        m = p.matcher("blah1\r\n");
2284        if (!m.find())
2285            failCount++;
2286       if (!m.group().equals("blah1"))
2287            failCount++;
2288        if (m.find())
2289            failCount++;
2290
2291        // Test behavior of $ with \r\n at end of input in multiline
2292        p = Pattern.compile(".+$", Pattern.MULTILINE);
2293        m = p.matcher("blah1\r\n");
2294        if (!m.find())
2295            failCount++;
2296        if (m.find())
2297            failCount++;
2298
2299        // Test for $ recognition of \u0085 for bug 4527731
2300        p = Pattern.compile(".+$", Pattern.MULTILINE);
2301        m = p.matcher("blah1\u0085");
2302        if (!m.find())
2303            failCount++;
2304
2305        // Supplementary character test
2306        p = Pattern.compile("^.*$", Pattern.MULTILINE);
2307        m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2308        m.find();
2309        m.find();
2310        if (!m.group().equals(toSupplementaries("blah2")))
2311            failCount++;
2312
2313        m.reset(toSupplementaries("blah1\n\rblah2"));
2314        m.find();
2315        m.find();
2316        m.find();
2317        if (!m.group().equals(toSupplementaries("blah2")))
2318            failCount++;
2319
2320        // Test behavior of $ with \r\n at end of input
2321        p = Pattern.compile(".+$");
2322        m = p.matcher(toSupplementaries("blah1\r\n"));
2323        if (!m.find())
2324            failCount++;
2325        if (!m.group().equals(toSupplementaries("blah1")))
2326            failCount++;
2327        if (m.find())
2328            failCount++;
2329
2330        // Test behavior of $ with \r\n at end of input in multiline
2331        p = Pattern.compile(".+$", Pattern.MULTILINE);
2332        m = p.matcher(toSupplementaries("blah1\r\n"));
2333        if (!m.find())
2334            failCount++;
2335        if (m.find())
2336            failCount++;
2337
2338        // Test for $ recognition of \u0085 for bug 4527731
2339        p = Pattern.compile(".+$", Pattern.MULTILINE);
2340        m = p.matcher(toSupplementaries("blah1\u0085"));
2341        if (!m.find())
2342            failCount++;
2343
2344        report("Anchors");
2345    }
2346
2347    /**
2348     * A basic sanity test of Matcher.lookingAt().
2349     */
2350    private static void lookingAtTest() throws Exception {
2351        Pattern p = Pattern.compile("(ab)(c*)");
2352        Matcher m = p.matcher("abccczzzabcczzzabccc");
2353
2354        if (!m.lookingAt())
2355            failCount++;
2356
2357        if (!m.group().equals(m.group(0)))
2358            failCount++;
2359
2360        m = p.matcher("zzzabccczzzabcczzzabccczzz");
2361        if (m.lookingAt())
2362            failCount++;
2363
2364        // Supplementary character test
2365        p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2366        m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2367
2368        if (!m.lookingAt())
2369            failCount++;
2370
2371        if (!m.group().equals(m.group(0)))
2372            failCount++;
2373
2374        m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2375        if (m.lookingAt())
2376            failCount++;
2377
2378        report("Looking At");
2379    }
2380
2381    /**
2382     * A basic sanity test of Matcher.matches().
2383     */
2384    private static void matchesTest() throws Exception {
2385        // matches()
2386        Pattern p = Pattern.compile("ulb(c*)");
2387        Matcher m = p.matcher("ulbcccccc");
2388        if (!m.matches())
2389            failCount++;
2390
2391        // find() but not matches()
2392        m.reset("zzzulbcccccc");
2393        if (m.matches())
2394            failCount++;
2395
2396        // lookingAt() but not matches()
2397        m.reset("ulbccccccdef");
2398        if (m.matches())
2399            failCount++;
2400
2401        // matches()
2402        p = Pattern.compile("a|ad");
2403        m = p.matcher("ad");
2404        if (!m.matches())
2405            failCount++;
2406
2407        // Supplementary character test
2408        // matches()
2409        p = Pattern.compile(toSupplementaries("ulb(c*)"));
2410        m = p.matcher(toSupplementaries("ulbcccccc"));
2411        if (!m.matches())
2412            failCount++;
2413
2414        // find() but not matches()
2415        m.reset(toSupplementaries("zzzulbcccccc"));
2416        if (m.matches())
2417            failCount++;
2418
2419        // lookingAt() but not matches()
2420        m.reset(toSupplementaries("ulbccccccdef"));
2421        if (m.matches())
2422            failCount++;
2423
2424        // matches()
2425        p = Pattern.compile(toSupplementaries("a|ad"));
2426        m = p.matcher(toSupplementaries("ad"));
2427        if (!m.matches())
2428            failCount++;
2429
2430        report("Matches");
2431    }
2432
2433    /**
2434     * A basic sanity test of Pattern.matches().
2435     */
2436    private static void patternMatchesTest() throws Exception {
2437        // matches()
2438        if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2439                             toSupplementaries("ulbcccccc")))
2440            failCount++;
2441
2442        // find() but not matches()
2443        if (Pattern.matches(toSupplementaries("ulb(c*)"),
2444                            toSupplementaries("zzzulbcccccc")))
2445            failCount++;
2446
2447        // lookingAt() but not matches()
2448        if (Pattern.matches(toSupplementaries("ulb(c*)"),
2449                            toSupplementaries("ulbccccccdef")))
2450            failCount++;
2451
2452        // Supplementary character test
2453        // matches()
2454        if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2455                             toSupplementaries("ulbcccccc")))
2456            failCount++;
2457
2458        // find() but not matches()
2459        if (Pattern.matches(toSupplementaries("ulb(c*)"),
2460                            toSupplementaries("zzzulbcccccc")))
2461            failCount++;
2462
2463        // lookingAt() but not matches()
2464        if (Pattern.matches(toSupplementaries("ulb(c*)"),
2465                            toSupplementaries("ulbccccccdef")))
2466            failCount++;
2467
2468        report("Pattern Matches");
2469    }
2470
2471    /**
2472     * Canonical equivalence testing. Tests the ability of the engine
2473     * to match sequences that are not explicitly specified in the
2474     * pattern when they are considered equivalent by the Unicode Standard.
2475     */
2476    private static void ceTest() throws Exception {
2477        // Decomposed char outside char classes
2478        Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2479        Matcher m = p.matcher("test\u00e5");
2480        if (!m.matches())
2481            failCount++;
2482
2483        m.reset("testa\u030a");
2484        if (!m.matches())
2485            failCount++;
2486
2487        // Composed char outside char classes
2488        p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2489        m = p.matcher("test\u00e5");
2490        if (!m.matches())
2491            failCount++;
2492
2493        m.reset("testa\u030a");
2494        if (!m.find())
2495            failCount++;
2496
2497        // Decomposed char inside a char class
2498        p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2499        m = p.matcher("test\u00e5");
2500        if (!m.find())
2501            failCount++;
2502
2503        m.reset("testa\u030a");
2504        if (!m.find())
2505            failCount++;
2506
2507        // Composed char inside a char class
2508        p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2509        m = p.matcher("test\u00e5");
2510        if (!m.find())
2511            failCount++;
2512
2513        m.reset("testa\u0300");
2514        if (!m.find())
2515            failCount++;
2516
2517        m.reset("testa\u030a");
2518        if (!m.find())
2519            failCount++;
2520
2521        // Marks that cannot legally change order and be equivalent
2522        p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2523        check(p, "testa\u0308\u0300", true);
2524        check(p, "testa\u0300\u0308", false);
2525
2526        // Marks that can legally change order and be equivalent
2527        p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2528        check(p, "testa\u0308\u0323", true);
2529        check(p, "testa\u0323\u0308", true);
2530
2531        // Test all equivalences of the sequence a\u0308\u0323\u0300
2532        p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2533        check(p, "testa\u0308\u0323\u0300", true);
2534        check(p, "testa\u0323\u0308\u0300", true);
2535        check(p, "testa\u0308\u0300\u0323", true);
2536        check(p, "test\u00e4\u0323\u0300", true);
2537        check(p, "test\u00e4\u0300\u0323", true);
2538
2539        /*
2540         * The following canonical equivalence tests don't work. Bug id: 4916384.
2541         *
2542        // Decomposed hangul (jamos)
2543        p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2544        m = p.matcher("\u1100\u1161");
2545        if (!m.matches())
2546            failCount++;
2547
2548        m.reset("\uac00");
2549        if (!m.matches())
2550            failCount++;
2551
2552        // Composed hangul
2553        p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2554        m = p.matcher("\u1100\u1161");
2555        if (!m.matches())
2556            failCount++;
2557
2558        m.reset("\uac00");
2559        if (!m.matches())
2560            failCount++;
2561
2562        // Decomposed supplementary outside char classes
2563        p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2564        m = p.matcher("test\ud834\uddc0");
2565        if (!m.matches())
2566            failCount++;
2567
2568        m.reset("test\ud834\uddbc\ud834\udd6f");
2569        if (!m.matches())
2570            failCount++;
2571
2572        // Composed supplementary outside char classes
2573        p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2574        m.reset("test\ud834\uddbc\ud834\udd6f");
2575        if (!m.matches())
2576            failCount++;
2577
2578        m = p.matcher("test\ud834\uddc0");
2579        if (!m.matches())
2580            failCount++;
2581
2582        */
2583
2584        report("Canonical Equivalence");
2585    }
2586
2587    /**
2588     * A basic sanity test of Matcher.replaceAll().
2589     */
2590    private static void globalSubstitute() throws Exception {
2591        // Global substitution with a literal
2592        Pattern p = Pattern.compile("(ab)(c*)");
2593        Matcher m = p.matcher("abccczzzabcczzzabccc");
2594        if (!m.replaceAll("test").equals("testzzztestzzztest"))
2595            failCount++;
2596
2597        m.reset("zzzabccczzzabcczzzabccczzz");
2598        if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2599            failCount++;
2600
2601        // Global substitution with groups
2602        m.reset("zzzabccczzzabcczzzabccczzz");
2603        String result = m.replaceAll("$1");
2604        if (!result.equals("zzzabzzzabzzzabzzz"))
2605            failCount++;
2606
2607        // Supplementary character test
2608        // Global substitution with a literal
2609        p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2610        m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2611        if (!m.replaceAll(toSupplementaries("test")).
2612            equals(toSupplementaries("testzzztestzzztest")))
2613            failCount++;
2614
2615        m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2616        if (!m.replaceAll(toSupplementaries("test")).
2617            equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2618            failCount++;
2619
2620        // Global substitution with groups
2621        m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2622        result = m.replaceAll("$1");
2623        if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2624            failCount++;
2625
2626        report("Global Substitution");
2627    }
2628
2629    /**
2630     * Tests the usage of Matcher.appendReplacement() with literal
2631     * and group substitutions.
2632     */
2633    private static void stringbufferSubstitute() throws Exception {
2634        // SB substitution with literal
2635        String blah = "zzzblahzzz";
2636        Pattern p = Pattern.compile("blah");
2637        Matcher m = p.matcher(blah);
2638        StringBuffer result = new StringBuffer();
2639        try {
2640            m.appendReplacement(result, "blech");
2641            failCount++;
2642        } catch (IllegalStateException e) {
2643        }
2644        m.find();
2645        m.appendReplacement(result, "blech");
2646        if (!result.toString().equals("zzzblech"))
2647            failCount++;
2648
2649        m.appendTail(result);
2650        if (!result.toString().equals("zzzblechzzz"))
2651            failCount++;
2652
2653        // SB substitution with groups
2654        blah = "zzzabcdzzz";
2655        p = Pattern.compile("(ab)(cd)*");
2656        m = p.matcher(blah);
2657        result = new StringBuffer();
2658        try {
2659            m.appendReplacement(result, "$1");
2660            failCount++;
2661        } catch (IllegalStateException e) {
2662        }
2663        m.find();
2664        m.appendReplacement(result, "$1");
2665        if (!result.toString().equals("zzzab"))
2666            failCount++;
2667
2668        m.appendTail(result);
2669        if (!result.toString().equals("zzzabzzz"))
2670            failCount++;
2671
2672        // SB substitution with 3 groups
2673        blah = "zzzabcdcdefzzz";
2674        p = Pattern.compile("(ab)(cd)*(ef)");
2675        m = p.matcher(blah);
2676        result = new StringBuffer();
2677        try {
2678            m.appendReplacement(result, "$1w$2w$3");
2679            failCount++;
2680        } catch (IllegalStateException e) {
2681        }
2682        m.find();
2683        m.appendReplacement(result, "$1w$2w$3");
2684        if (!result.toString().equals("zzzabwcdwef"))
2685            failCount++;
2686
2687        m.appendTail(result);
2688        if (!result.toString().equals("zzzabwcdwefzzz"))
2689            failCount++;
2690
2691        // SB substitution with groups and three matches
2692        // skipping middle match
2693        blah = "zzzabcdzzzabcddzzzabcdzzz";
2694        p = Pattern.compile("(ab)(cd*)");
2695        m = p.matcher(blah);
2696        result = new StringBuffer();
2697        try {
2698            m.appendReplacement(result, "$1");
2699            failCount++;
2700        } catch (IllegalStateException e) {
2701        }
2702        m.find();
2703        m.appendReplacement(result, "$1");
2704        if (!result.toString().equals("zzzab"))
2705            failCount++;
2706
2707        m.find();
2708        m.find();
2709        m.appendReplacement(result, "$2");
2710        if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2711            failCount++;
2712
2713        m.appendTail(result);
2714        if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2715            failCount++;
2716
2717        // Check to make sure escaped $ is ignored
2718        blah = "zzzabcdcdefzzz";
2719        p = Pattern.compile("(ab)(cd)*(ef)");
2720        m = p.matcher(blah);
2721        result = new StringBuffer();
2722        m.find();
2723        m.appendReplacement(result, "$1w\\$2w$3");
2724        if (!result.toString().equals("zzzabw$2wef"))
2725            failCount++;
2726
2727        m.appendTail(result);
2728        if (!result.toString().equals("zzzabw$2wefzzz"))
2729            failCount++;
2730
2731        // Check to make sure a reference to nonexistent group causes error
2732        blah = "zzzabcdcdefzzz";
2733        p = Pattern.compile("(ab)(cd)*(ef)");
2734        m = p.matcher(blah);
2735        result = new StringBuffer();
2736        m.find();
2737        try {
2738            m.appendReplacement(result, "$1w$5w$3");
2739            failCount++;
2740        } catch (IndexOutOfBoundsException ioobe) {
2741            // Correct result
2742        }
2743
2744        // Check double digit group references
2745        blah = "zzz123456789101112zzz";
2746        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2747        m = p.matcher(blah);
2748        result = new StringBuffer();
2749        m.find();
2750        m.appendReplacement(result, "$1w$11w$3");
2751        if (!result.toString().equals("zzz1w11w3"))
2752            failCount++;
2753
2754        // Check to make sure it backs off $15 to $1 if only three groups
2755        blah = "zzzabcdcdefzzz";
2756        p = Pattern.compile("(ab)(cd)*(ef)");
2757        m = p.matcher(blah);
2758        result = new StringBuffer();
2759        m.find();
2760        m.appendReplacement(result, "$1w$15w$3");
2761        if (!result.toString().equals("zzzabwab5wef"))
2762            failCount++;
2763
2764
2765        // Supplementary character test
2766        // SB substitution with literal
2767        blah = toSupplementaries("zzzblahzzz");
2768        p = Pattern.compile(toSupplementaries("blah"));
2769        m = p.matcher(blah);
2770        result = new StringBuffer();
2771        try {
2772            m.appendReplacement(result, toSupplementaries("blech"));
2773            failCount++;
2774        } catch (IllegalStateException e) {
2775        }
2776        m.find();
2777        m.appendReplacement(result, toSupplementaries("blech"));
2778        if (!result.toString().equals(toSupplementaries("zzzblech")))
2779            failCount++;
2780
2781        m.appendTail(result);
2782        if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2783            failCount++;
2784
2785        // SB substitution with groups
2786        blah = toSupplementaries("zzzabcdzzz");
2787        p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2788        m = p.matcher(blah);
2789        result = new StringBuffer();
2790        try {
2791            m.appendReplacement(result, "$1");
2792            failCount++;
2793        } catch (IllegalStateException e) {
2794        }
2795        m.find();
2796        m.appendReplacement(result, "$1");
2797        if (!result.toString().equals(toSupplementaries("zzzab")))
2798            failCount++;
2799
2800        m.appendTail(result);
2801        if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2802            failCount++;
2803
2804        // SB substitution with 3 groups
2805        blah = toSupplementaries("zzzabcdcdefzzz");
2806        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2807        m = p.matcher(blah);
2808        result = new StringBuffer();
2809        try {
2810            m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2811            failCount++;
2812        } catch (IllegalStateException e) {
2813        }
2814        m.find();
2815        m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2816        if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2817            failCount++;
2818
2819        m.appendTail(result);
2820        if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2821            failCount++;
2822
2823        // SB substitution with groups and three matches
2824        // skipping middle match
2825        blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2826        p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2827        m = p.matcher(blah);
2828        result = new StringBuffer();
2829        try {
2830            m.appendReplacement(result, "$1");
2831            failCount++;
2832        } catch (IllegalStateException e) {
2833        }
2834        m.find();
2835        m.appendReplacement(result, "$1");
2836        if (!result.toString().equals(toSupplementaries("zzzab")))
2837            failCount++;
2838
2839        m.find();
2840        m.find();
2841        m.appendReplacement(result, "$2");
2842        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2843            failCount++;
2844
2845        m.appendTail(result);
2846        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2847            failCount++;
2848
2849        // Check to make sure escaped $ is ignored
2850        blah = toSupplementaries("zzzabcdcdefzzz");
2851        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2852        m = p.matcher(blah);
2853        result = new StringBuffer();
2854        m.find();
2855        m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2856        if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2857            failCount++;
2858
2859        m.appendTail(result);
2860        if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2861            failCount++;
2862
2863        // Check to make sure a reference to nonexistent group causes error
2864        blah = toSupplementaries("zzzabcdcdefzzz");
2865        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2866        m = p.matcher(blah);
2867        result = new StringBuffer();
2868        m.find();
2869        try {
2870            m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2871            failCount++;
2872        } catch (IndexOutOfBoundsException ioobe) {
2873            // Correct result
2874        }
2875
2876        // Check double digit group references
2877        blah = toSupplementaries("zzz123456789101112zzz");
2878        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2879        m = p.matcher(blah);
2880        result = new StringBuffer();
2881        m.find();
2882        m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2883        if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2884            failCount++;
2885
2886        // Check to make sure it backs off $15 to $1 if only three groups
2887        blah = toSupplementaries("zzzabcdcdefzzz");
2888        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2889        m = p.matcher(blah);
2890        result = new StringBuffer();
2891        m.find();
2892        m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2893        if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2894            failCount++;
2895
2896        // Check nothing has been appended into the output buffer if
2897        // the replacement string triggers IllegalArgumentException.
2898        p = Pattern.compile("(abc)");
2899        m = p.matcher("abcd");
2900        result = new StringBuffer();
2901        m.find();
2902        try {
2903            m.appendReplacement(result, ("xyz$g"));
2904            failCount++;
2905        } catch (IllegalArgumentException iae) {
2906            if (result.length() != 0)
2907                failCount++;
2908        }
2909
2910        report("SB Substitution");
2911    }
2912
2913    /*
2914     * 5 groups of characters are created to make a substitution string.
2915     * A base string will be created including random lead chars, the
2916     * substitution string, and random trailing chars.
2917     * A pattern containing the 5 groups is searched for and replaced with:
2918     * random group + random string + random group.
2919     * The results are checked for correctness.
2920     */
2921    private static void substitutionBasher() {
2922        for (int runs = 0; runs<1000; runs++) {
2923            // Create a base string to work in
2924            int leadingChars = generator.nextInt(10);
2925            StringBuffer baseBuffer = new StringBuffer(100);
2926            String leadingString = getRandomAlphaString(leadingChars);
2927            baseBuffer.append(leadingString);
2928
2929            // Create 5 groups of random number of random chars
2930            // Create the string to substitute
2931            // Create the pattern string to search for
2932            StringBuffer bufferToSub = new StringBuffer(25);
2933            StringBuffer bufferToPat = new StringBuffer(50);
2934            String[] groups = new String[5];
2935            for(int i=0; i<5; i++) {
2936                int aGroupSize = generator.nextInt(5)+1;
2937                groups[i] = getRandomAlphaString(aGroupSize);
2938                bufferToSub.append(groups[i]);
2939                bufferToPat.append('(');
2940                bufferToPat.append(groups[i]);
2941                bufferToPat.append(')');
2942            }
2943            String stringToSub = bufferToSub.toString();
2944            String pattern = bufferToPat.toString();
2945
2946            // Place sub string into working string at random index
2947            baseBuffer.append(stringToSub);
2948
2949            // Append random chars to end
2950            int trailingChars = generator.nextInt(10);
2951            String trailingString = getRandomAlphaString(trailingChars);
2952            baseBuffer.append(trailingString);
2953            String baseString = baseBuffer.toString();
2954
2955            // Create test pattern and matcher
2956            Pattern p = Pattern.compile(pattern);
2957            Matcher m = p.matcher(baseString);
2958
2959            // Reject candidate if pattern happens to start early
2960            m.find();
2961            if (m.start() < leadingChars)
2962                continue;
2963
2964            // Reject candidate if more than one match
2965            if (m.find())
2966                continue;
2967
2968            // Construct a replacement string with :
2969            // random group + random string + random group
2970            StringBuffer bufferToRep = new StringBuffer();
2971            int groupIndex1 = generator.nextInt(5);
2972            bufferToRep.append("$" + (groupIndex1 + 1));
2973            String randomMidString = getRandomAlphaString(5);
2974            bufferToRep.append(randomMidString);
2975            int groupIndex2 = generator.nextInt(5);
2976            bufferToRep.append("$" + (groupIndex2 + 1));
2977            String replacement = bufferToRep.toString();
2978
2979            // Do the replacement
2980            String result = m.replaceAll(replacement);
2981
2982            // Construct expected result
2983            StringBuffer bufferToRes = new StringBuffer();
2984            bufferToRes.append(leadingString);
2985            bufferToRes.append(groups[groupIndex1]);
2986            bufferToRes.append(randomMidString);
2987            bufferToRes.append(groups[groupIndex2]);
2988            bufferToRes.append(trailingString);
2989            String expectedResult = bufferToRes.toString();
2990
2991            // Check results
2992            if (!result.equals(expectedResult))
2993                failCount++;
2994        }
2995
2996        report("Substitution Basher");
2997    }
2998
2999    /**
3000     * Checks the handling of some escape sequences that the Pattern
3001     * class should process instead of the java compiler. These are
3002     * not in the file because the escapes should be be processed
3003     * by the Pattern class when the regex is compiled.
3004     */
3005    private static void escapes() throws Exception {
3006        Pattern p = Pattern.compile("\\043");
3007        Matcher m = p.matcher("#");
3008        if (!m.find())
3009            failCount++;
3010
3011        p = Pattern.compile("\\x23");
3012        m = p.matcher("#");
3013        if (!m.find())
3014            failCount++;
3015
3016        p = Pattern.compile("\\u0023");
3017        m = p.matcher("#");
3018        if (!m.find())
3019            failCount++;
3020
3021        report("Escape sequences");
3022    }
3023
3024    /**
3025     * Checks the handling of blank input situations. These
3026     * tests are incompatible with my test file format.
3027     */
3028    private static void blankInput() throws Exception {
3029        Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3030        Matcher m = p.matcher("");
3031        if (m.find())
3032            failCount++;
3033
3034        p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3035        m = p.matcher("");
3036        if (!m.find())
3037            failCount++;
3038
3039        p = Pattern.compile("abc");
3040        m = p.matcher("");
3041        if (m.find())
3042            failCount++;
3043
3044        p = Pattern.compile("a*");
3045        m = p.matcher("");
3046        if (!m.find())
3047            failCount++;
3048
3049        report("Blank input");
3050    }
3051
3052    /**
3053     * Tests the Boyer-Moore pattern matching of a character sequence
3054     * on randomly generated patterns.
3055     */
3056    private static void bm() throws Exception {
3057        doBnM('a');
3058        report("Boyer Moore (ASCII)");
3059
3060        doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3061        report("Boyer Moore (Supplementary)");
3062    }
3063
3064    private static void doBnM(int baseCharacter) throws Exception {
3065        int achar=0;
3066
3067        for (int i=0; i<100; i++) {
3068            // Create a short pattern to search for
3069            int patternLength = generator.nextInt(7) + 4;
3070            StringBuffer patternBuffer = new StringBuffer(patternLength);
3071            for (int x=0; x<patternLength; x++) {
3072                int ch = baseCharacter + generator.nextInt(26);
3073                if (Character.isSupplementaryCodePoint(ch)) {
3074                    patternBuffer.append(Character.toChars(ch));
3075                } else {
3076                    patternBuffer.append((char)ch);
3077                }
3078            }
3079            String pattern =  patternBuffer.toString();
3080            Pattern p = Pattern.compile(pattern);
3081
3082            // Create a buffer with random ASCII chars that does
3083            // not match the sample
3084            String toSearch = null;
3085            StringBuffer s = null;
3086            Matcher m = p.matcher("");
3087            do {
3088                s = new StringBuffer(100);
3089                for (int x=0; x<100; x++) {
3090                    int ch = baseCharacter + generator.nextInt(26);
3091                    if (Character.isSupplementaryCodePoint(ch)) {
3092                        s.append(Character.toChars(ch));
3093                    } else {
3094                        s.append((char)ch);
3095                    }
3096                }
3097                toSearch = s.toString();
3098                m.reset(toSearch);
3099            } while (m.find());
3100
3101            // Insert the pattern at a random spot
3102            int insertIndex = generator.nextInt(99);
3103            if (Character.isLowSurrogate(s.charAt(insertIndex)))
3104                insertIndex++;
3105            s = s.insert(insertIndex, pattern);
3106            toSearch = s.toString();
3107
3108            // Make sure that the pattern is found
3109            m.reset(toSearch);
3110            if (!m.find())
3111                failCount++;
3112
3113            // Make sure that the match text is the pattern
3114            if (!m.group().equals(pattern))
3115                failCount++;
3116
3117            // Make sure match occured at insertion point
3118            if (m.start() != insertIndex)
3119                failCount++;
3120        }
3121    }
3122
3123    /**
3124     * Tests the matching of slices on randomly generated patterns.
3125     * The Boyer-Moore optimization is not done on these patterns
3126     * because it uses unicode case folding.
3127     */
3128    private static void slice() throws Exception {
3129        doSlice(Character.MAX_VALUE);
3130        report("Slice");
3131
3132        doSlice(Character.MAX_CODE_POINT);
3133        report("Slice (Supplementary)");
3134    }
3135
3136    private static void doSlice(int maxCharacter) throws Exception {
3137        Random generator = new Random();
3138        int achar=0;
3139
3140        for (int i=0; i<100; i++) {
3141            // Create a short pattern to search for
3142            int patternLength = generator.nextInt(7) + 4;
3143            StringBuffer patternBuffer = new StringBuffer(patternLength);
3144            for (int x=0; x<patternLength; x++) {
3145                int randomChar = 0;
3146                while (!Character.isLetterOrDigit(randomChar))
3147                    randomChar = generator.nextInt(maxCharacter);
3148                if (Character.isSupplementaryCodePoint(randomChar)) {
3149                    patternBuffer.append(Character.toChars(randomChar));
3150                } else {
3151                    patternBuffer.append((char) randomChar);
3152                }
3153            }
3154            String pattern =  patternBuffer.toString();
3155            Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3156
3157            // Create a buffer with random chars that does not match the sample
3158            String toSearch = null;
3159            StringBuffer s = null;
3160            Matcher m = p.matcher("");
3161            do {
3162                s = new StringBuffer(100);
3163                for (int x=0; x<100; x++) {
3164                    int randomChar = 0;
3165                    while (!Character.isLetterOrDigit(randomChar))
3166                        randomChar = generator.nextInt(maxCharacter);
3167                    if (Character.isSupplementaryCodePoint(randomChar)) {
3168                        s.append(Character.toChars(randomChar));
3169                    } else {
3170                        s.append((char) randomChar);
3171                    }
3172                }
3173                toSearch = s.toString();
3174                m.reset(toSearch);
3175            } while (m.find());
3176
3177            // Insert the pattern at a random spot
3178            int insertIndex = generator.nextInt(99);
3179            if (Character.isLowSurrogate(s.charAt(insertIndex)))
3180                insertIndex++;
3181            s = s.insert(insertIndex, pattern);
3182            toSearch = s.toString();
3183
3184            // Make sure that the pattern is found
3185            m.reset(toSearch);
3186            if (!m.find())
3187                failCount++;
3188
3189            // Make sure that the match text is the pattern
3190            if (!m.group().equals(pattern))
3191                failCount++;
3192
3193            // Make sure match occured at insertion point
3194            if (m.start() != insertIndex)
3195                failCount++;
3196        }
3197    }
3198
3199    private static void explainFailure(String pattern, String data,
3200                                       String expected, String actual) {
3201        System.err.println("----------------------------------------");
3202        System.err.println("Pattern = "+pattern);
3203        System.err.println("Data = "+data);
3204        System.err.println("Expected = " + expected);
3205        System.err.println("Actual   = " + actual);
3206    }
3207
3208    private static void explainFailure(String pattern, String data,
3209                                       Throwable t) {
3210        System.err.println("----------------------------------------");
3211        System.err.println("Pattern = "+pattern);
3212        System.err.println("Data = "+data);
3213        t.printStackTrace(System.err);
3214    }
3215
3216    // Testing examples from a file
3217
3218    /**
3219     * Goes through the file "TestCases.txt" and creates many patterns
3220     * described in the file, matching the patterns against input lines in
3221     * the file, and comparing the results against the correct results
3222     * also found in the file. The file format is described in comments
3223     * at the head of the file.
3224     */
3225    private static void processFile(String fileName) throws Exception {
3226        File testCases = new File(System.getProperty("test.src", "."),
3227                                  fileName);
3228        FileInputStream in = new FileInputStream(testCases);
3229        BufferedReader r = new BufferedReader(new InputStreamReader(in));
3230
3231        // Process next test case.
3232        String aLine;
3233        while((aLine = r.readLine()) != null) {
3234            // Read a line for pattern
3235            String patternString = grabLine(r);
3236            Pattern p = null;
3237            try {
3238                p = compileTestPattern(patternString);
3239            } catch (PatternSyntaxException e) {
3240                String dataString = grabLine(r);
3241                String expectedResult = grabLine(r);
3242                if (expectedResult.startsWith("error"))
3243                    continue;
3244                explainFailure(patternString, dataString, e);
3245                failCount++;
3246                continue;
3247            }
3248
3249            // Read a line for input string
3250            String dataString = grabLine(r);
3251            Matcher m = p.matcher(dataString);
3252            StringBuffer result = new StringBuffer();
3253
3254            // Check for IllegalStateExceptions before a match
3255            failCount += preMatchInvariants(m);
3256
3257            boolean found = m.find();
3258
3259            if (found)
3260                failCount += postTrueMatchInvariants(m);
3261            else
3262                failCount += postFalseMatchInvariants(m);
3263
3264            if (found) {
3265                result.append("true ");
3266                result.append(m.group(0) + " ");
3267            } else {
3268                result.append("false ");
3269            }
3270
3271            result.append(m.groupCount());
3272
3273            if (found) {
3274                for (int i=1; i<m.groupCount()+1; i++)
3275                    if (m.group(i) != null)
3276                        result.append(" " +m.group(i));
3277            }
3278
3279            // Read a line for the expected result
3280            String expectedResult = grabLine(r);
3281
3282            if (!result.toString().equals(expectedResult)) {
3283                explainFailure(patternString, dataString, expectedResult, result.toString());
3284                failCount++;
3285            }
3286        }
3287
3288        report(fileName);
3289    }
3290
3291    private static int preMatchInvariants(Matcher m) {
3292        int failCount = 0;
3293        try {
3294            m.start();
3295            failCount++;
3296        } catch (IllegalStateException ise) {}
3297        try {
3298            m.end();
3299            failCount++;
3300        } catch (IllegalStateException ise) {}
3301        try {
3302            m.group();
3303            failCount++;
3304        } catch (IllegalStateException ise) {}
3305        return failCount;
3306    }
3307
3308    private static int postFalseMatchInvariants(Matcher m) {
3309        int failCount = 0;
3310        try {
3311            m.group();
3312            failCount++;
3313        } catch (IllegalStateException ise) {}
3314        try {
3315            m.start();
3316            failCount++;
3317        } catch (IllegalStateException ise) {}
3318        try {
3319            m.end();
3320            failCount++;
3321        } catch (IllegalStateException ise) {}
3322        return failCount;
3323    }
3324
3325    private static int postTrueMatchInvariants(Matcher m) {
3326        int failCount = 0;
3327        //assert(m.start() = m.start(0);
3328        if (m.start() != m.start(0))
3329            failCount++;
3330        //assert(m.end() = m.end(0);
3331        if (m.start() != m.start(0))
3332            failCount++;
3333        //assert(m.group() = m.group(0);
3334        if (!m.group().equals(m.group(0)))
3335            failCount++;
3336        try {
3337            m.group(50);
3338            failCount++;
3339        } catch (IndexOutOfBoundsException ise) {}
3340
3341        return failCount;
3342    }
3343
3344    private static Pattern compileTestPattern(String patternString) {
3345        if (!patternString.startsWith("'")) {
3346            return Pattern.compile(patternString);
3347        }
3348
3349        int break1 = patternString.lastIndexOf("'");
3350        String flagString = patternString.substring(
3351                                          break1+1, patternString.length());
3352        patternString = patternString.substring(1, break1);
3353
3354        if (flagString.equals("i"))
3355            return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3356
3357        if (flagString.equals("m"))
3358            return Pattern.compile(patternString, Pattern.MULTILINE);
3359
3360        return Pattern.compile(patternString);
3361    }
3362
3363    /**
3364     * Reads a line from the input file. Keeps reading lines until a non
3365     * empty non comment line is read. If the line contains a \n then
3366     * these two characters are replaced by a newline char. If a \\uxxxx
3367     * sequence is read then the sequence is replaced by the unicode char.
3368     */
3369    private static String grabLine(BufferedReader r) throws Exception {
3370        int index = 0;
3371        String line = r.readLine();
3372        while (line.startsWith("//") || line.length() < 1)
3373            line = r.readLine();
3374        while ((index = line.indexOf("\\n")) != -1) {
3375            StringBuffer temp = new StringBuffer(line);
3376            temp.replace(index, index+2, "\n");
3377            line = temp.toString();
3378        }
3379        while ((index = line.indexOf("\\u")) != -1) {
3380            StringBuffer temp = new StringBuffer(line);
3381            String value = temp.substring(index+2, index+6);
3382            char aChar = (char)Integer.parseInt(value, 16);
3383            String unicodeChar = "" + aChar;
3384            temp.replace(index, index+6, unicodeChar);
3385            line = temp.toString();
3386        }
3387
3388        return line;
3389    }
3390
3391    private static void check(Pattern p, String s, String g, String expected) {
3392        Matcher m = p.matcher(s);
3393        m.find();
3394        if (!m.group(g).equals(expected) ||
3395            s.charAt(m.start(g)) != expected.charAt(0) ||
3396            s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3397            failCount++;
3398    }
3399
3400    private static void checkReplaceFirst(String p, String s, String r, String expected)
3401    {
3402        if (!expected.equals(Pattern.compile(p)
3403                                    .matcher(s)
3404                                    .replaceFirst(r)))
3405            failCount++;
3406    }
3407
3408    private static void checkReplaceAll(String p, String s, String r, String expected)
3409    {
3410        if (!expected.equals(Pattern.compile(p)
3411                                    .matcher(s)
3412                                    .replaceAll(r)))
3413            failCount++;
3414    }
3415
3416    private static void checkExpectedFail(String p) {
3417        try {
3418            Pattern.compile(p);
3419        } catch (PatternSyntaxException pse) {
3420            //pse.printStackTrace();
3421            return;
3422        }
3423        failCount++;
3424    }
3425
3426    private static void checkExpectedIAE(Matcher m, String g) {
3427        m.find();
3428        try {
3429            m.group(g);
3430        } catch (IllegalArgumentException x) {
3431            //iae.printStackTrace();
3432            try {
3433                m.start(g);
3434            } catch (IllegalArgumentException xx) {
3435                try {
3436                    m.start(g);
3437                } catch (IllegalArgumentException xxx) {
3438                    return;
3439                }
3440            }
3441        }
3442        failCount++;
3443    }
3444
3445    private static void checkExpectedNPE(Matcher m) {
3446        m.find();
3447        try {
3448            m.group(null);
3449        } catch (NullPointerException x) {
3450            try {
3451                m.start(null);
3452            } catch (NullPointerException xx) {
3453                try {
3454                    m.end(null);
3455                } catch (NullPointerException xxx) {
3456                    return;
3457                }
3458            }
3459        }
3460        failCount++;
3461    }
3462
3463    private static void namedGroupCaptureTest() throws Exception {
3464        check(Pattern.compile("x+(?<gname>y+)z+"),
3465              "xxxyyyzzz",
3466              "gname",
3467              "yyy");
3468
3469        check(Pattern.compile("x+(?<gname8>y+)z+"),
3470              "xxxyyyzzz",
3471              "gname8",
3472              "yyy");
3473
3474        //backref
3475        Pattern pattern = Pattern.compile("(a*)bc\\1");
3476        check(pattern, "zzzaabcazzz", true);  // found "abca"
3477
3478        check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3479              "zzzaabcaazzz", true);
3480
3481        check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3482              "abcdefabc", true);
3483
3484        check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3485              "abcdefghijkk", true);
3486
3487        // Supplementary character tests
3488        check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3489              toSupplementaries("zzzaabcazzz"), true);
3490
3491        check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3492              toSupplementaries("zzzaabcaazzz"), true);
3493
3494        check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3495              toSupplementaries("abcdefabc"), true);
3496
3497        check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3498                              "(?<gname>" +
3499                              toSupplementaries("k)") + "\\k<gname>"),
3500              toSupplementaries("abcdefghijkk"), true);
3501
3502        check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3503              "xxxyyyzzzyyy",
3504              "gname",
3505              "yyy");
3506
3507        //replaceFirst/All
3508        checkReplaceFirst("(?<gn>ab)(c*)",
3509                          "abccczzzabcczzzabccc",
3510                          "${gn}",
3511                          "abzzzabcczzzabccc");
3512
3513        checkReplaceAll("(?<gn>ab)(c*)",
3514                        "abccczzzabcczzzabccc",
3515                        "${gn}",
3516                        "abzzzabzzzab");
3517
3518
3519        checkReplaceFirst("(?<gn>ab)(c*)",
3520                          "zzzabccczzzabcczzzabccczzz",
3521                          "${gn}",
3522                          "zzzabzzzabcczzzabccczzz");
3523
3524        checkReplaceAll("(?<gn>ab)(c*)",
3525                        "zzzabccczzzabcczzzabccczzz",
3526                        "${gn}",
3527                        "zzzabzzzabzzzabzzz");
3528
3529        checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3530                          "zzzabccczzzabcczzzabccczzz",
3531                          "${gn2}",
3532                          "zzzccczzzabcczzzabccczzz");
3533
3534        checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3535                        "zzzabccczzzabcczzzabccczzz",
3536                        "${gn2}",
3537                        "zzzccczzzcczzzccczzz");
3538
3539        //toSupplementaries("(ab)(c*)"));
3540        checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3541                           ")(?<gn2>" + toSupplementaries("c") + "*)",
3542                          toSupplementaries("abccczzzabcczzzabccc"),
3543                          "${gn1}",
3544                          toSupplementaries("abzzzabcczzzabccc"));
3545
3546
3547        checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3548                        ")(?<gn2>" + toSupplementaries("c") + "*)",
3549                        toSupplementaries("abccczzzabcczzzabccc"),
3550                        "${gn1}",
3551                        toSupplementaries("abzzzabzzzab"));
3552
3553        checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3554                           ")(?<gn2>" + toSupplementaries("c") + "*)",
3555                          toSupplementaries("abccczzzabcczzzabccc"),
3556                          "${gn2}",
3557                          toSupplementaries("ccczzzabcczzzabccc"));
3558
3559
3560        checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3561                        ")(?<gn2>" + toSupplementaries("c") + "*)",
3562                        toSupplementaries("abccczzzabcczzzabccc"),
3563                        "${gn2}",
3564                        toSupplementaries("ccczzzcczzzccc"));
3565
3566        checkReplaceFirst("(?<dog>Dog)AndCat",
3567                          "zzzDogAndCatzzzDogAndCatzzz",
3568                          "${dog}",
3569                          "zzzDogzzzDogAndCatzzz");
3570
3571
3572        checkReplaceAll("(?<dog>Dog)AndCat",
3573                          "zzzDogAndCatzzzDogAndCatzzz",
3574                          "${dog}",
3575                          "zzzDogzzzDogzzz");
3576
3577        // backref in Matcher & String
3578        if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3579            !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3580            failCount++;
3581
3582        // negative
3583        checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3584        checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3585        checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3586        checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3587        checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3588        checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3589                         "gnameX");
3590        checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
3591        report("NamedGroupCapture");
3592    }
3593
3594    // This is for bug 6969132
3595    private static void nonBmpClassComplementTest() throws Exception {
3596        Pattern p = Pattern.compile("\\P{Lu}");
3597        Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3598        if (m.find() && m.start() == 1)
3599            failCount++;
3600
3601        // from a unicode category
3602        p = Pattern.compile("\\P{Lu}");
3603        m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3604        if (m.find())
3605            failCount++;
3606        if (!m.hitEnd())
3607            failCount++;
3608
3609        // block
3610        p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3611        m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3612        if (m.find() && m.start() == 1)
3613            failCount++;
3614
3615        report("NonBmpClassComplement");
3616    }
3617
    /**
     * Tests the Unicode script and block property syntax of Pattern.
     *
     * First the alternative spellings of a general category, a script and
     * a block are each matched against one sample character. Then a loop
     * over code points checks, for each one, that the property class built
     * from Character.UnicodeScript.of(cp) and from
     * Character.UnicodeBlock.of(cp) matches it, and that one unrelated
     * script class and one unrelated block class do not. Failures are
     * accumulated in failCount and reported as "unicodeProperties".
     */
    private static void unicodePropertiesTest() throws Exception {
        // different forms
        if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
            !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
            !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
            !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
            !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
            !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
            !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
            failCount++;

        // Script matchers. lastSM/lastScript cache the matcher built for the
        // previous code point so that a run of code points sharing a script
        // reuses one compiled pattern. The cache is seeded with the "common"
        // matcher and the script of code point 0 (presumably COMMON).
        Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
        Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
        Matcher lastSM  = common;
        Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);

        // Block matchers, cached the same way through lastBM/lastBlock and
        // seeded with the basic_latin matcher and the block of code point 0.
        Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
        Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
        Matcher lastBM = latin;
        Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);

        // The loop starts at 1 and stops before Character.MAX_CODE_POINT,
        // so code point 0 and the last code point are not tested here.
        for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
            // NOTE(review): this skips only the code points at or above
            // 0x30000 whose bits 4-6 are all zero and tests the rest, which
            // looks like the opposite of "only pick couple code points" --
            // confirm whether the condition is inverted.
            if (cp >= 0x30000 && (cp & 0x70) == 0){
                continue;  // only pick couple code points, they are the same
            }

            // Unicode Script
            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
            Matcher m;
            String str = new String(Character.toChars(cp));
            if (script == lastScript) {
                 // same script as the previous code point: reuse its matcher
                 m = lastSM;
                 m.reset(str);
            } else {
                 m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
            }
            // positive check: the code point must match its own script
            if (!m.matches()) {
                failCount++;
            }
            // negative check: it must not match an unrelated script class
            // (Unknown for COMMON code points, Common for everything else)
            Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
            other.reset(str);
            if (other.matches()) {
                failCount++;
            }
            lastSM = m;
            lastScript = script;

            // Unicode Block
            Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
            if (block == null) {
                // No block for this code point: the block checks are skipped
                // and lastBM/lastBlock keep their previous values.
                //System.out.printf("Not a Block: cp=%x%n", cp);
                continue;
            }
            if (block == lastBlock) {
                 // same block as the previous code point: reuse its matcher
                 m = lastBM;
                 m.reset(str);
            } else {
                 m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
            }
            // positive check: the code point must match its own block
            if (!m.matches()) {
                failCount++;
            }
            // negative check: it must not match an unrelated block class
            // (Greek for BASIC_LATIN code points, basic_latin otherwise)
            other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
            other.reset(str);
            if (other.matches()) {
                failCount++;
            }
            lastBM = m;
            lastBlock = block;
        }
        report("unicodeProperties");
    }
3693
3694    private static void unicodeHexNotationTest() throws Exception {
3695
3696        // negative
3697        checkExpectedFail("\\x{-23}");
3698        checkExpectedFail("\\x{110000}");
3699        checkExpectedFail("\\x{}");
3700        checkExpectedFail("\\x{AB[ef]");
3701
3702        // codepoint
3703        check("^\\x{1033c}$",              "\uD800\uDF3C", true);
3704        check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3705        check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
3706        check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3707
3708        // in class
3709        check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
3710        check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3711        check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
3712        check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
3713        check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
3714        check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3715
3716        for (int cp = 0; cp <= 0x10FFFF; cp++) {
3717             String s = "A" + new String(Character.toChars(cp)) + "B";
3718             String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3719                                             : String.format("\\u%04x\\u%04x",
3720                                               (int) Character.toChars(cp)[0],
3721                                               (int) Character.toChars(cp)[1]);
3722             String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3723             if (!Pattern.matches("A" + hexUTF16 + "B", s))
3724                 failCount++;
3725             if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3726                 failCount++;
3727             if (!Pattern.matches("A" + hexCodePoint + "B", s))
3728                 failCount++;
3729             if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3730                 failCount++;
3731         }
3732         report("unicodeHexNotation");
3733    }
3734
    /**
     * Tests the predefined and POSIX character classes in their default
     * form, with the UNICODE_CHARACTER_CLASS flag, with the embedded (?U)
     * flag, as binary properties (the Is... names) and as java... method
     * classes.
     *
     * For every code point from 1 up to but excluding 0x30000 each matcher
     * is reset to the one-code-point string and its matches() result is
     * compared with a reference predicate taken from POSIX_ASCII,
     * POSIX_Unicode or java.lang.Character. The comparisons form a single
     * short-circuit chain, so a code point adds at most one to failCount
     * and matchers after the first disagreement are not evaluated for it.
     * A few word-boundary cases follow the loop. Results are reported as
     * "unicodePredefinedClasses".
     */
    private static void unicodeClassesTest() throws Exception {

        // All matchers are created on an empty input and re-targeted with
        // reset(str) inside the loop.
        // NOTE: ASCII, ASCIIU and boundU are created here but are not
        // referenced again in this method.
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // (these patterns carry the (?U) prefix and are also compiled with
        // the UNICODE_CHARACTER_CLASS flag argument)
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // Word character(s) enclosed by word boundaries. Note that bwb uses
        // a single \w while bwbU and bwbEU use the possessive \w++.
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");

        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");

        // Code point 0 and everything from 0x30000 upwards are not covered.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            // Each line compares a reference predicate (left) with what the
            // regex engine says (right); any disagreement is a failure.
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // the operator is == here on purpose of the comparison's
                // polarity: "unassigned" agreeing with IsAssigned is the
                // failure case
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
                failCount++;
        }

        // bounds/word align
        // Each sample is checked twice: via twoFindIndexes with the default
        // \b matcher on the space-padded text (presumably asserting that
        // the two finds land on the given indexes -- helper not shown
        // here), and via a full matches() of the Unicode-aware bwbU
        // pattern on the unpadded text.
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        // the embedded-flag variant is only exercised on this last sample
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
3877
3878    private static void horizontalAndVerticalWSTest() throws Exception {
3879        String hws = new String (new char[] {
3880                                     0x09, 0x20, 0xa0, 0x1680, 0x180e,
3881                                     0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3882                                     0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3883                                     0x202f, 0x205f, 0x3000 });
3884        String vws = new String (new char[] {
3885                                     0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3886        if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3887            !Pattern.compile("[\\h]+").matcher(hws).matches())
3888            failCount++;
3889        if (Pattern.compile("\\H").matcher(hws).find() ||
3890            Pattern.compile("[\\H]").matcher(hws).find())
3891            failCount++;
3892        if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3893            !Pattern.compile("[\\v]+").matcher(vws).matches())
3894            failCount++;
3895        if (Pattern.compile("\\V").matcher(vws).find() ||
3896            Pattern.compile("[\\V]").matcher(vws).find())
3897            failCount++;
3898        String prefix = "abcd";
3899        String suffix = "efgh";
3900        String ng = "A";
3901        for (int i = 0; i < hws.length(); i++) {
3902            String c = String.valueOf(hws.charAt(i));
3903            Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3904            if (!m.find() || !c.equals(m.group()))
3905                failCount++;
3906            m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3907            if (!m.find() || !c.equals(m.group()))
3908                failCount++;
3909
3910            m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3911            if (!m.find() || !ng.equals(m.group()))
3912                failCount++;
3913            m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3914            if (!m.find() || !ng.equals(m.group()))
3915                failCount++;
3916        }
3917        for (int i = 0; i < vws.length(); i++) {
3918            String c = String.valueOf(vws.charAt(i));
3919            Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3920            if (!m.find() || !c.equals(m.group()))
3921                failCount++;
3922            m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3923            if (!m.find() || !c.equals(m.group()))
3924                failCount++;
3925
3926            m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3927            if (!m.find() || !ng.equals(m.group()))
3928                failCount++;
3929            m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3930            if (!m.find() || !ng.equals(m.group()))
3931                failCount++;
3932        }
3933        // \v in range is interpreted as 0x0B. This is the undocumented behavior
3934        if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3935            failCount++;
3936        report("horizontalAndVerticalWSTest");
3937    }
3938
3939    private static void linebreakTest() throws Exception {
3940        String linebreaks = new String (new char[] {
3941            0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3942        String crnl = "\r\n";
3943        if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3944            !Pattern.compile("\\R").matcher(crnl).matches() ||
3945            Pattern.compile("\\R\\R").matcher(crnl).matches())
3946            failCount++;
3947        report("linebreakTest");
3948    }
3949
3950    // #7189363
3951    private static void branchTest() throws Exception {
3952        if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
3953            !Pattern.compile("(a)+bc|d").matcher("d").find() ||
3954            !Pattern.compile("(a)*bc|d").matcher("d").find() ||
3955            !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
3956            !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
3957            !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
3958            !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
3959            !Pattern.compile("(a)++bc|d").matcher("d").find() ||
3960            !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
3961            !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
3962            !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
3963            !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
3964            !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
3965            !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
3966            !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
3967            !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
3968            !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
3969            !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
3970            !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
3971            !Pattern.compile("(a)??bc|de").matcher("de").find() ||
3972            !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
3973            !Pattern.compile("(a)??bc|de").matcher("de").matches())
3974            failCount++;
3975        report("branchTest");
3976    }
3977
3978    // This test is for 8007395
3979    private static void groupCurlyNotFoundSuppTest() throws Exception {
3980        String input = "test this as \ud83d\ude0d";
3981        for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
3982                                          "test(.)*(@[a-zA-Z.]+)",
3983                                          "test([^B])+(@[a-zA-Z.]+)",
3984                                          "test([^B])*(@[a-zA-Z.]+)",
3985                                          "test(\\P{IsControl})+(@[a-zA-Z.]+)",
3986                                          "test(\\P{IsControl})*(@[a-zA-Z.]+)",
3987                                        }) {
3988            Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
3989                               .matcher(input);
3990            try {
3991                if (m.find()) {
3992                    failCount++;
3993                }
3994            } catch (Exception x) {
3995                failCount++;
3996            }
3997        }
3998        report("GroupCurly NotFoundSupp");
3999    }
4000
4001    // This test is for 8023647
4002    private static void groupCurlyBackoffTest() throws Exception {
4003        if (!"abc1c".matches("(\\w)+1\\1") ||
4004            "abc11".matches("(\\w)+1\\1")) {
4005            failCount++;
4006        }
4007        report("GroupCurly backoff");
4008    }
4009
4010    // This test is for 8012646
4011    private static void patternAsPredicate() throws Exception {
4012        Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4013
4014        if (p.test("")) {
4015            failCount++;
4016        }
4017        if (!p.test("word")) {
4018            failCount++;
4019        }
4020        if (p.test("1234")) {
4021            failCount++;
4022        }
4023        report("Pattern.asPredicate");
4024    }
4025}
4026