1/*
2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.util.regex;
27
28import java.util.ConcurrentModificationException;
29import java.util.Iterator;
30import java.util.NoSuchElementException;
31import java.util.Objects;
32import java.util.Spliterator;
33import java.util.Spliterators;
34import java.util.function.Consumer;
35import java.util.function.Function;
36import java.util.stream.Stream;
37import java.util.stream.StreamSupport;
38
39/**
40 * An engine that performs match operations on a {@linkplain java.lang.CharSequence
41 * character sequence} by interpreting a {@link Pattern}.
42 *
43 * <p> A matcher is created from a pattern by invoking the pattern's {@link
44 * Pattern#matcher matcher} method.  Once created, a matcher can be used to
45 * perform three different kinds of match operations:
46 *
47 * <ul>
48 *
49 *   <li><p> The {@link #matches matches} method attempts to match the entire
50 *   input sequence against the pattern.  </p></li>
51 *
52 *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
53 *   input sequence, starting at the beginning, against the pattern.  </p></li>
54 *
55 *   <li><p> The {@link #find find} method scans the input sequence looking for
56 *   the next subsequence that matches the pattern.  </p></li>
57 *
58 * </ul>
59 *
60 * <p> Each of these methods returns a boolean indicating success or failure.
61 * More information about a successful match can be obtained by querying the
62 * state of the matcher.
63 *
64 * <p> A matcher finds matches in a subset of its input called the
65 * <i>region</i>. By default, the region contains all of the matcher's input.
66 * The region can be modified via the {@link #region region} method and queried
67 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
68 * methods. The way that the region boundaries interact with some pattern
69 * constructs can be changed. See {@link #useAnchoringBounds
70 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
71 * for more details.
72 *
73 * <p> This class also defines methods for replacing matched subsequences with
74 * new strings whose contents can, if desired, be computed from the match
75 * result.  The {@link #appendReplacement appendReplacement} and {@link
76 * #appendTail appendTail} methods can be used in tandem in order to collect
77 * the result into an existing string buffer or string builder. Alternatively,
78 * the more convenient {@link #replaceAll replaceAll} method can be used to
79 * create a string in which every matching subsequence in the input sequence
80 * is replaced.
81 *
82 * <p> The explicit state of a matcher includes the start and end indices of
83 * the most recent successful match.  It also includes the start and end
84 * indices of the input subsequence captured by each <a
85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
86 * count of such subsequences.  As a convenience, methods are also provided for
87 * returning these captured subsequences in string form.
88 *
89 * <p> The explicit state of a matcher is initially undefined; attempting to
90 * query any part of it before a successful match will cause an {@link
91 * IllegalStateException} to be thrown.  The explicit state of a matcher is
92 * recomputed by every match operation.
93 *
94 * <p> The implicit state of a matcher includes the input character sequence as
95 * well as the <i>append position</i>, which is initially zero and is updated
96 * by the {@link #appendReplacement appendReplacement} method.
97 *
98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
99 * method or, if a new input sequence is desired, its {@link
100 * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
101 * matcher discards its explicit state information and sets the append position
102 * to zero.
103 *
104 * <p> Instances of this class are not safe for use by multiple concurrent
105 * threads. </p>
106 *
107 *
108 * @author      Mike McCloskey
109 * @author      Mark Reinhold
110 * @author      JSR-51 Expert Group
111 * @since       1.4
112 * @spec        JSR-51
113 */
114
115public final class Matcher implements MatchResult {
116
    /**
     * The Pattern object that created this Matcher. It is returned by
     * {@link #pattern()} and replaced by {@link #usePattern}.
     */
    Pattern parentPattern;

    /**
     * The storage used by groups. Group {@code g} occupies two adjacent
     * slots: {@code groups[2*g]} holds its start index and
     * {@code groups[2*g + 1]} holds its end index; {@code -1} marks a slot
     * as unset. The slots may contain invalid values if a group was skipped
     * during the matching.
     */
    int[] groups;

    /**
     * The range within the sequence that is to be matched. Anchors
     * will match at these "hard" boundaries. Changing the region
     * changes these values; {@link #reset()} sets them back to
     * {@code 0} and the value of {@code getTextLength()}.
     */
    int from, to;

    /**
     * Lookbehind uses this value to ensure that the subexpression
     * match ends at the point where the lookbehind was encountered.
     */
    int lookbehindTo;

    /**
     * The original string being matched. It is replaced by
     * {@link #reset(CharSequence)}.
     */
    CharSequence text;

    /**
     * Matcher state used by the last node. NOANCHOR is used when a
     * match does not have to consume all of the input (this is what
     * {@link #lookingAt()} passes). ENDANCHOR is the mode used for
     * matching all the input (this is what {@link #matches()} passes).
     */
    static final int ENDANCHOR = 1;
    static final int NOANCHOR = 0;
    int acceptMode = NOANCHOR;

    /**
     * The range of string that last matched the pattern. If the last
     * match failed then first is -1; last initially holds 0 then it
     * holds the index of the end of the last match (which is where the
     * next search starts).
     *
     * A negative first is the condition that {@link #start()},
     * {@link #end()} and {@link #group(int)} test before throwing
     * IllegalStateException.
     */
    int first = -1, last = 0;

    /**
     * The end index of what matched in the last match operation.
     * {@link #reset()} sets it back to -1.
     */
    int oldLast = -1;

    /**
     * The index of the last position appended in a substitution.
     * {@link #reset()} sets it back to 0.
     */
    int lastAppendPosition = 0;

    /**
     * Storage used by nodes to tell what repetition they are on in
     * a pattern, and where groups begin. The nodes themselves are stateless,
     * so they rely on this field to hold state during a match.
     *
     * The array is sized from the pattern's localCount and every slot is
     * set to -1 by {@link #reset()} and {@link #usePattern}.
     */
    int[] locals;

    /**
     * Storage used by top greedy Loop node to store a specific hash set to
     * keep the beginning index of the failed repetition match. The nodes
     * themselves are stateless, so they rely on this field to hold state
     * during a match.
     *
     * The array is sized from the pattern's localTCNCount; entries start
     * out null, and {@link #reset()} clears every non-null entry.
     */
    IntHashSet[] localsPos;

    /**
     * Boolean indicating whether or not more input could change
     * the results of the last match.
     *
     * If hitEnd is true, and a match was found, then more input
     * might cause a different match to be found.
     * If hitEnd is true and a match was not found, then more
     * input could cause a match to be found.
     * If hitEnd is false and a match was found, then more input
     * will not change the match.
     * If hitEnd is false and a match was not found, then more
     * input will not cause a match to be found.
     */
    boolean hitEnd;

    /**
     * Boolean indicating whether or not more input could change
     * a positive match into a negative one.
     *
     * If requireEnd is true, and a match was found, then more
     * input could cause the match to be lost.
     * If requireEnd is false and a match was found, then more
     * input might change the match but the match won't be lost.
     * If a match was not found, then requireEnd has no meaning.
     */
    boolean requireEnd;

    /**
     * If transparentBounds is true then the boundaries of this
     * matcher's region are transparent to lookahead, lookbehind,
     * and boundary matching constructs that try to see beyond them.
     * This flag is not modified by {@link #reset()}.
     */
    boolean transparentBounds = false;

    /**
     * If anchoringBounds is true then the boundaries of this
     * matcher's region match anchors such as ^ and $.
     * This flag is not modified by {@link #reset()}.
     */
    boolean anchoringBounds = true;

    /**
     * Number of times this matcher's state has been modified.
     * It is incremented by {@link #reset()} and {@link #usePattern};
     * NOTE(review): other mutators outside this part of the file may
     * increment it as well.
     */
    int modCount;
232
    /**
     * Package-private no-argument constructor; it is not part of the
     * public API. It performs no initialization: no pattern, input, or
     * state arrays are assigned here, so every field keeps the initial
     * value given at its declaration.
     */
    Matcher() {
    }
238
239    /**
240     * All matchers have the state used by Pattern during a match.
241     */
242    Matcher(Pattern parent, CharSequence text) {
243        this.parentPattern = parent;
244        this.text = text;
245
246        // Allocate state storage
247        int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
248        groups = new int[parentGroupCount * 2];
249        locals = new int[parent.localCount];
250        localsPos = new IntHashSet[parent.localTCNCount];
251
252        // Put fields into initial states
253        reset();
254    }
255
256    /**
257     * Returns the pattern that is interpreted by this matcher.
258     *
259     * @return  The pattern for which this matcher was created
260     */
261    public Pattern pattern() {
262        return parentPattern;
263    }
264
265    /**
266     * Returns the match state of this matcher as a {@link MatchResult}.
267     * The result is unaffected by subsequent operations performed upon this
268     * matcher.
269     *
270     * @return  a {@code MatchResult} with the state of this matcher
271     * @since 1.5
272     */
273    public MatchResult toMatchResult() {
274        return toMatchResult(text.toString());
275    }
276
277    private MatchResult toMatchResult(String text) {
278        return new ImmutableMatchResult(this.first,
279                                        this.last,
280                                        groupCount(),
281                                        this.groups.clone(),
282                                        text);
283    }
284
285    private static class ImmutableMatchResult implements MatchResult {
286        private final int first;
287        private final int last;
288        private final int[] groups;
289        private final int groupCount;
290        private final String text;
291
292        ImmutableMatchResult(int first, int last, int groupCount,
293                             int groups[], String text)
294        {
295            this.first = first;
296            this.last = last;
297            this.groupCount = groupCount;
298            this.groups = groups;
299            this.text = text;
300        }
301
302        @Override
303        public int start() {
304            checkMatch();
305            return first;
306        }
307
308        @Override
309        public int start(int group) {
310            checkMatch();
311            if (group < 0 || group > groupCount)
312                throw new IndexOutOfBoundsException("No group " + group);
313            return groups[group * 2];
314        }
315
316        @Override
317        public int end() {
318            checkMatch();
319            return last;
320        }
321
322        @Override
323        public int end(int group) {
324            checkMatch();
325            if (group < 0 || group > groupCount)
326                throw new IndexOutOfBoundsException("No group " + group);
327            return groups[group * 2 + 1];
328        }
329
330        @Override
331        public int groupCount() {
332            return groupCount;
333        }
334
335        @Override
336        public String group() {
337            checkMatch();
338            return group(0);
339        }
340
341        @Override
342        public String group(int group) {
343            checkMatch();
344            if (group < 0 || group > groupCount)
345                throw new IndexOutOfBoundsException("No group " + group);
346            if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
347                return null;
348            return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
349        }
350
351        private void checkMatch() {
352            if (first < 0)
353                throw new IllegalStateException("No match found");
354
355        }
356    }
357
358    /**
359      * Changes the {@code Pattern} that this {@code Matcher} uses to
360      * find matches with.
361      *
362      * <p> This method causes this matcher to lose information
363      * about the groups of the last match that occurred. The
364      * matcher's position in the input is maintained and its
365      * last append position is unaffected.</p>
366      *
367      * @param  newPattern
368      *         The new pattern used by this matcher
369      * @return  This matcher
370      * @throws  IllegalArgumentException
371      *          If newPattern is {@code null}
372      * @since 1.5
373      */
374    public Matcher usePattern(Pattern newPattern) {
375        if (newPattern == null)
376            throw new IllegalArgumentException("Pattern cannot be null");
377        parentPattern = newPattern;
378
379        // Reallocate state storage
380        int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
381        groups = new int[parentGroupCount * 2];
382        locals = new int[newPattern.localCount];
383        for (int i = 0; i < groups.length; i++)
384            groups[i] = -1;
385        for (int i = 0; i < locals.length; i++)
386            locals[i] = -1;
387        localsPos = new IntHashSet[parentPattern.localTCNCount];
388        modCount++;
389        return this;
390    }
391
392    /**
393     * Resets this matcher.
394     *
395     * <p> Resetting a matcher discards all of its explicit state information
396     * and sets its append position to zero. The matcher's region is set to the
397     * default region, which is its entire character sequence. The anchoring
398     * and transparency of this matcher's region boundaries are unaffected.
399     *
400     * @return  This matcher
401     */
402    public Matcher reset() {
403        first = -1;
404        last = 0;
405        oldLast = -1;
406        for(int i=0; i<groups.length; i++)
407            groups[i] = -1;
408        for(int i=0; i<locals.length; i++)
409            locals[i] = -1;
410        for (int i = 0; i < localsPos.length; i++) {
411            if (localsPos[i] != null)
412                localsPos[i].clear();
413        }
414        lastAppendPosition = 0;
415        from = 0;
416        to = getTextLength();
417        modCount++;
418        return this;
419    }
420
421    /**
422     * Resets this matcher with a new input sequence.
423     *
424     * <p> Resetting a matcher discards all of its explicit state information
425     * and sets its append position to zero.  The matcher's region is set to
426     * the default region, which is its entire character sequence.  The
427     * anchoring and transparency of this matcher's region boundaries are
428     * unaffected.
429     *
430     * @param  input
431     *         The new input character sequence
432     *
433     * @return  This matcher
434     */
435    public Matcher reset(CharSequence input) {
436        text = input;
437        return reset();
438    }
439
440    /**
441     * Returns the start index of the previous match.
442     *
443     * @return  The index of the first character matched
444     *
445     * @throws  IllegalStateException
446     *          If no match has yet been attempted,
447     *          or if the previous match operation failed
448     */
449    public int start() {
450        if (first < 0)
451            throw new IllegalStateException("No match available");
452        return first;
453    }
454
455    /**
456     * Returns the start index of the subsequence captured by the given group
457     * during the previous match operation.
458     *
459     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
460     * to right, starting at one.  Group zero denotes the entire pattern, so
461     * the expression <i>m.</i>{@code start(0)} is equivalent to
462     * <i>m.</i>{@code start()}.  </p>
463     *
464     * @param  group
465     *         The index of a capturing group in this matcher's pattern
466     *
467     * @return  The index of the first character captured by the group,
468     *          or {@code -1} if the match was successful but the group
469     *          itself did not match anything
470     *
471     * @throws  IllegalStateException
472     *          If no match has yet been attempted,
473     *          or if the previous match operation failed
474     *
475     * @throws  IndexOutOfBoundsException
476     *          If there is no capturing group in the pattern
477     *          with the given index
478     */
479    public int start(int group) {
480        if (first < 0)
481            throw new IllegalStateException("No match available");
482        if (group < 0 || group > groupCount())
483            throw new IndexOutOfBoundsException("No group " + group);
484        return groups[group * 2];
485    }
486
487    /**
488     * Returns the start index of the subsequence captured by the given
489     * <a href="Pattern.html#groupname">named-capturing group</a> during the
490     * previous match operation.
491     *
492     * @param  name
493     *         The name of a named-capturing group in this matcher's pattern
494     *
495     * @return  The index of the first character captured by the group,
496     *          or {@code -1} if the match was successful but the group
497     *          itself did not match anything
498     *
499     * @throws  IllegalStateException
500     *          If no match has yet been attempted,
501     *          or if the previous match operation failed
502     *
503     * @throws  IllegalArgumentException
504     *          If there is no capturing group in the pattern
505     *          with the given name
506     * @since 1.8
507     */
508    public int start(String name) {
509        return groups[getMatchedGroupIndex(name) * 2];
510    }
511
512    /**
513     * Returns the offset after the last character matched.
514     *
515     * @return  The offset after the last character matched
516     *
517     * @throws  IllegalStateException
518     *          If no match has yet been attempted,
519     *          or if the previous match operation failed
520     */
521    public int end() {
522        if (first < 0)
523            throw new IllegalStateException("No match available");
524        return last;
525    }
526
527    /**
528     * Returns the offset after the last character of the subsequence
529     * captured by the given group during the previous match operation.
530     *
531     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
532     * to right, starting at one.  Group zero denotes the entire pattern, so
533     * the expression <i>m.</i>{@code end(0)} is equivalent to
534     * <i>m.</i>{@code end()}.  </p>
535     *
536     * @param  group
537     *         The index of a capturing group in this matcher's pattern
538     *
539     * @return  The offset after the last character captured by the group,
540     *          or {@code -1} if the match was successful
541     *          but the group itself did not match anything
542     *
543     * @throws  IllegalStateException
544     *          If no match has yet been attempted,
545     *          or if the previous match operation failed
546     *
547     * @throws  IndexOutOfBoundsException
548     *          If there is no capturing group in the pattern
549     *          with the given index
550     */
551    public int end(int group) {
552        if (first < 0)
553            throw new IllegalStateException("No match available");
554        if (group < 0 || group > groupCount())
555            throw new IndexOutOfBoundsException("No group " + group);
556        return groups[group * 2 + 1];
557    }
558
559    /**
560     * Returns the offset after the last character of the subsequence
561     * captured by the given <a href="Pattern.html#groupname">named-capturing
562     * group</a> during the previous match operation.
563     *
564     * @param  name
565     *         The name of a named-capturing group in this matcher's pattern
566     *
567     * @return  The offset after the last character captured by the group,
568     *          or {@code -1} if the match was successful
569     *          but the group itself did not match anything
570     *
571     * @throws  IllegalStateException
572     *          If no match has yet been attempted,
573     *          or if the previous match operation failed
574     *
575     * @throws  IllegalArgumentException
576     *          If there is no capturing group in the pattern
577     *          with the given name
578     * @since 1.8
579     */
580    public int end(String name) {
581        return groups[getMatchedGroupIndex(name) * 2 + 1];
582    }
583
584    /**
585     * Returns the input subsequence matched by the previous match.
586     *
587     * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
588     * the expressions <i>m.</i>{@code group()} and
589     * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>{@code end())}
590     * are equivalent.  </p>
591     *
592     * <p> Note that some patterns, for example {@code a*}, match the empty
593     * string.  This method will return the empty string when the pattern
594     * successfully matches the empty string in the input.  </p>
595     *
596     * @return The (possibly empty) subsequence matched by the previous match,
597     *         in string form
598     *
599     * @throws  IllegalStateException
600     *          If no match has yet been attempted,
601     *          or if the previous match operation failed
602     */
603    public String group() {
604        return group(0);
605    }
606
607    /**
608     * Returns the input subsequence captured by the given group during the
609     * previous match operation.
610     *
611     * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
612     * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and
613     * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
614     * ),}&nbsp;<i>m.</i>{@code end(}<i>g</i>{@code ))}
615     * are equivalent.  </p>
616     *
617     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
618     * to right, starting at one.  Group zero denotes the entire pattern, so
619     * the expression {@code m.group(0)} is equivalent to {@code m.group()}.
620     * </p>
621     *
622     * <p> If the match was successful but the group specified failed to match
623     * any part of the input sequence, then {@code null} is returned. Note
624     * that some groups, for example {@code (a*)}, match the empty string.
625     * This method will return the empty string when such a group successfully
626     * matches the empty string in the input.  </p>
627     *
628     * @param  group
629     *         The index of a capturing group in this matcher's pattern
630     *
631     * @return  The (possibly empty) subsequence captured by the group
632     *          during the previous match, or {@code null} if the group
633     *          failed to match part of the input
634     *
635     * @throws  IllegalStateException
636     *          If no match has yet been attempted,
637     *          or if the previous match operation failed
638     *
639     * @throws  IndexOutOfBoundsException
640     *          If there is no capturing group in the pattern
641     *          with the given index
642     */
643    public String group(int group) {
644        if (first < 0)
645            throw new IllegalStateException("No match found");
646        if (group < 0 || group > groupCount())
647            throw new IndexOutOfBoundsException("No group " + group);
648        if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
649            return null;
650        return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
651    }
652
653    /**
654     * Returns the input subsequence captured by the given
655     * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
656     * match operation.
657     *
658     * <p> If the match was successful but the group specified failed to match
659     * any part of the input sequence, then {@code null} is returned. Note
660     * that some groups, for example {@code (a*)}, match the empty string.
661     * This method will return the empty string when such a group successfully
662     * matches the empty string in the input.  </p>
663     *
664     * @param  name
665     *         The name of a named-capturing group in this matcher's pattern
666     *
667     * @return  The (possibly empty) subsequence captured by the named group
668     *          during the previous match, or {@code null} if the group
669     *          failed to match part of the input
670     *
671     * @throws  IllegalStateException
672     *          If no match has yet been attempted,
673     *          or if the previous match operation failed
674     *
675     * @throws  IllegalArgumentException
676     *          If there is no capturing group in the pattern
677     *          with the given name
678     * @since 1.7
679     */
680    public String group(String name) {
681        int group = getMatchedGroupIndex(name);
682        if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
683            return null;
684        return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
685    }
686
687    /**
688     * Returns the number of capturing groups in this matcher's pattern.
689     *
690     * <p> Group zero denotes the entire pattern by convention. It is not
691     * included in this count.
692     *
693     * <p> Any non-negative integer smaller than or equal to the value
694     * returned by this method is guaranteed to be a valid group index for
695     * this matcher.  </p>
696     *
697     * @return The number of capturing groups in this matcher's pattern
698     */
699    public int groupCount() {
700        return parentPattern.capturingGroupCount - 1;
701    }
702
703    /**
704     * Attempts to match the entire region against the pattern.
705     *
706     * <p> If the match succeeds then more information can be obtained via the
707     * {@code start}, {@code end}, and {@code group} methods.  </p>
708     *
709     * @return  {@code true} if, and only if, the entire region sequence
710     *          matches this matcher's pattern
711     */
712    public boolean matches() {
713        return match(from, ENDANCHOR);
714    }
715
716    /**
717     * Attempts to find the next subsequence of the input sequence that matches
718     * the pattern.
719     *
720     * <p> This method starts at the beginning of this matcher's region, or, if
721     * a previous invocation of the method was successful and the matcher has
722     * not since been reset, at the first character not matched by the previous
723     * match.
724     *
725     * <p> If the match succeeds then more information can be obtained via the
726     * {@code start}, {@code end}, and {@code group} methods.  </p>
727     *
728     * @return  {@code true} if, and only if, a subsequence of the input
729     *          sequence matches this matcher's pattern
730     */
731    public boolean find() {
732        int nextSearchIndex = last;
733        if (nextSearchIndex == first)
734            nextSearchIndex++;
735
736        // If next search starts before region, start it at region
737        if (nextSearchIndex < from)
738            nextSearchIndex = from;
739
740        // If next search starts beyond region then it fails
741        if (nextSearchIndex > to) {
742            for (int i = 0; i < groups.length; i++)
743                groups[i] = -1;
744            return false;
745        }
746        return search(nextSearchIndex);
747    }
748
749    /**
750     * Resets this matcher and then attempts to find the next subsequence of
751     * the input sequence that matches the pattern, starting at the specified
752     * index.
753     *
754     * <p> If the match succeeds then more information can be obtained via the
755     * {@code start}, {@code end}, and {@code group} methods, and subsequent
756     * invocations of the {@link #find()} method will start at the first
757     * character not matched by this match.  </p>
758     *
759     * @param start the index to start searching for a match
760     * @throws  IndexOutOfBoundsException
761     *          If start is less than zero or if start is greater than the
762     *          length of the input sequence.
763     *
764     * @return  {@code true} if, and only if, a subsequence of the input
765     *          sequence starting at the given index matches this matcher's
766     *          pattern
767     */
768    public boolean find(int start) {
769        int limit = getTextLength();
770        if ((start < 0) || (start > limit))
771            throw new IndexOutOfBoundsException("Illegal start index");
772        reset();
773        return search(start);
774    }
775
776    /**
777     * Attempts to match the input sequence, starting at the beginning of the
778     * region, against the pattern.
779     *
780     * <p> Like the {@link #matches matches} method, this method always starts
781     * at the beginning of the region; unlike that method, it does not
782     * require that the entire region be matched.
783     *
784     * <p> If the match succeeds then more information can be obtained via the
785     * {@code start}, {@code end}, and {@code group} methods.  </p>
786     *
787     * @return  {@code true} if, and only if, a prefix of the input
788     *          sequence matches this matcher's pattern
789     */
790    public boolean lookingAt() {
791        return match(from, NOANCHOR);
792    }
793
794    /**
795     * Returns a literal replacement {@code String} for the specified
796     * {@code String}.
797     *
798     * This method produces a {@code String} that will work
799     * as a literal replacement {@code s} in the
800     * {@code appendReplacement} method of the {@link Matcher} class.
801     * The {@code String} produced will match the sequence of characters
     * in {@code s} treated as a literal sequence. Backslashes ('\') and
     * dollar signs ('$') will be given no special meaning.
804     *
805     * @param  s The string to be literalized
806     * @return  A literal string replacement
807     * @since 1.5
808     */
809    public static String quoteReplacement(String s) {
810        if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
811            return s;
812        StringBuilder sb = new StringBuilder();
813        for (int i=0; i<s.length(); i++) {
814            char c = s.charAt(i);
815            if (c == '\\' || c == '$') {
816                sb.append('\\');
817            }
818            sb.append(c);
819        }
820        return sb.toString();
821    }
822
823    /**
824     * Implements a non-terminal append-and-replace step.
825     *
826     * <p> This method performs the following actions: </p>
827     *
828     * <ol>
829     *
830     *   <li><p> It reads characters from the input sequence, starting at the
831     *   append position, and appends them to the given string buffer.  It
832     *   stops after reading the last character preceding the previous match,
833     *   that is, the character at index {@link
834     *   #start()}&nbsp;{@code -}&nbsp;{@code 1}.  </p></li>
835     *
836     *   <li><p> It appends the given replacement string to the string buffer.
837     *   </p></li>
838     *
839     *   <li><p> It sets the append position of this matcher to the index of
840     *   the last character matched, plus one, that is, to {@link #end()}.
841     *   </p></li>
842     *
843     * </ol>
844     *
845     * <p> The replacement string may contain references to subsequences
846     * captured during the previous match: Each occurrence of
847     * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
848     * will be replaced by the result of evaluating the corresponding
849     * {@link #group(String) group(name)} or {@link #group(int) group(g)}
850     * respectively. For {@code $}<i>g</i>,
851     * the first number after the {@code $} is always treated as part of
852     * the group reference. Subsequent numbers are incorporated into g if
853     * they would form a legal group reference. Only the numerals '0'
854     * through '9' are considered as potential components of the group
855     * reference. If the second group matched the string {@code "foo"}, for
856     * example, then passing the replacement string {@code "$2bar"} would
857     * cause {@code "foobar"} to be appended to the string buffer. A dollar
858     * sign ({@code $}) may be included as a literal in the replacement
859     * string by preceding it with a backslash ({@code \$}).
860     *
861     * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
862     * the replacement string may cause the results to be different than if it
863     * were being treated as a literal replacement string. Dollar signs may be
864     * treated as references to captured subsequences as described above, and
865     * backslashes are used to escape literal characters in the replacement
866     * string.
867     *
868     * <p> This method is intended to be used in a loop together with the
869     * {@link #appendTail appendTail} and {@link #find find} methods.  The
870     * following code, for example, writes {@code one dog two dogs in the
871     * yard} to the standard-output stream: </p>
872     *
873     * <blockquote><pre>
874     * Pattern p = Pattern.compile("cat");
875     * Matcher m = p.matcher("one cat two cats in the yard");
876     * StringBuffer sb = new StringBuffer();
877     * while (m.find()) {
878     *     m.appendReplacement(sb, "dog");
879     * }
880     * m.appendTail(sb);
881     * System.out.println(sb.toString());</pre></blockquote>
882     *
883     * @param  sb
884     *         The target string buffer
885     *
886     * @param  replacement
887     *         The replacement string
888     *
889     * @return  This matcher
890     *
891     * @throws  IllegalStateException
892     *          If no match has yet been attempted,
893     *          or if the previous match operation failed
894     *
895     * @throws  IllegalArgumentException
896     *          If the replacement string refers to a named-capturing
897     *          group that does not exist in the pattern
898     *
899     * @throws  IndexOutOfBoundsException
900     *          If the replacement string refers to a capturing group
901     *          that does not exist in the pattern
902     */
903    public Matcher appendReplacement(StringBuffer sb, String replacement) {
904        // If no match, return error
905        if (first < 0)
906            throw new IllegalStateException("No match available");
907        StringBuilder result = new StringBuilder();
908        appendExpandedReplacement(replacement, result);
909        // Append the intervening text
910        sb.append(text, lastAppendPosition, first);
911        // Append the match substitution
912        sb.append(result);
913        lastAppendPosition = last;
914        modCount++;
915        return this;
916    }
917
918    /**
919     * Implements a non-terminal append-and-replace step.
920     *
921     * <p> This method performs the following actions: </p>
922     *
923     * <ol>
924     *
925     *   <li><p> It reads characters from the input sequence, starting at the
926     *   append position, and appends them to the given string builder.  It
927     *   stops after reading the last character preceding the previous match,
928     *   that is, the character at index {@link
929     *   #start()}&nbsp;{@code -}&nbsp;{@code 1}.  </p></li>
930     *
931     *   <li><p> It appends the given replacement string to the string builder.
932     *   </p></li>
933     *
934     *   <li><p> It sets the append position of this matcher to the index of
935     *   the last character matched, plus one, that is, to {@link #end()}.
936     *   </p></li>
937     *
938     * </ol>
939     *
940     * <p> The replacement string may contain references to subsequences
941     * captured during the previous match: Each occurrence of
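     * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
     * will be replaced by the result of evaluating the corresponding
     * {@link #group(String) group(name)} or {@link #group(int) group(g)}
     * respectively. For {@code $}<i>g</i>,
     * the first number after the {@code $} is always treated as part of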
945     * the group reference. Subsequent numbers are incorporated into g if
946     * they would form a legal group reference. Only the numerals '0'
947     * through '9' are considered as potential components of the group
948     * reference. If the second group matched the string {@code "foo"}, for
949     * example, then passing the replacement string {@code "$2bar"} would
950     * cause {@code "foobar"} to be appended to the string builder. A dollar
951     * sign ({@code $}) may be included as a literal in the replacement
952     * string by preceding it with a backslash ({@code \$}).
953     *
954     * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
955     * the replacement string may cause the results to be different than if it
956     * were being treated as a literal replacement string. Dollar signs may be
957     * treated as references to captured subsequences as described above, and
958     * backslashes are used to escape literal characters in the replacement
959     * string.
960     *
961     * <p> This method is intended to be used in a loop together with the
962     * {@link #appendTail appendTail} and {@link #find find} methods.  The
963     * following code, for example, writes {@code one dog two dogs in the
964     * yard} to the standard-output stream: </p>
965     *
966     * <blockquote><pre>
967     * Pattern p = Pattern.compile("cat");
968     * Matcher m = p.matcher("one cat two cats in the yard");
969     * StringBuilder sb = new StringBuilder();
970     * while (m.find()) {
971     *     m.appendReplacement(sb, "dog");
972     * }
973     * m.appendTail(sb);
974     * System.out.println(sb.toString());</pre></blockquote>
975     *
976     * @param  sb
977     *         The target string builder
978     * @param  replacement
979     *         The replacement string
980     * @return  This matcher
981     *
982     * @throws  IllegalStateException
983     *          If no match has yet been attempted,
984     *          or if the previous match operation failed
985     * @throws  IllegalArgumentException
986     *          If the replacement string refers to a named-capturing
987     *          group that does not exist in the pattern
988     * @throws  IndexOutOfBoundsException
989     *          If the replacement string refers to a capturing group
990     *          that does not exist in the pattern
991     * @since 9
992     */
993    public Matcher appendReplacement(StringBuilder sb, String replacement) {
994        // If no match, return error
995        if (first < 0)
996            throw new IllegalStateException("No match available");
997        StringBuilder result = new StringBuilder();
998        appendExpandedReplacement(replacement, result);
999        // Append the intervening text
1000        sb.append(text, lastAppendPosition, first);
1001        // Append the match substitution
1002        sb.append(result);
1003        lastAppendPosition = last;
1004        modCount++;
1005        return this;
1006    }
1007
1008    /**
1009     * Processes replacement string to replace group references with
1010     * groups.
1011     */
1012    private StringBuilder appendExpandedReplacement(
1013        String replacement, StringBuilder result) {
1014        int cursor = 0;
1015        while (cursor < replacement.length()) {
1016            char nextChar = replacement.charAt(cursor);
1017            if (nextChar == '\\') {
1018                cursor++;
1019                if (cursor == replacement.length())
1020                    throw new IllegalArgumentException(
1021                        "character to be escaped is missing");
1022                nextChar = replacement.charAt(cursor);
1023                result.append(nextChar);
1024                cursor++;
1025            } else if (nextChar == '$') {
1026                // Skip past $
1027                cursor++;
1028                // Throw IAE if this "$" is the last character in replacement
1029                if (cursor == replacement.length())
1030                   throw new IllegalArgumentException(
1031                        "Illegal group reference: group index is missing");
1032                nextChar = replacement.charAt(cursor);
1033                int refNum = -1;
1034                if (nextChar == '{') {
1035                    cursor++;
1036                    StringBuilder gsb = new StringBuilder();
1037                    while (cursor < replacement.length()) {
1038                        nextChar = replacement.charAt(cursor);
1039                        if (ASCII.isLower(nextChar) ||
1040                            ASCII.isUpper(nextChar) ||
1041                            ASCII.isDigit(nextChar)) {
1042                            gsb.append(nextChar);
1043                            cursor++;
1044                        } else {
1045                            break;
1046                        }
1047                    }
1048                    if (gsb.length() == 0)
1049                        throw new IllegalArgumentException(
1050                            "named capturing group has 0 length name");
1051                    if (nextChar != '}')
1052                        throw new IllegalArgumentException(
1053                            "named capturing group is missing trailing '}'");
1054                    String gname = gsb.toString();
1055                    if (ASCII.isDigit(gname.charAt(0)))
1056                        throw new IllegalArgumentException(
1057                            "capturing group name {" + gname +
1058                            "} starts with digit character");
1059                    if (!parentPattern.namedGroups().containsKey(gname))
1060                        throw new IllegalArgumentException(
1061                            "No group with name {" + gname + "}");
1062                    refNum = parentPattern.namedGroups().get(gname);
1063                    cursor++;
1064                } else {
1065                    // The first number is always a group
1066                    refNum = nextChar - '0';
1067                    if ((refNum < 0) || (refNum > 9))
1068                        throw new IllegalArgumentException(
1069                            "Illegal group reference");
1070                    cursor++;
1071                    // Capture the largest legal group string
1072                    boolean done = false;
1073                    while (!done) {
1074                        if (cursor >= replacement.length()) {
1075                            break;
1076                        }
1077                        int nextDigit = replacement.charAt(cursor) - '0';
1078                        if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
1079                            break;
1080                        }
1081                        int newRefNum = (refNum * 10) + nextDigit;
1082                        if (groupCount() < newRefNum) {
1083                            done = true;
1084                        } else {
1085                            refNum = newRefNum;
1086                            cursor++;
1087                        }
1088                    }
1089                }
1090                // Append group
1091                if (start(refNum) != -1 && end(refNum) != -1)
1092                    result.append(text, start(refNum), end(refNum));
1093            } else {
1094                result.append(nextChar);
1095                cursor++;
1096            }
1097        }
1098        return result;
1099    }
1100
1101    /**
1102     * Implements a terminal append-and-replace step.
1103     *
1104     * <p> This method reads characters from the input sequence, starting at
1105     * the append position, and appends them to the given string buffer.  It is
1106     * intended to be invoked after one or more invocations of the {@link
1107     * #appendReplacement appendReplacement} method in order to copy the
1108     * remainder of the input sequence.  </p>
1109     *
1110     * @param  sb
1111     *         The target string buffer
1112     *
1113     * @return  The target string buffer
1114     */
1115    public StringBuffer appendTail(StringBuffer sb) {
1116        sb.append(text, lastAppendPosition, getTextLength());
1117        return sb;
1118    }
1119
1120    /**
1121     * Implements a terminal append-and-replace step.
1122     *
1123     * <p> This method reads characters from the input sequence, starting at
1124     * the append position, and appends them to the given string builder.  It is
1125     * intended to be invoked after one or more invocations of the {@link
1126     * #appendReplacement appendReplacement} method in order to copy the
1127     * remainder of the input sequence.  </p>
1128     *
1129     * @param  sb
1130     *         The target string builder
1131     *
1132     * @return  The target string builder
1133     *
1134     * @since 9
1135     */
1136    public StringBuilder appendTail(StringBuilder sb) {
1137        sb.append(text, lastAppendPosition, getTextLength());
1138        return sb;
1139    }
1140
1141    /**
1142     * Replaces every subsequence of the input sequence that matches the
1143     * pattern with the given replacement string.
1144     *
1145     * <p> This method first resets this matcher.  It then scans the input
1146     * sequence looking for matches of the pattern.  Characters that are not
1147     * part of any match are appended directly to the result string; each match
1148     * is replaced in the result by the replacement string.  The replacement
1149     * string may contain references to captured subsequences as in the {@link
1150     * #appendReplacement appendReplacement} method.
1151     *
1152     * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1153     * the replacement string may cause the results to be different than if it
1154     * were being treated as a literal replacement string. Dollar signs may be
1155     * treated as references to captured subsequences as described above, and
1156     * backslashes are used to escape literal characters in the replacement
1157     * string.
1158     *
1159     * <p> Given the regular expression {@code a*b}, the input
1160     * {@code "aabfooaabfooabfoob"}, and the replacement string
1161     * {@code "-"}, an invocation of this method on a matcher for that
1162     * expression would yield the string {@code "-foo-foo-foo-"}.
1163     *
1164     * <p> Invoking this method changes this matcher's state.  If the matcher
1165     * is to be used in further matching operations then it should first be
1166     * reset.  </p>
1167     *
1168     * @param  replacement
1169     *         The replacement string
1170     *
1171     * @return  The string constructed by replacing each matching subsequence
1172     *          by the replacement string, substituting captured subsequences
1173     *          as needed
1174     */
1175    public String replaceAll(String replacement) {
1176        reset();
1177        boolean result = find();
1178        if (result) {
1179            StringBuilder sb = new StringBuilder();
1180            do {
1181                appendReplacement(sb, replacement);
1182                result = find();
1183            } while (result);
1184            appendTail(sb);
1185            return sb.toString();
1186        }
1187        return text.toString();
1188    }
1189
1190    /**
1191     * Replaces every subsequence of the input sequence that matches the
1192     * pattern with the result of applying the given replacer function to the
1193     * match result of this matcher corresponding to that subsequence.
1194     * Exceptions thrown by the function are relayed to the caller.
1195     *
1196     * <p> This method first resets this matcher.  It then scans the input
1197     * sequence looking for matches of the pattern.  Characters that are not
1198     * part of any match are appended directly to the result string; each match
     * is replaced in the result by applying the replacer function that
1200     * returns a replacement string.  Each replacement string may contain
1201     * references to captured subsequences as in the {@link #appendReplacement
1202     * appendReplacement} method.
1203     *
1204     * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1205     * a replacement string may cause the results to be different than if it
1206     * were being treated as a literal replacement string. Dollar signs may be
1207     * treated as references to captured subsequences as described above, and
1208     * backslashes are used to escape literal characters in the replacement
1209     * string.
1210     *
1211     * <p> Given the regular expression {@code dog}, the input
1212     * {@code "zzzdogzzzdogzzz"}, and the function
1213     * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1214     * a matcher for that expression would yield the string
1215     * {@code "zzzDOGzzzDOGzzz"}.
1216     *
1217     * <p> Invoking this method changes this matcher's state.  If the matcher
1218     * is to be used in further matching operations then it should first be
1219     * reset.  </p>
1220     *
1221     * <p> The replacer function should not modify this matcher's state during
1222     * replacement.  This method will, on a best-effort basis, throw a
1223     * {@link java.util.ConcurrentModificationException} if such modification is
1224     * detected.
1225     *
1226     * <p> The state of each match result passed to the replacer function is
1227     * guaranteed to be constant only for the duration of the replacer function
1228     * call and only if the replacer function does not modify this matcher's
1229     * state.
1230     *
1231     * @implNote
1232     * This implementation applies the replacer function to this matcher, which
1233     * is an instance of {@code MatchResult}.
1234     *
1235     * @param  replacer
1236     *         The function to be applied to the match result of this matcher
1237     *         that returns a replacement string.
1238     * @return  The string constructed by replacing each matching subsequence
1239     *          with the result of applying the replacer function to that
1240     *          matched subsequence, substituting captured subsequences as
1241     *          needed.
1242     * @throws NullPointerException if the replacer function is null
1243     * @throws ConcurrentModificationException if it is detected, on a
1244     *         best-effort basis, that the replacer function modified this
1245     *         matcher's state
1246     * @since 9
1247     */
1248    public String replaceAll(Function<MatchResult, String> replacer) {
1249        Objects.requireNonNull(replacer);
1250        reset();
1251        boolean result = find();
1252        if (result) {
1253            StringBuilder sb = new StringBuilder();
1254            do {
1255                int ec = modCount;
1256                String replacement =  replacer.apply(this);
1257                if (ec != modCount)
1258                    throw new ConcurrentModificationException();
1259                appendReplacement(sb, replacement);
1260                result = find();
1261            } while (result);
1262            appendTail(sb);
1263            return sb.toString();
1264        }
1265        return text.toString();
1266    }
1267
1268    /**
1269     * Returns a stream of match results for each subsequence of the input
1270     * sequence that matches the pattern.  The match results occur in the
1271     * same order as the matching subsequences in the input sequence.
1272     *
1273     * <p> Each match result is produced as if by {@link #toMatchResult()}.
1274     *
1275     * <p> This method does not reset this matcher.  Matching starts on
1276     * initiation of the terminal stream operation either at the beginning of
1277     * this matcher's region, or, if the matcher has not since been reset, at
1278     * the first character not matched by a previous match.
1279     *
1280     * <p> If the matcher is to be used for further matching operations after
1281     * the terminal stream operation completes then it should be first reset.
1282     *
1283     * <p> This matcher's state should not be modified during execution of the
1284     * returned stream's pipeline.  The returned stream's source
1285     * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
1286     * basis, throw a {@link java.util.ConcurrentModificationException} if such
1287     * modification is detected.
1288     *
1289     * @return a sequential stream of match results.
1290     * @since 9
1291     */
1292    public Stream<MatchResult> results() {
1293        class MatchResultIterator implements Iterator<MatchResult> {
1294            // -ve for call to find, 0 for not found, 1 for found
1295            int state = -1;
1296            // State for concurrent modification checking
1297            // -1 for uninitialized
1298            int expectedCount = -1;
1299            // The input sequence as a string, set once only after first find
1300            // Avoids repeated conversion from CharSequence for each match
1301            String textAsString;
1302
1303            @Override
1304            public MatchResult next() {
1305                if (expectedCount >= 0 && expectedCount != modCount)
1306                    throw new ConcurrentModificationException();
1307
1308                if (!hasNext())
1309                    throw new NoSuchElementException();
1310
1311                state = -1;
1312                return toMatchResult(textAsString);
1313            }
1314
1315            @Override
1316            public boolean hasNext() {
1317                if (state >= 0)
1318                    return state == 1;
1319
1320                // Defer throwing ConcurrentModificationException to when next
1321                // or forEachRemaining is called.  The is consistent with other
1322                // fail-fast implementations.
1323                if (expectedCount >= 0 && expectedCount != modCount)
1324                    return true;
1325
1326                boolean found = find();
1327                // Capture the input sequence as a string on first find
1328                if (found && state < 0)
1329                    textAsString = text.toString();
1330                state = found ? 1 : 0;
1331                expectedCount = modCount;
1332                return found;
1333            }
1334
1335            @Override
1336            public void forEachRemaining(Consumer<? super MatchResult> action) {
1337                if (expectedCount >= 0 && expectedCount != modCount)
1338                    throw new ConcurrentModificationException();
1339
1340                int s = state;
1341                if (s == 0)
1342                    return;
1343
1344                // Set state to report no more elements on further operations
1345                state = 0;
1346                expectedCount = -1;
1347
1348                // Perform a first find if required
1349                if (s < 0 && !find())
1350                    return;
1351
1352                // Capture the input sequence as a string on first find
1353                textAsString = text.toString();
1354
1355                do {
1356                    int ec = modCount;
1357                    action.accept(toMatchResult(textAsString));
1358                    if (ec != modCount)
1359                        throw new ConcurrentModificationException();
1360                } while (find());
1361            }
1362        }
1363        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1364                new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1365    }
1366
1367    /**
1368     * Replaces the first subsequence of the input sequence that matches the
1369     * pattern with the given replacement string.
1370     *
1371     * <p> This method first resets this matcher.  It then scans the input
1372     * sequence looking for a match of the pattern.  Characters that are not
1373     * part of the match are appended directly to the result string; the match
1374     * is replaced in the result by the replacement string.  The replacement
1375     * string may contain references to captured subsequences as in the {@link
1376     * #appendReplacement appendReplacement} method.
1377     *
1378     * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1379     * the replacement string may cause the results to be different than if it
1380     * were being treated as a literal replacement string. Dollar signs may be
1381     * treated as references to captured subsequences as described above, and
1382     * backslashes are used to escape literal characters in the replacement
1383     * string.
1384     *
1385     * <p> Given the regular expression {@code dog}, the input
1386     * {@code "zzzdogzzzdogzzz"}, and the replacement string
1387     * {@code "cat"}, an invocation of this method on a matcher for that
1388     * expression would yield the string {@code "zzzcatzzzdogzzz"}.  </p>
1389     *
1390     * <p> Invoking this method changes this matcher's state.  If the matcher
1391     * is to be used in further matching operations then it should first be
1392     * reset.  </p>
1393     *
1394     * @param  replacement
1395     *         The replacement string
     * @return  The string constructed by replacing the first matching
     *          subsequence by the replacement string, substituting captured
     *          subsequences as needed
     * @throws  NullPointerException
     *          If the replacement string is null
1399     */
1400    public String replaceFirst(String replacement) {
1401        if (replacement == null)
1402            throw new NullPointerException("replacement");
1403        reset();
1404        if (!find())
1405            return text.toString();
1406        StringBuilder sb = new StringBuilder();
1407        appendReplacement(sb, replacement);
1408        appendTail(sb);
1409        return sb.toString();
1410    }
1411
1412    /**
1413     * Replaces the first subsequence of the input sequence that matches the
1414     * pattern with the result of applying the given replacer function to the
1415     * match result of this matcher corresponding to that subsequence.
     * Exceptions thrown by the replacer function are relayed to the caller.
1417     *
1418     * <p> This method first resets this matcher.  It then scans the input
1419     * sequence looking for a match of the pattern.  Characters that are not
1420     * part of the match are appended directly to the result string; the match
     * is replaced in the result by applying the replacer function that
1422     * returns a replacement string.  The replacement string may contain
1423     * references to captured subsequences as in the {@link #appendReplacement
1424     * appendReplacement} method.
1425     *
1426     * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1427     * the replacement string may cause the results to be different than if it
1428     * were being treated as a literal replacement string. Dollar signs may be
1429     * treated as references to captured subsequences as described above, and
1430     * backslashes are used to escape literal characters in the replacement
1431     * string.
1432     *
1433     * <p> Given the regular expression {@code dog}, the input
1434     * {@code "zzzdogzzzdogzzz"}, and the function
1435     * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1436     * a matcher for that expression would yield the string
1437     * {@code "zzzDOGzzzdogzzz"}.
1438     *
1439     * <p> Invoking this method changes this matcher's state.  If the matcher
1440     * is to be used in further matching operations then it should first be
1441     * reset.
1442     *
1443     * <p> The replacer function should not modify this matcher's state during
1444     * replacement.  This method will, on a best-effort basis, throw a
1445     * {@link java.util.ConcurrentModificationException} if such modification is
1446     * detected.
1447     *
1448     * <p> The state of the match result passed to the replacer function is
1449     * guaranteed to be constant only for the duration of the replacer function
1450     * call and only if the replacer function does not modify this matcher's
1451     * state.
1452     *
1453     * @implNote
1454     * This implementation applies the replacer function to this matcher, which
1455     * is an instance of {@code MatchResult}.
1456     *
1457     * @param  replacer
1458     *         The function to be applied to the match result of this matcher
1459     *         that returns a replacement string.
1460     * @return  The string constructed by replacing the first matching
1461     *          subsequence with the result of applying the replacer function to
1462     *          the matched subsequence, substituting captured subsequences as
1463     *          needed.
1464     * @throws NullPointerException if the replacer function is null
1465     * @throws ConcurrentModificationException if it is detected, on a
1466     *         best-effort basis, that the replacer function modified this
1467     *         matcher's state
1468     * @since 9
1469     */
1470    public String replaceFirst(Function<MatchResult, String> replacer) {
1471        Objects.requireNonNull(replacer);
1472        reset();
1473        if (!find())
1474            return text.toString();
1475        StringBuilder sb = new StringBuilder();
1476        int ec = modCount;
1477        String replacement = replacer.apply(this);
1478        if (ec != modCount)
1479            throw new ConcurrentModificationException();
1480        appendReplacement(sb, replacement);
1481        appendTail(sb);
1482        return sb.toString();
1483    }
1484
1485    /**
1486     * Sets the limits of this matcher's region. The region is the part of the
1487     * input sequence that will be searched to find a match. Invoking this
1488     * method resets the matcher, and then sets the region to start at the
1489     * index specified by the {@code start} parameter and end at the
1490     * index specified by the {@code end} parameter.
1491     *
1492     * <p>Depending on the transparency and anchoring being used (see
1493     * {@link #useTransparentBounds useTransparentBounds} and
1494     * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
1495     * as anchors may behave differently at or around the boundaries of the
1496     * region.
1497     *
1498     * @param  start
1499     *         The index to start searching at (inclusive)
1500     * @param  end
1501     *         The index to end searching at (exclusive)
1502     * @throws  IndexOutOfBoundsException
1503     *          If start or end is less than zero, if
1504     *          start is greater than the length of the input sequence, if
1505     *          end is greater than the length of the input sequence, or if
1506     *          start is greater than end.
1507     * @return  this matcher
1508     * @since 1.5
1509     */
1510    public Matcher region(int start, int end) {
1511        if ((start < 0) || (start > getTextLength()))
1512            throw new IndexOutOfBoundsException("start");
1513        if ((end < 0) || (end > getTextLength()))
1514            throw new IndexOutOfBoundsException("end");
1515        if (start > end)
1516            throw new IndexOutOfBoundsException("start > end");
1517        reset();
1518        from = start;
1519        to = end;
1520        return this;
1521    }
1522
1523    /**
1524     * Reports the start index of this matcher's region. The
1525     * searches this matcher conducts are limited to finding matches
1526     * within {@link #regionStart regionStart} (inclusive) and
1527     * {@link #regionEnd regionEnd} (exclusive).
1528     *
1529     * @return  The starting point of this matcher's region
1530     * @since 1.5
1531     */
1532    public int regionStart() {
1533        return from;
1534    }
1535
1536    /**
1537     * Reports the end index (exclusive) of this matcher's region.
1538     * The searches this matcher conducts are limited to finding matches
1539     * within {@link #regionStart regionStart} (inclusive) and
1540     * {@link #regionEnd regionEnd} (exclusive).
1541     *
1542     * @return  the ending point of this matcher's region
1543     * @since 1.5
1544     */
1545    public int regionEnd() {
1546        return to;
1547    }
1548
1549    /**
1550     * Queries the transparency of region bounds for this matcher.
1551     *
1552     * <p> This method returns {@code true} if this matcher uses
1553     * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
1554     * bounds.
1555     *
1556     * <p> See {@link #useTransparentBounds useTransparentBounds} for a
1557     * description of transparent and opaque bounds.
1558     *
1559     * <p> By default, a matcher uses opaque region boundaries.
1560     *
1561     * @return {@code true} iff this matcher is using transparent bounds,
1562     *         {@code false} otherwise.
1563     * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1564     * @since 1.5
1565     */
1566    public boolean hasTransparentBounds() {
1567        return transparentBounds;
1568    }
1569
1570    /**
1571     * Sets the transparency of region bounds for this matcher.
1572     *
1573     * <p> Invoking this method with an argument of {@code true} will set this
1574     * matcher to use <i>transparent</i> bounds. If the boolean
1575     * argument is {@code false}, then <i>opaque</i> bounds will be used.
1576     *
1577     * <p> Using transparent bounds, the boundaries of this
1578     * matcher's region are transparent to lookahead, lookbehind,
1579     * and boundary matching constructs. Those constructs can see beyond the
1580     * boundaries of the region to see if a match is appropriate.
1581     *
1582     * <p> Using opaque bounds, the boundaries of this matcher's
1583     * region are opaque to lookahead, lookbehind, and boundary matching
1584     * constructs that may try to see beyond them. Those constructs cannot
1585     * look past the boundaries so they will fail to match anything outside
1586     * of the region.
1587     *
1588     * <p> By default, a matcher uses opaque bounds.
1589     *
1590     * @param  b a boolean indicating whether to use opaque or transparent
1591     *         regions
1592     * @return this matcher
1593     * @see java.util.regex.Matcher#hasTransparentBounds
1594     * @since 1.5
1595     */
1596    public Matcher useTransparentBounds(boolean b) {
1597        transparentBounds = b;
1598        return this;
1599    }
1600
1601    /**
1602     * Queries the anchoring of region bounds for this matcher.
1603     *
1604     * <p> This method returns {@code true} if this matcher uses
1605     * <i>anchoring</i> bounds, {@code false} otherwise.
1606     *
1607     * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
1608     * description of anchoring bounds.
1609     *
1610     * <p> By default, a matcher uses anchoring region boundaries.
1611     *
1612     * @return {@code true} iff this matcher is using anchoring bounds,
1613     *         {@code false} otherwise.
1614     * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1615     * @since 1.5
1616     */
1617    public boolean hasAnchoringBounds() {
1618        return anchoringBounds;
1619    }
1620
1621    /**
1622     * Sets the anchoring of region bounds for this matcher.
1623     *
1624     * <p> Invoking this method with an argument of {@code true} will set this
1625     * matcher to use <i>anchoring</i> bounds. If the boolean
1626     * argument is {@code false}, then <i>non-anchoring</i> bounds will be
1627     * used.
1628     *
1629     * <p> Using anchoring bounds, the boundaries of this
1630     * matcher's region match anchors such as ^ and $.
1631     *
1632     * <p> Without anchoring bounds, the boundaries of this
1633     * matcher's region will not match anchors such as ^ and $.
1634     *
1635     * <p> By default, a matcher uses anchoring region boundaries.
1636     *
1637     * @param  b a boolean indicating whether or not to use anchoring bounds.
1638     * @return this matcher
1639     * @see java.util.regex.Matcher#hasAnchoringBounds
1640     * @since 1.5
1641     */
1642    public Matcher useAnchoringBounds(boolean b) {
1643        anchoringBounds = b;
1644        return this;
1645    }
1646
1647    /**
1648     * <p>Returns the string representation of this matcher. The
1649     * string representation of a {@code Matcher} contains information
1650     * that may be useful for debugging. The exact format is unspecified.
1651     *
1652     * @return  The string representation of this matcher
1653     * @since 1.5
1654     */
1655    public String toString() {
1656        StringBuilder sb = new StringBuilder();
1657        sb.append("java.util.regex.Matcher")
1658                .append("[pattern=").append(pattern())
1659                .append(" region=")
1660                .append(regionStart()).append(',').append(regionEnd())
1661                .append(" lastmatch=");
1662        if ((first >= 0) && (group() != null)) {
1663            sb.append(group());
1664        }
1665        sb.append(']');
1666        return sb.toString();
1667    }
1668
1669    /**
1670     * <p>Returns true if the end of input was hit by the search engine in
1671     * the last match operation performed by this matcher.
1672     *
1673     * <p>When this method returns true, then it is possible that more input
1674     * would have changed the result of the last search.
1675     *
1676     * @return  true iff the end of input was hit in the last match; false
1677     *          otherwise
1678     * @since 1.5
1679     */
1680    public boolean hitEnd() {
1681        return hitEnd;
1682    }
1683
1684    /**
1685     * <p>Returns true if more input could change a positive match into a
1686     * negative one.
1687     *
1688     * <p>If this method returns true, and a match was found, then more
1689     * input could cause the match to be lost. If this method returns false
1690     * and a match was found, then more input might change the match but the
1691     * match won't be lost. If a match was not found, then requireEnd has no
1692     * meaning.
1693     *
1694     * @return  true iff more input could change a positive match into a
1695     *          negative one.
1696     * @since 1.5
1697     */
1698    public boolean requireEnd() {
1699        return requireEnd;
1700    }
1701
1702    /**
1703     * Initiates a search to find a Pattern within the given bounds.
1704     * The groups are filled with default values and the match of the root
1705     * of the state machine is called. The state machine will hold the state
1706     * of the match as it proceeds in this matcher.
1707     *
1708     * Matcher.from is not set here, because it is the "hard" boundary
1709     * of the start of the search which anchors will set to. The from param
1710     * is the "soft" boundary of the start of the search, meaning that the
1711     * regex tries to match at that index but ^ won't match there. Subsequent
1712     * calls to the search methods start at a new "soft" boundary which is
1713     * the end of the previous match.
1714     */
1715    boolean search(int from) {
1716        this.hitEnd = false;
1717        this.requireEnd = false;
1718        from        = from < 0 ? 0 : from;
1719        this.first  = from;
1720        this.oldLast = oldLast < 0 ? from : oldLast;
1721        for (int i = 0; i < groups.length; i++)
1722            groups[i] = -1;
1723        for (int i = 0; i < localsPos.length; i++) {
1724            if (localsPos[i] != null)
1725                localsPos[i].clear();
1726        }
1727        acceptMode = NOANCHOR;
1728        boolean result = parentPattern.root.match(this, from, text);
1729        if (!result)
1730            this.first = -1;
1731        this.oldLast = this.last;
1732        this.modCount++;
1733        return result;
1734    }
1735
1736    /**
1737     * Initiates a search for an anchored match to a Pattern within the given
1738     * bounds. The groups are filled with default values and the match of the
1739     * root of the state machine is called. The state machine will hold the
1740     * state of the match as it proceeds in this matcher.
1741     */
1742    boolean match(int from, int anchor) {
1743        this.hitEnd = false;
1744        this.requireEnd = false;
1745        from        = from < 0 ? 0 : from;
1746        this.first  = from;
1747        this.oldLast = oldLast < 0 ? from : oldLast;
1748        for (int i = 0; i < groups.length; i++)
1749            groups[i] = -1;
1750        for (int i = 0; i < localsPos.length; i++) {
1751            if (localsPos[i] != null)
1752                localsPos[i].clear();
1753        }
1754        acceptMode = anchor;
1755        boolean result = parentPattern.matchRoot.match(this, from, text);
1756        if (!result)
1757            this.first = -1;
1758        this.oldLast = this.last;
1759        this.modCount++;
1760        return result;
1761    }
1762
1763    /**
1764     * Returns the end index of the text.
1765     *
1766     * @return the index after the last character in the text
1767     */
1768    int getTextLength() {
1769        return text.length();
1770    }
1771
1772    /**
1773     * Generates a String from this Matcher's input in the specified range.
1774     *
1775     * @param  beginIndex   the beginning index, inclusive
1776     * @param  endIndex     the ending index, exclusive
1777     * @return A String generated from this Matcher's input
1778     */
1779    CharSequence getSubSequence(int beginIndex, int endIndex) {
1780        return text.subSequence(beginIndex, endIndex);
1781    }
1782
1783    /**
1784     * Returns this Matcher's input character at index i.
1785     *
1786     * @return A char from the specified index
1787     */
1788    char charAt(int i) {
1789        return text.charAt(i);
1790    }
1791
1792    /**
1793     * Returns the group index of the matched capturing group.
1794     *
1795     * @return the index of the named-capturing group
1796     */
1797    int getMatchedGroupIndex(String name) {
1798        Objects.requireNonNull(name, "Group name");
1799        if (first < 0)
1800            throw new IllegalStateException("No match found");
1801        if (!parentPattern.namedGroups().containsKey(name))
1802            throw new IllegalArgumentException("No group with name <" + name + ">");
1803        return parentPattern.namedGroups().get(name);
1804    }
1805}
1806