1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.objects;
27
28import static jdk.nashorn.internal.runtime.ECMAErrors.typeError;
29import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
30
31import java.lang.invoke.MethodHandle;
32import java.util.ArrayList;
33import java.util.Arrays;
34import java.util.List;
35import java.util.concurrent.Callable;
36import jdk.nashorn.internal.objects.annotations.Attribute;
37import jdk.nashorn.internal.objects.annotations.Constructor;
38import jdk.nashorn.internal.objects.annotations.Function;
39import jdk.nashorn.internal.objects.annotations.Getter;
40import jdk.nashorn.internal.objects.annotations.Property;
41import jdk.nashorn.internal.objects.annotations.ScriptClass;
42import jdk.nashorn.internal.objects.annotations.SpecializedFunction;
43import jdk.nashorn.internal.objects.annotations.Where;
44import jdk.nashorn.internal.runtime.BitVector;
45import jdk.nashorn.internal.runtime.JSType;
46import jdk.nashorn.internal.runtime.ParserException;
47import jdk.nashorn.internal.runtime.PropertyMap;
48import jdk.nashorn.internal.runtime.ScriptObject;
49import jdk.nashorn.internal.runtime.ScriptRuntime;
50import jdk.nashorn.internal.runtime.linker.Bootstrap;
51import jdk.nashorn.internal.runtime.regexp.RegExp;
52import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
53import jdk.nashorn.internal.runtime.regexp.RegExpMatcher;
54import jdk.nashorn.internal.runtime.regexp.RegExpResult;
55
56/**
57 * ECMA 15.10 RegExp Objects.
58 */
59@ScriptClass("RegExp")
60public final class NativeRegExp extends ScriptObject {
61    /** ECMA 15.10.7.5 lastIndex property */
62    @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
63    public Object lastIndex;
64
65    /** Compiled regexp */
66    private RegExp regexp;
67
68    // Reference to global object needed to support static RegExp properties
69    private final Global globalObject;
70
71    // initialized by nasgen
72    private static PropertyMap $nasgenmap$;
73
74    private NativeRegExp(final Global global) {
75        super(global.getRegExpPrototype(), $nasgenmap$);
76        this.globalObject = global;
77    }
78
79    NativeRegExp(final String input, final String flagString, final Global global, final ScriptObject proto) {
80        super(proto, $nasgenmap$);
81        try {
82            this.regexp = RegExpFactory.create(input, flagString);
83        } catch (final ParserException e) {
84            // translate it as SyntaxError object and throw it
85            e.throwAsEcmaException();
86            throw new AssertionError(); //guard against null warnings below
87        }
88        this.globalObject = global;
89        this.setLastIndex(0);
90    }
91
92    NativeRegExp(final String input, final String flagString, final Global global) {
93        this(input, flagString, global, global.getRegExpPrototype());
94    }
95
96    NativeRegExp(final String input, final String flagString) {
97        this(input, flagString, Global.instance());
98    }
99
100    NativeRegExp(final String string, final Global global) {
101        this(string, "", global);
102    }
103
104    NativeRegExp(final String string) {
105        this(string, Global.instance());
106    }
107
108    NativeRegExp(final NativeRegExp regExp) {
109        this(Global.instance());
110        this.lastIndex  = regExp.getLastIndexObject();
111        this.regexp      = regExp.getRegExp();
112    }
113
114    @Override
115    public String getClassName() {
116        return "RegExp";
117    }
118
119    /**
120     * ECMA 15.10.4
121     *
122     * Constructor
123     *
124     * @param isNew is the new operator used for instantiating this regexp
125     * @param self  self reference
126     * @param args  arguments (optional: pattern and flags)
127     * @return new NativeRegExp
128     */
129    @Constructor(arity = 2)
130    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object... args) {
131        if (args.length > 1) {
132            return newRegExp(args[0], args[1]);
133        } else if (args.length > 0) {
134            return newRegExp(args[0], UNDEFINED);
135        }
136
137        return newRegExp(UNDEFINED, UNDEFINED);
138    }
139
140    /**
141     * ECMA 15.10.4
142     *
143     * Constructor - specialized version, no args, empty regexp
144     *
145     * @param isNew is the new operator used for instantiating this regexp
146     * @param self  self reference
147     * @return new NativeRegExp
148     */
149    @SpecializedFunction(isConstructor=true)
150    public static NativeRegExp constructor(final boolean isNew, final Object self) {
151        return new NativeRegExp("", "");
152    }
153
154    /**
155     * ECMA 15.10.4
156     *
157     * Constructor - specialized version, pattern, no flags
158     *
159     * @param isNew is the new operator used for instantiating this regexp
160     * @param self  self reference
161     * @param pattern pattern
162     * @return new NativeRegExp
163     */
164    @SpecializedFunction(isConstructor=true)
165    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern) {
166        return newRegExp(pattern, UNDEFINED);
167    }
168
169    /**
170     * ECMA 15.10.4
171     *
172     * Constructor - specialized version, pattern and flags
173     *
174     * @param isNew is the new operator used for instantiating this regexp
175     * @param self  self reference
176     * @param pattern pattern
177     * @param flags  flags
178     * @return new NativeRegExp
179     */
180    @SpecializedFunction(isConstructor=true)
181    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) {
182        return newRegExp(pattern, flags);
183    }
184
185    /**
186     * External constructor used in generated code, which explains the public access
187     *
188     * @param regexp regexp
189     * @param flags  flags
190     * @return new NativeRegExp
191     */
192    public static NativeRegExp newRegExp(final Object regexp, final Object flags) {
193        String  patternString = "";
194        String  flagString    = "";
195
196        if (regexp != UNDEFINED) {
197            if (regexp instanceof NativeRegExp) {
198                if (flags != UNDEFINED) {
199                    throw typeError("regex.cant.supply.flags");
200                }
201                return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as
202            }
203            patternString = JSType.toString(regexp);
204        }
205
206        if (flags != UNDEFINED) {
207            flagString = JSType.toString(flags);
208        }
209
210        return new NativeRegExp(patternString, flagString);
211    }
212
213    /**
214     * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped.
215     *
216     * @param string pattern string
217     * @return flat regexp
218     */
219    static NativeRegExp flatRegExp(final String string) {
220        // escape special characters
221        StringBuilder sb = null;
222        final int length = string.length();
223
224        for (int i = 0; i < length; i++) {
225            final char c = string.charAt(i);
226            switch (c) {
227                case '^':
228                case '$':
229                case '\\':
230                case '.':
231                case '*':
232                case '+':
233                case '?':
234                case '(':
235                case ')':
236                case '[':
237                case '{':
238                case '|':
239                    if (sb == null) {
240                        sb = new StringBuilder(length * 2);
241                        sb.append(string, 0, i);
242                    }
243                    sb.append('\\');
244                    sb.append(c);
245                    break;
246                default:
247                    if (sb != null) {
248                        sb.append(c);
249                    }
250                    break;
251            }
252        }
253        return new NativeRegExp(sb == null ? string : sb.toString(), "");
254    }
255
256    private String getFlagString() {
257        final StringBuilder sb = new StringBuilder(3);
258
259        if (regexp.isGlobal()) {
260            sb.append('g');
261        }
262        if (regexp.isIgnoreCase()) {
263            sb.append('i');
264        }
265        if (regexp.isMultiline()) {
266            sb.append('m');
267        }
268
269        return sb.toString();
270    }
271
272    @Override
273    public String safeToString() {
274        return "[RegExp " + toString() + "]";
275    }
276
277    @Override
278    public String toString() {
279        return "/" + regexp.getSource() + "/" + getFlagString();
280    }
281
282    /**
283     * Nashorn extension: RegExp.prototype.compile - everybody implements this!
284     *
285     * @param self    self reference
286     * @param pattern pattern
287     * @param flags   flags
288     * @return new NativeRegExp
289     */
290    @Function(attributes = Attribute.NOT_ENUMERABLE)
291    public static ScriptObject compile(final Object self, final Object pattern, final Object flags) {
292        final NativeRegExp regExp   = checkRegExp(self);
293        final NativeRegExp compiled = newRegExp(pattern, flags);
294        // copy over regexp to 'self'
295        regExp.setRegExp(compiled.getRegExp());
296
297        // Some implementations return undefined. Some return 'self'. Since return
298        // value is most likely be ignored, we can play safe and return 'self'.
299        return regExp;
300    }
301
302    /**
303     * ECMA 15.10.6.2 RegExp.prototype.exec(string)
304     *
305     * @param self   self reference
306     * @param string string to match against regexp
307     * @return array containing the matches or {@code null} if no match
308     */
309    @Function(attributes = Attribute.NOT_ENUMERABLE)
310    public static ScriptObject exec(final Object self, final Object string) {
311        return checkRegExp(self).exec(JSType.toString(string));
312    }
313
314    /**
315     * ECMA 15.10.6.3 RegExp.prototype.test(string)
316     *
317     * @param self   self reference
318     * @param string string to test for matches against regexp
319     * @return true if matches found, false otherwise
320     */
321    @Function(attributes = Attribute.NOT_ENUMERABLE)
322    public static boolean test(final Object self, final Object string) {
323        return checkRegExp(self).test(JSType.toString(string));
324    }
325
326    /**
327     * ECMA 15.10.6.4 RegExp.prototype.toString()
328     *
329     * @param self self reference
330     * @return string version of regexp
331     */
332    @Function(attributes = Attribute.NOT_ENUMERABLE)
333    public static String toString(final Object self) {
334        return checkRegExp(self).toString();
335    }
336
337    /**
338     * ECMA 15.10.7.1 source
339     *
340     * @param self self reference
341     * @return the input string for the regexp
342     */
343    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
344    public static Object source(final Object self) {
345        return checkRegExp(self).getRegExp().getSource();
346    }
347
348    /**
349     * ECMA 15.10.7.2 global
350     *
351     * @param self self reference
352     * @return true if this regexp is flagged global, false otherwise
353     */
354    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
355    public static Object global(final Object self) {
356        return checkRegExp(self).getRegExp().isGlobal();
357    }
358
359    /**
360     * ECMA 15.10.7.3 ignoreCase
361     *
362     * @param self self reference
363     * @return true if this regexp if flagged to ignore case, false otherwise
364     */
365    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
366    public static Object ignoreCase(final Object self) {
367        return checkRegExp(self).getRegExp().isIgnoreCase();
368    }
369
370    /**
371     * ECMA 15.10.7.4 multiline
372     *
373     * @param self self reference
374     * @return true if this regexp is flagged to be multiline, false otherwise
375     */
376    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
377    public static Object multiline(final Object self) {
378        return checkRegExp(self).getRegExp().isMultiline();
379    }
380
381    /**
382     * Getter for non-standard RegExp.input property.
383     * @param self self object
384     * @return last regexp input
385     */
386    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input")
387    public static Object getLastInput(final Object self) {
388        final RegExpResult match = Global.instance().getLastRegExpResult();
389        return match == null ? "" : match.getInput();
390    }
391
392    /**
393     * Getter for non-standard RegExp.multiline property.
394     * @param self self object
395     * @return last regexp input
396     */
397    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "multiline")
398    public static Object getLastMultiline(final Object self) {
399        return false; // doesn't ever seem to become true and isn't documented anyhwere
400    }
401
402    /**
403     * Getter for non-standard RegExp.lastMatch property.
404     * @param self self object
405     * @return last regexp input
406     */
407    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch")
408    public static Object getLastMatch(final Object self) {
409        final RegExpResult match = Global.instance().getLastRegExpResult();
410        return match == null ? "" : match.getGroup(0);
411    }
412
413    /**
414     * Getter for non-standard RegExp.lastParen property.
415     * @param self self object
416     * @return last regexp input
417     */
418    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen")
419    public static Object getLastParen(final Object self) {
420        final RegExpResult match = Global.instance().getLastRegExpResult();
421        return match == null ? "" : match.getLastParen();
422    }
423
424    /**
425     * Getter for non-standard RegExp.leftContext property.
426     * @param self self object
427     * @return last regexp input
428     */
429    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext")
430    public static Object getLeftContext(final Object self) {
431        final RegExpResult match = Global.instance().getLastRegExpResult();
432        return match == null ? "" : match.getInput().substring(0, match.getIndex());
433    }
434
435    /**
436     * Getter for non-standard RegExp.rightContext property.
437     * @param self self object
438     * @return last regexp input
439     */
440    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext")
441    public static Object getRightContext(final Object self) {
442        final RegExpResult match = Global.instance().getLastRegExpResult();
443        return match == null ? "" : match.getInput().substring(match.getIndex() + match.length());
444    }
445
446    /**
447     * Getter for non-standard RegExp.$1 property.
448     * @param self self object
449     * @return last regexp input
450     */
451    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1")
452    public static Object getGroup1(final Object self) {
453        final RegExpResult match = Global.instance().getLastRegExpResult();
454        return match == null ? "" : match.getGroup(1);
455    }
456
457    /**
458     * Getter for non-standard RegExp.$2 property.
459     * @param self self object
460     * @return last regexp input
461     */
462    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2")
463    public static Object getGroup2(final Object self) {
464        final RegExpResult match = Global.instance().getLastRegExpResult();
465        return match == null ? "" : match.getGroup(2);
466    }
467
468    /**
469     * Getter for non-standard RegExp.$3 property.
470     * @param self self object
471     * @return last regexp input
472     */
473    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3")
474    public static Object getGroup3(final Object self) {
475        final RegExpResult match = Global.instance().getLastRegExpResult();
476        return match == null ? "" : match.getGroup(3);
477    }
478
479    /**
480     * Getter for non-standard RegExp.$4 property.
481     * @param self self object
482     * @return last regexp input
483     */
484    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4")
485    public static Object getGroup4(final Object self) {
486        final RegExpResult match = Global.instance().getLastRegExpResult();
487        return match == null ? "" : match.getGroup(4);
488    }
489
490    /**
491     * Getter for non-standard RegExp.$5 property.
492     * @param self self object
493     * @return last regexp input
494     */
495    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5")
496    public static Object getGroup5(final Object self) {
497        final RegExpResult match = Global.instance().getLastRegExpResult();
498        return match == null ? "" : match.getGroup(5);
499    }
500
501    /**
502     * Getter for non-standard RegExp.$6 property.
503     * @param self self object
504     * @return last regexp input
505     */
506    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6")
507    public static Object getGroup6(final Object self) {
508        final RegExpResult match = Global.instance().getLastRegExpResult();
509        return match == null ? "" : match.getGroup(6);
510    }
511
512    /**
513     * Getter for non-standard RegExp.$7 property.
514     * @param self self object
515     * @return last regexp input
516     */
517    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7")
518    public static Object getGroup7(final Object self) {
519        final RegExpResult match = Global.instance().getLastRegExpResult();
520        return match == null ? "" : match.getGroup(7);
521    }
522
523    /**
524     * Getter for non-standard RegExp.$8 property.
525     * @param self self object
526     * @return last regexp input
527     */
528    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8")
529    public static Object getGroup8(final Object self) {
530        final RegExpResult match = Global.instance().getLastRegExpResult();
531        return match == null ? "" : match.getGroup(8);
532    }
533
534    /**
535     * Getter for non-standard RegExp.$9 property.
536     * @param self self object
537     * @return last regexp input
538     */
539    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9")
540    public static Object getGroup9(final Object self) {
541        final RegExpResult match = Global.instance().getLastRegExpResult();
542        return match == null ? "" : match.getGroup(9);
543    }
544
545    private RegExpResult execInner(final String string) {
546        final boolean isGlobal = regexp.isGlobal();
547        int start = getLastIndex();
548        if (!isGlobal) {
549            start = 0;
550        }
551
552        if (start < 0 || start > string.length()) {
553            if (isGlobal) {
554                setLastIndex(0);
555            }
556            return null;
557        }
558
559        final RegExpMatcher matcher = regexp.match(string);
560        if (matcher == null || !matcher.search(start)) {
561            if (isGlobal) {
562                setLastIndex(0);
563            }
564            return null;
565        }
566
567        if (isGlobal) {
568            setLastIndex(matcher.end());
569        }
570
571        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
572        globalObject.setLastRegExpResult(match);
573        return match;
574    }
575
576    // String.prototype.split method ignores the global flag and should not update lastIndex property.
577    private RegExpResult execSplit(final String string, final int start) {
578        if (start < 0 || start > string.length()) {
579            return null;
580        }
581
582        final RegExpMatcher matcher = regexp.match(string);
583        if (matcher == null || !matcher.search(start)) {
584            return null;
585        }
586
587        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
588        globalObject.setLastRegExpResult(match);
589        return match;
590    }
591
592    /**
593     * Convert java.util.regex.Matcher groups to JavaScript groups.
594     * That is, replace null and groups that didn't match with undefined.
595     */
596    private Object[] groups(final RegExpMatcher matcher) {
597        final int groupCount = matcher.groupCount();
598        final Object[] groups = new Object[groupCount + 1];
599        final BitVector groupsInNegativeLookahead  = regexp.getGroupsInNegativeLookahead();
600
601        for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
602            final int groupStart = matcher.start(i);
603            if (lastGroupStart > groupStart
604                    || groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i)) {
605                // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
606                // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
607                // in the pattern always return undefined because the negative lookahead must fail.
608                groups[i] = UNDEFINED;
609                continue;
610            }
611            final String group = matcher.group(i);
612            groups[i] = group == null ? UNDEFINED : group;
613            lastGroupStart = groupStart;
614        }
615        return groups;
616    }
617
618    /**
619     * Executes a search for a match within a string based on a regular
620     * expression. It returns an array of information or null if no match is
621     * found.
622     *
623     * @param string String to match.
624     * @return NativeArray of matches, string or null.
625     */
626    public NativeRegExpExecResult exec(final String string) {
627        final RegExpResult match = execInner(string);
628
629        if (match == null) {
630            return null;
631        }
632
633        return new NativeRegExpExecResult(match, globalObject);
634    }
635
636    /**
637     * Executes a search for a match within a string based on a regular
638     * expression.
639     *
640     * @param string String to match.
641     * @return True if a match is found.
642     */
643    public boolean test(final String string) {
644        return execInner(string) != null;
645    }
646
647    /**
648     * Searches and replaces the regular expression portion (match) with the
649     * replaced text instead. For the "replacement text" parameter, you can use
650     * the keywords $1 to $2 to replace the original text with values from
651     * sub-patterns defined within the main pattern.
652     *
653     * @param string String to match.
654     * @param replacement Replacement string.
655     * @return String with substitutions.
656     */
657    String replace(final String string, final String replacement, final Object function) throws Throwable {
658        final RegExpMatcher matcher = regexp.match(string);
659
660        if (matcher == null) {
661            return string;
662        }
663
664        if (!regexp.isGlobal()) {
665            if (!matcher.search(0)) {
666                return string;
667            }
668
669            final StringBuilder sb = new StringBuilder();
670            sb.append(string, 0, matcher.start());
671
672            if (function != null) {
673                final Object self = Bootstrap.isStrictCallable(function) ? UNDEFINED : Global.instance();
674                sb.append(callReplaceValue(getReplaceValueInvoker(), function, self, matcher, string));
675            } else {
676                appendReplacement(matcher, string, replacement, sb);
677            }
678            sb.append(string, matcher.end(), string.length());
679            return sb.toString();
680        }
681
682        setLastIndex(0);
683
684        if (!matcher.search(0)) {
685            return string;
686        }
687
688        int thisIndex = 0;
689        int previousLastIndex = 0;
690        final StringBuilder sb = new StringBuilder();
691
692        final MethodHandle invoker = function == null ? null : getReplaceValueInvoker();
693        final Object self = function == null || Bootstrap.isStrictCallable(function) ? UNDEFINED : Global.instance();
694
695        do {
696            sb.append(string, thisIndex, matcher.start());
697            if (function != null) {
698                sb.append(callReplaceValue(invoker, function, self, matcher, string));
699            } else {
700                appendReplacement(matcher, string, replacement, sb);
701            }
702
703            thisIndex = matcher.end();
704
705            // ECMA6 21.2.5.6 step 8.g.iv.5: If matchStr is empty advance index by one
706            if (matcher.start() == matcher.end()) {
707                setLastIndex(thisIndex + 1);
708                previousLastIndex = thisIndex + 1;
709            } else {
710                previousLastIndex = thisIndex;
711            }
712        } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex));
713
714        sb.append(string, thisIndex, string.length());
715
716        return sb.toString();
717    }
718
719    private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb) {
720        /*
721         * Process substitution patterns:
722         *
723         * $$ -> $
724         * $& -> the matched substring
725         * $` -> the portion of string that precedes matched substring
726         * $' -> the portion of string that follows the matched substring
727         * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit
728         * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
729         */
730
731        int cursor = 0;
732        Object[] groups = null;
733
734        while (cursor < replacement.length()) {
735            char nextChar = replacement.charAt(cursor);
736            if (nextChar == '$') {
737                // Skip past $
738                cursor++;
739                if (cursor == replacement.length()) {
740                    // nothing after "$"
741                    sb.append('$');
742                    break;
743                }
744
745                nextChar = replacement.charAt(cursor);
746                final int firstDigit = nextChar - '0';
747
748                if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
749                    // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
750                    int refNum = firstDigit;
751                    cursor++;
752                    if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
753                        final int secondDigit = replacement.charAt(cursor) - '0';
754                        if (secondDigit >= 0 && secondDigit <= 9) {
755                            final int newRefNum = firstDigit * 10 + secondDigit;
756                            if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
757                                // $nn ($01-$99)
758                                refNum = newRefNum;
759                                cursor++;
760                            }
761                        }
762                    }
763                    if (refNum > 0) {
764                        if (groups == null) {
765                            groups = groups(matcher);
766                        }
767                        // Append group if matched.
768                        if (groups[refNum] != UNDEFINED) {
769                            sb.append((String) groups[refNum]);
770                        }
771                    } else { // $0. ignore.
772                        assert refNum == 0;
773                        sb.append("$0");
774                    }
775                } else if (nextChar == '$') {
776                    sb.append('$');
777                    cursor++;
778                } else if (nextChar == '&') {
779                    sb.append(matcher.group());
780                    cursor++;
781                } else if (nextChar == '`') {
782                    sb.append(text, 0, matcher.start());
783                    cursor++;
784                } else if (nextChar == '\'') {
785                    sb.append(text, matcher.end(), text.length());
786                    cursor++;
787                } else {
788                    // unknown substitution or $n with n>m. skip.
789                    sb.append('$');
790                }
791            } else {
792                sb.append(nextChar);
793                cursor++;
794            }
795        }
796    }
797
798    private static final Object REPLACE_VALUE = new Object();
799
800    private static MethodHandle getReplaceValueInvoker() {
801        return Global.instance().getDynamicInvoker(REPLACE_VALUE,
802                new Callable<MethodHandle>() {
803                    @Override
804                    public MethodHandle call() {
805                        return Bootstrap.createDynamicCallInvoker(String.class, Object.class, Object.class, Object[].class);
806                    }
807                });
808    }
809
810    private String callReplaceValue(final MethodHandle invoker, final Object function, final Object self, final RegExpMatcher matcher, final String string) throws Throwable {
811        final Object[] groups = groups(matcher);
812        final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
813
814        args[groups.length]     = matcher.start();
815        args[groups.length + 1] = string;
816
817        return (String)invoker.invokeExact(function, self, args);
818    }
819
820    /**
821     * Breaks up a string into an array of substrings based on a regular
822     * expression or fixed string.
823     *
824     * @param string String to match.
825     * @param limit  Split limit.
826     * @return Array of substrings.
827     */
828    NativeArray split(final String string, final long limit) {
829        if (limit == 0L) {
830            return new NativeArray();
831        }
832
833        final List<Object> matches = new ArrayList<>();
834
835        RegExpResult match;
836        final int inputLength = string.length();
837        int splitLastLength = -1;
838        int splitLastIndex = 0;
839        int splitLastLastIndex = 0;
840
841        while ((match = execSplit(string, splitLastIndex)) != null) {
842            splitLastIndex = match.getIndex() + match.length();
843
844            if (splitLastIndex > splitLastLastIndex) {
845                matches.add(string.substring(splitLastLastIndex, match.getIndex()));
846                final Object[] groups = match.getGroups();
847                if (groups.length > 1 && match.getIndex() < inputLength) {
848                    for (int index = 1; index < groups.length && matches.size() < limit; index++) {
849                        matches.add(groups[index]);
850                    }
851                }
852
853                splitLastLength = match.length();
854
855                if (matches.size() >= limit) {
856                    break;
857                }
858            }
859
860            // bump the index to avoid infinite loop
861            if (splitLastIndex == splitLastLastIndex) {
862                splitLastIndex++;
863            } else {
864                splitLastLastIndex = splitLastIndex;
865            }
866        }
867
868        if (matches.size() < limit) {
869            // check special case if we need to append an empty string at the
870            // end of the match
871            // if the lastIndex was the entire string
872            if (splitLastLastIndex == string.length()) {
873                if (splitLastLength > 0 || execSplit("", 0) == null) {
874                    matches.add("");
875                }
876            } else {
877                matches.add(string.substring(splitLastLastIndex, inputLength));
878            }
879        }
880
881        return new NativeArray(matches.toArray());
882    }
883
884    /**
885     * Tests for a match in a string. It returns the index of the match, or -1
886     * if not found.
887     *
888     * @param string String to match.
889     * @return Index of match.
890     */
891    int search(final String string) {
892        final RegExpResult match = execInner(string);
893
894        if (match == null) {
895            return -1;
896        }
897
898        return match.getIndex();
899    }
900
901    /**
902     * Fast lastIndex getter
903     * @return last index property as int
904     */
905    public int getLastIndex() {
906        return JSType.toInteger(lastIndex);
907    }
908
909    /**
910     * Fast lastIndex getter
911     * @return last index property as boxed integer
912     */
913    public Object getLastIndexObject() {
914        return lastIndex;
915    }
916
917    /**
918     * Fast lastIndex setter
919     * @param lastIndex lastIndex
920     */
921    public void setLastIndex(final int lastIndex) {
922        this.lastIndex = JSType.toObject(lastIndex);
923    }
924
925    private static NativeRegExp checkRegExp(final Object self) {
926        if (self instanceof NativeRegExp) {
927            return (NativeRegExp)self;
928        } else if (self != null && self == Global.instance().getRegExpPrototype()) {
929            return Global.instance().getDefaultRegExp();
930        } else {
931            throw typeError("not.a.regexp", ScriptRuntime.safeToString(self));
932        }
933    }
934
935    boolean getGlobal() {
936        return regexp.isGlobal();
937    }
938
939    private RegExp getRegExp() {
940        return regexp;
941    }
942
943    private void setRegExp(final RegExp regexp) {
944        this.regexp = regexp;
945    }
946
947}
948