1/*
2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "YarrJIT.h"
28
29#include <wtf/ASCIICType.h>
30#include "LinkBuffer.h"
31#include "Options.h"
32#include "Yarr.h"
33#include "YarrCanonicalizeUCS2.h"
34
35#if ENABLE(YARR_JIT)
36
37using namespace WTF;
38
39namespace JSC { namespace Yarr {
40
41template<YarrJITCompileMode compileMode>
42class YarrGenerator : private MacroAssembler {
43    friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
44
45#if CPU(ARM)
46    static const RegisterID input = ARMRegisters::r0;
47    static const RegisterID index = ARMRegisters::r1;
48    static const RegisterID length = ARMRegisters::r2;
49    static const RegisterID output = ARMRegisters::r3;
50
51    static const RegisterID regT0 = ARMRegisters::r4;
52    static const RegisterID regT1 = ARMRegisters::r5;
53
54    static const RegisterID returnRegister = ARMRegisters::r0;
55    static const RegisterID returnRegister2 = ARMRegisters::r1;
56#elif CPU(ARM64)
57    static const RegisterID input = ARM64Registers::x0;
58    static const RegisterID index = ARM64Registers::x1;
59    static const RegisterID length = ARM64Registers::x2;
60    static const RegisterID output = ARM64Registers::x3;
61
62    static const RegisterID regT0 = ARM64Registers::x4;
63    static const RegisterID regT1 = ARM64Registers::x5;
64
65    static const RegisterID returnRegister = ARM64Registers::x0;
66    static const RegisterID returnRegister2 = ARM64Registers::x1;
67#elif CPU(MIPS)
68    static const RegisterID input = MIPSRegisters::a0;
69    static const RegisterID index = MIPSRegisters::a1;
70    static const RegisterID length = MIPSRegisters::a2;
71    static const RegisterID output = MIPSRegisters::a3;
72
73    static const RegisterID regT0 = MIPSRegisters::t4;
74    static const RegisterID regT1 = MIPSRegisters::t5;
75
76    static const RegisterID returnRegister = MIPSRegisters::v0;
77    static const RegisterID returnRegister2 = MIPSRegisters::v1;
78#elif CPU(SH4)
79    static const RegisterID input = SH4Registers::r4;
80    static const RegisterID index = SH4Registers::r5;
81    static const RegisterID length = SH4Registers::r6;
82    static const RegisterID output = SH4Registers::r7;
83
84    static const RegisterID regT0 = SH4Registers::r0;
85    static const RegisterID regT1 = SH4Registers::r1;
86
87    static const RegisterID returnRegister = SH4Registers::r0;
88    static const RegisterID returnRegister2 = SH4Registers::r1;
89#elif CPU(X86)
90    static const RegisterID input = X86Registers::eax;
91    static const RegisterID index = X86Registers::edx;
92    static const RegisterID length = X86Registers::ecx;
93    static const RegisterID output = X86Registers::edi;
94
95    static const RegisterID regT0 = X86Registers::ebx;
96    static const RegisterID regT1 = X86Registers::esi;
97
98    static const RegisterID returnRegister = X86Registers::eax;
99    static const RegisterID returnRegister2 = X86Registers::edx;
100#elif CPU(X86_64)
101#if !OS(WINDOWS)
102    static const RegisterID input = X86Registers::edi;
103    static const RegisterID index = X86Registers::esi;
104    static const RegisterID length = X86Registers::edx;
105    static const RegisterID output = X86Registers::ecx;
106#else
107    // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
108    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
109    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
110    static const RegisterID input = X86Registers::edx;
111    static const RegisterID index = X86Registers::r8;
112    static const RegisterID length = X86Registers::r9;
113    static const RegisterID output = X86Registers::r10;
114#endif
115
116    static const RegisterID regT0 = X86Registers::eax;
117    static const RegisterID regT1 = X86Registers::ebx;
118
119    static const RegisterID returnRegister = X86Registers::eax;
120    static const RegisterID returnRegister2 = X86Registers::edx;
121#endif
122
123    void optimizeAlternative(PatternAlternative* alternative)
124    {
125        if (!alternative->m_terms.size())
126            return;
127
128        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
129            PatternTerm& term = alternative->m_terms[i];
130            PatternTerm& nextTerm = alternative->m_terms[i + 1];
131
132            if ((term.type == PatternTerm::TypeCharacterClass)
133                && (term.quantityType == QuantifierFixedCount)
134                && (nextTerm.type == PatternTerm::TypePatternCharacter)
135                && (nextTerm.quantityType == QuantifierFixedCount)) {
136                PatternTerm termCopy = term;
137                alternative->m_terms[i] = nextTerm;
138                alternative->m_terms[i + 1] = termCopy;
139            }
140        }
141    }
142
143    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
144    {
145        do {
146            // pick which range we're going to generate
147            int which = count >> 1;
148            char lo = ranges[which].begin;
149            char hi = ranges[which].end;
150
151            // check if there are any ranges or matches below lo.  If not, just jl to failure -
152            // if there is anything else to check, check that first, if it falls through jmp to failure.
153            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
154                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
155
156                // generate code for all ranges before this one
157                if (which)
158                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
159
160                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
161                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
162                    ++*matchIndex;
163                }
164                failures.append(jump());
165
166                loOrAbove.link(this);
167            } else if (which) {
168                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
169
170                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
171                failures.append(jump());
172
173                loOrAbove.link(this);
174            } else
175                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
176
177            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
178                ++*matchIndex;
179
180            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
181            // fall through to here, the value is above hi.
182
183            // shuffle along & loop around if there are any more matches to handle.
184            unsigned next = which + 1;
185            ranges += next;
186            count -= next;
187        } while (count);
188    }
189
190    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
191    {
192        if (charClass->m_table) {
193            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
194            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
195            return;
196        }
197        Jump unicodeFail;
198        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
199            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
200
201            if (charClass->m_matchesUnicode.size()) {
202                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
203                    UChar ch = charClass->m_matchesUnicode[i];
204                    matchDest.append(branch32(Equal, character, Imm32(ch)));
205                }
206            }
207
208            if (charClass->m_rangesUnicode.size()) {
209                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
210                    UChar lo = charClass->m_rangesUnicode[i].begin;
211                    UChar hi = charClass->m_rangesUnicode[i].end;
212
213                    Jump below = branch32(LessThan, character, Imm32(lo));
214                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
215                    below.link(this);
216                }
217            }
218
219            unicodeFail = jump();
220            isAscii.link(this);
221        }
222
223        if (charClass->m_ranges.size()) {
224            unsigned matchIndex = 0;
225            JumpList failures;
226            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
227            while (matchIndex < charClass->m_matches.size())
228                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
229
230            failures.link(this);
231        } else if (charClass->m_matches.size()) {
232            // optimization: gather 'a','A' etc back together, can mask & test once.
233            Vector<char> matchesAZaz;
234
235            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
236                char ch = charClass->m_matches[i];
237                if (m_pattern.m_ignoreCase) {
238                    if (isASCIILower(ch)) {
239                        matchesAZaz.append(ch);
240                        continue;
241                    }
242                    if (isASCIIUpper(ch))
243                        continue;
244                }
245                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
246            }
247
248            if (unsigned countAZaz = matchesAZaz.size()) {
249                or32(TrustedImm32(32), character);
250                for (unsigned i = 0; i < countAZaz; ++i)
251                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
252            }
253        }
254
255        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
256            unicodeFail.link(this);
257    }
258
259    // Jumps if input not available; will have (incorrectly) incremented already!
260    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
261    {
262        if (countToCheck)
263            add32(Imm32(countToCheck), index);
264        return branch32(Above, index, length);
265    }
266
267    Jump jumpIfAvailableInput(unsigned countToCheck)
268    {
269        add32(Imm32(countToCheck), index);
270        return branch32(BelowOrEqual, index, length);
271    }
272
273    Jump checkInput()
274    {
275        return branch32(BelowOrEqual, index, length);
276    }
277
278    Jump atEndOfInput()
279    {
280        return branch32(Equal, index, length);
281    }
282
283    Jump notAtEndOfInput()
284    {
285        return branch32(NotEqual, index, length);
286    }
287
288    Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
289    {
290        readCharacter(inputPosition, character);
291
292        // For case-insesitive compares, non-ascii characters that have different
293        // upper & lower case representations are converted to a character class.
294        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
295        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
296            or32(TrustedImm32(0x20), character);
297            ch |= 0x20;
298        }
299
300        return branch32(NotEqual, character, Imm32(ch));
301    }
302
303    void readCharacter(int inputPosition, RegisterID reg)
304    {
305        if (m_charSize == Char8)
306            load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
307        else
308            load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
309    }
310
311    void storeToFrame(RegisterID reg, unsigned frameLocation)
312    {
313        poke(reg, frameLocation);
314    }
315
316    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
317    {
318        poke(imm, frameLocation);
319    }
320
321    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
322    {
323        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
324    }
325
326    void loadFromFrame(unsigned frameLocation, RegisterID reg)
327    {
328        peek(reg, frameLocation);
329    }
330
331    void loadFromFrameAndJump(unsigned frameLocation)
332    {
333        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
334    }
335
336    unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
337    {
338        callFrameSize *= sizeof(void*);
339        if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
340            CRASH();
341        callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
342        if (!callFrameSize)
343            CRASH();
344        return callFrameSize;
345    }
346    void initCallFrame()
347    {
348        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
349        if (callFrameSize)
350            subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
351    }
352    void removeCallFrame()
353    {
354        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
355        if (callFrameSize)
356            addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
357    }
358
359    // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
360    void setSubpatternStart(RegisterID reg, unsigned subpattern)
361    {
362        ASSERT(subpattern);
363        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
364        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
365    }
366    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
367    {
368        ASSERT(subpattern);
369        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
370        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
371    }
372    void clearSubpatternStart(unsigned subpattern)
373    {
374        ASSERT(subpattern);
375        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
376        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
377    }
378
379    // We use one of three different strategies to track the start of the current match,
380    // while matching.
381    // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
382    //    at the end of matching. This is irrespective of compileMode, and in this case
383    //    these methods should never be called.
384    // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
385    //    vector, store the match start in the output vector.
386    // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
387    //    in this register.
388    void setMatchStart(RegisterID reg)
389    {
390        ASSERT(!m_pattern.m_body->m_hasFixedSize);
391        if (compileMode == IncludeSubpatterns)
392            store32(reg, output);
393        else
394            move(reg, output);
395    }
396    void getMatchStart(RegisterID reg)
397    {
398        ASSERT(!m_pattern.m_body->m_hasFixedSize);
399        if (compileMode == IncludeSubpatterns)
400            load32(output, reg);
401        else
402            move(output, reg);
403    }
404
405    enum YarrOpCode {
406        // These nodes wrap body alternatives - those in the main disjunction,
407        // rather than subpatterns or assertions. These are chained together in
408        // a doubly linked list, with a 'begin' node for the first alternative,
409        // a 'next' node for each subsequent alternative, and an 'end' node at
410        // the end. In the case of repeating alternatives, the 'end' node also
411        // has a reference back to 'begin'.
412        OpBodyAlternativeBegin,
413        OpBodyAlternativeNext,
414        OpBodyAlternativeEnd,
415        // Similar to the body alternatives, but used for subpatterns with two
416        // or more alternatives.
417        OpNestedAlternativeBegin,
418        OpNestedAlternativeNext,
419        OpNestedAlternativeEnd,
420        // Used for alternatives in subpatterns where there is only a single
421        // alternative (backtrackingis easier in these cases), or for alternatives
422        // which never need to be backtracked (those in parenthetical assertions,
423        // terminal subpatterns).
424        OpSimpleNestedAlternativeBegin,
425        OpSimpleNestedAlternativeNext,
426        OpSimpleNestedAlternativeEnd,
427        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
428        OpParenthesesSubpatternOnceBegin,
429        OpParenthesesSubpatternOnceEnd,
430        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
431        OpParenthesesSubpatternTerminalBegin,
432        OpParenthesesSubpatternTerminalEnd,
433        // Used to wrap parenthetical assertions.
434        OpParentheticalAssertionBegin,
435        OpParentheticalAssertionEnd,
436        // Wraps all simple terms (pattern characters, character classes).
437        OpTerm,
438        // Where an expression contains only 'once through' body alternatives
439        // and no repeating ones, this op is used to return match failure.
440        OpMatchFailed
441    };
442
443    // This structure is used to hold the compiled opcode information,
444    // including reference back to the original PatternTerm/PatternAlternatives,
445    // and JIT compilation data structures.
446    struct YarrOp {
447        explicit YarrOp(PatternTerm* term)
448            : m_op(OpTerm)
449            , m_term(term)
450            , m_isDeadCode(false)
451        {
452        }
453
454        explicit YarrOp(YarrOpCode op)
455            : m_op(op)
456            , m_isDeadCode(false)
457        {
458        }
459
460        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
461        YarrOpCode m_op;
462        PatternTerm* m_term;
463
464        // For alternatives, this holds the PatternAlternative and doubly linked
465        // references to this alternative's siblings. In the case of the
466        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
467        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
468        // repeating alternative.
469        PatternAlternative* m_alternative;
470        size_t m_previousOp;
471        size_t m_nextOp;
472
473        // Used to record a set of Jumps out of the generated code, typically
474        // used for jumps out to backtracking code, and a single reentry back
475        // into the code for a node (likely where a backtrack will trigger
476        // rematching).
477        Label m_reentry;
478        JumpList m_jumps;
479
480        // Used for backtracking when the prior alternative did not consume any
481        // characters but matched.
482        Jump m_zeroLengthMatch;
483
484        // This flag is used to null out the second pattern character, when
485        // two are fused to match a pair together.
486        bool m_isDeadCode;
487
488        // Currently used in the case of some of the more complex management of
489        // 'm_checked', to cache the offset used in this alternative, to avoid
490        // recalculating it.
491        int m_checkAdjust;
492
493        // Used by OpNestedAlternativeNext/End to hold the pointer to the
494        // value that will be pushed into the pattern's frame to return to,
495        // upon backtracking back into the disjunction.
496        DataLabelPtr m_returnAddress;
497    };
498
499    // BacktrackingState
500    // This class encapsulates information about the state of code generation
501    // whilst generating the code for backtracking, when a term fails to match.
502    // Upon entry to code generation of the backtracking code for a given node,
503    // the Backtracking state will hold references to all control flow sources
504    // that are outputs in need of further backtracking from the prior node
505    // generated (which is the subsequent operation in the regular expression,
506    // and in the m_ops Vector, since we generated backtracking backwards).
507    // These references to control flow take the form of:
508    //  - A jump list of jumps, to be linked to code that will backtrack them
509    //    further.
510    //  - A set of DataLabelPtr values, to be populated with values to be
511    //    treated effectively as return addresses backtracking into complex
512    //    subpatterns.
513    //  - A flag indicating that the current sequence of generated code up to
514    //    this point requires backtracking.
515    class BacktrackingState {
516    public:
517        BacktrackingState()
518            : m_pendingFallthrough(false)
519        {
520        }
521
522        // Add a jump or jumps, a return address, or set the flag indicating
523        // that the current 'fallthrough' control flow requires backtracking.
524        void append(const Jump& jump)
525        {
526            m_laterFailures.append(jump);
527        }
528        void append(JumpList& jumpList)
529        {
530            m_laterFailures.append(jumpList);
531        }
532        void append(const DataLabelPtr& returnAddress)
533        {
534            m_pendingReturns.append(returnAddress);
535        }
536        void fallthrough()
537        {
538            ASSERT(!m_pendingFallthrough);
539            m_pendingFallthrough = true;
540        }
541
542        // These methods clear the backtracking state, either linking to the
543        // current location, a provided label, or copying the backtracking out
544        // to a JumpList. All actions may require code generation to take place,
545        // and as such are passed a pointer to the assembler.
546        void link(MacroAssembler* assembler)
547        {
548            if (m_pendingReturns.size()) {
549                Label here(assembler);
550                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
551                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
552                m_pendingReturns.clear();
553            }
554            m_laterFailures.link(assembler);
555            m_laterFailures.clear();
556            m_pendingFallthrough = false;
557        }
558        void linkTo(Label label, MacroAssembler* assembler)
559        {
560            if (m_pendingReturns.size()) {
561                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
562                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
563                m_pendingReturns.clear();
564            }
565            if (m_pendingFallthrough)
566                assembler->jump(label);
567            m_laterFailures.linkTo(label, assembler);
568            m_laterFailures.clear();
569            m_pendingFallthrough = false;
570        }
571        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
572        {
573            if (m_pendingReturns.size()) {
574                Label here(assembler);
575                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
576                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
577                m_pendingReturns.clear();
578                m_pendingFallthrough = true;
579            }
580            if (m_pendingFallthrough)
581                jumpList.append(assembler->jump());
582            jumpList.append(m_laterFailures);
583            m_laterFailures.clear();
584            m_pendingFallthrough = false;
585        }
586
587        bool isEmpty()
588        {
589            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
590        }
591
592        // Called at the end of code generation to link all return addresses.
593        void linkDataLabels(LinkBuffer& linkBuffer)
594        {
595            ASSERT(isEmpty());
596            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
597                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
598        }
599
600    private:
601        struct ReturnAddressRecord {
602            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
603                : m_dataLabel(dataLabel)
604                , m_backtrackLocation(backtrackLocation)
605            {
606            }
607
608            DataLabelPtr m_dataLabel;
609            Label m_backtrackLocation;
610        };
611
612        JumpList m_laterFailures;
613        bool m_pendingFallthrough;
614        Vector<DataLabelPtr, 4> m_pendingReturns;
615        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
616    };
617
618    // Generation methods:
619    // ===================
620
621    // This method provides a default implementation of backtracking common
622    // to many terms; terms commonly jump out of the forwards  matching path
623    // on any failed conditions, and add these jumps to the m_jumps list. If
624    // no special handling is required we can often just backtrack to m_jumps.
625    void backtrackTermDefault(size_t opIndex)
626    {
627        YarrOp& op = m_ops[opIndex];
628        m_backtrackingState.append(op.m_jumps);
629    }
630
631    void generateAssertionBOL(size_t opIndex)
632    {
633        YarrOp& op = m_ops[opIndex];
634        PatternTerm* term = op.m_term;
635
636        if (m_pattern.m_multiline) {
637            const RegisterID character = regT0;
638
639            JumpList matchDest;
640            if (!term->inputPosition)
641                matchDest.append(branch32(Equal, index, Imm32(m_checked)));
642
643            readCharacter((term->inputPosition - m_checked) - 1, character);
644            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
645            op.m_jumps.append(jump());
646
647            matchDest.link(this);
648        } else {
649            // Erk, really should poison out these alternatives early. :-/
650            if (term->inputPosition)
651                op.m_jumps.append(jump());
652            else
653                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
654        }
655    }
656    void backtrackAssertionBOL(size_t opIndex)
657    {
658        backtrackTermDefault(opIndex);
659    }
660
661    void generateAssertionEOL(size_t opIndex)
662    {
663        YarrOp& op = m_ops[opIndex];
664        PatternTerm* term = op.m_term;
665
666        if (m_pattern.m_multiline) {
667            const RegisterID character = regT0;
668
669            JumpList matchDest;
670            if (term->inputPosition == m_checked)
671                matchDest.append(atEndOfInput());
672
673            readCharacter(term->inputPosition - m_checked, character);
674            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
675            op.m_jumps.append(jump());
676
677            matchDest.link(this);
678        } else {
679            if (term->inputPosition == m_checked)
680                op.m_jumps.append(notAtEndOfInput());
681            // Erk, really should poison out these alternatives early. :-/
682            else
683                op.m_jumps.append(jump());
684        }
685    }
686    void backtrackAssertionEOL(size_t opIndex)
687    {
688        backtrackTermDefault(opIndex);
689    }
690
691    // Also falls though on nextIsNotWordChar.
692    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
693    {
694        YarrOp& op = m_ops[opIndex];
695        PatternTerm* term = op.m_term;
696
697        const RegisterID character = regT0;
698
699        if (term->inputPosition == m_checked)
700            nextIsNotWordChar.append(atEndOfInput());
701
702        readCharacter((term->inputPosition - m_checked), character);
703        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
704    }
705
706    void generateAssertionWordBoundary(size_t opIndex)
707    {
708        YarrOp& op = m_ops[opIndex];
709        PatternTerm* term = op.m_term;
710
711        const RegisterID character = regT0;
712
713        Jump atBegin;
714        JumpList matchDest;
715        if (!term->inputPosition)
716            atBegin = branch32(Equal, index, Imm32(m_checked));
717        readCharacter((term->inputPosition - m_checked) - 1, character);
718        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
719        if (!term->inputPosition)
720            atBegin.link(this);
721
722        // We fall through to here if the last character was not a wordchar.
723        JumpList nonWordCharThenWordChar;
724        JumpList nonWordCharThenNonWordChar;
725        if (term->invert()) {
726            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
727            nonWordCharThenWordChar.append(jump());
728        } else {
729            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
730            nonWordCharThenNonWordChar.append(jump());
731        }
732        op.m_jumps.append(nonWordCharThenNonWordChar);
733
734        // We jump here if the last character was a wordchar.
735        matchDest.link(this);
736        JumpList wordCharThenWordChar;
737        JumpList wordCharThenNonWordChar;
738        if (term->invert()) {
739            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
740            wordCharThenWordChar.append(jump());
741        } else {
742            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
743            // This can fall-though!
744        }
745
746        op.m_jumps.append(wordCharThenWordChar);
747
748        nonWordCharThenWordChar.link(this);
749        wordCharThenNonWordChar.link(this);
750    }
751    void backtrackAssertionWordBoundary(size_t opIndex)
752    {
753        backtrackTermDefault(opIndex);
754    }
755
756    void generatePatternCharacterOnce(size_t opIndex)
757    {
758        YarrOp& op = m_ops[opIndex];
759
760        if (op.m_isDeadCode)
761            return;
762
763        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
764        // node, so there must always be at least one more node.
765        ASSERT(opIndex + 1 < m_ops.size());
766        YarrOp* nextOp = &m_ops[opIndex + 1];
767
768        PatternTerm* term = op.m_term;
769        UChar ch = term->patternCharacter;
770
771        if ((ch > 0xff) && (m_charSize == Char8)) {
772            // Have a 16 bit pattern character and an 8 bit string - short circuit
773            op.m_jumps.append(jump());
774            return;
775        }
776
777        const RegisterID character = regT0;
778        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
779        unsigned ignoreCaseMask = 0;
780#if CPU(BIG_ENDIAN)
781        int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
782#else
783        int allCharacters = ch;
784#endif
785        int numberCharacters;
786        int startTermPosition = term->inputPosition;
787
788        // For case-insesitive compares, non-ascii characters that have different
789        // upper & lower case representations are converted to a character class.
790        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
791
792        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
793#if CPU(BIG_ENDIAN)
794            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
795#else
796            ignoreCaseMask |= 32;
797#endif
798
799        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
800            PatternTerm* nextTerm = nextOp->m_term;
801
802            if (nextTerm->type != PatternTerm::TypePatternCharacter
803                || nextTerm->quantityType != QuantifierFixedCount
804                || nextTerm->quantityCount != 1
805                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
806                break;
807
808            nextOp->m_isDeadCode = true;
809
810#if CPU(BIG_ENDIAN)
811            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
812#else
813            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
814#endif
815
816            UChar currentCharacter = nextTerm->patternCharacter;
817
818            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
819                // Have a 16 bit pattern character and an 8 bit string - short circuit
820                op.m_jumps.append(jump());
821                return;
822            }
823
824            // For case-insesitive compares, non-ascii characters that have different
825            // upper & lower case representations are converted to a character class.
826            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));
827
828            allCharacters |= (currentCharacter << shiftAmount);
829
830            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
831                ignoreCaseMask |= 32 << shiftAmount;
832        }
833
834        if (m_charSize == Char8) {
835            switch (numberCharacters) {
836            case 1:
837                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
838                return;
839            case 2: {
840                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
841                load16Unaligned(address, character);
842                break;
843            }
844            case 3: {
845                BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
846                load16Unaligned(highAddress, character);
847                if (ignoreCaseMask)
848                    or32(Imm32(ignoreCaseMask), character);
849                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
850                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
851                return;
852            }
853            case 4: {
854                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
855                load32WithUnalignedHalfWords(address, character);
856                break;
857            }
858            }
859        } else {
860            switch (numberCharacters) {
861            case 1:
862                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
863                return;
864            case 2:
865                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
866                load32WithUnalignedHalfWords(address, character);
867                break;
868            }
869        }
870
871        if (ignoreCaseMask)
872            or32(Imm32(ignoreCaseMask), character);
873        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
874        return;
875    }
876    void backtrackPatternCharacterOnce(size_t opIndex)
877    {
878        backtrackTermDefault(opIndex);
879    }
880
881    void generatePatternCharacterFixed(size_t opIndex)
882    {
883        YarrOp& op = m_ops[opIndex];
884        PatternTerm* term = op.m_term;
885        UChar ch = term->patternCharacter;
886
887        const RegisterID character = regT0;
888        const RegisterID countRegister = regT1;
889
890        move(index, countRegister);
891        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
892
893        Label loop(this);
894        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
895
896        if (m_charSize == Char8)
897            load8(address, character);
898        else
899            load16(address, character);
900
901        // For case-insesitive compares, non-ascii characters that have different
902        // upper & lower case representations are converted to a character class.
903        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
904        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
905            or32(TrustedImm32(0x20), character);
906            ch |= 0x20;
907        }
908
909        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
910        add32(TrustedImm32(1), countRegister);
911        branch32(NotEqual, countRegister, index).linkTo(loop, this);
912    }
913    void backtrackPatternCharacterFixed(size_t opIndex)
914    {
915        backtrackTermDefault(opIndex);
916    }
917
918    void generatePatternCharacterGreedy(size_t opIndex)
919    {
920        YarrOp& op = m_ops[opIndex];
921        PatternTerm* term = op.m_term;
922        UChar ch = term->patternCharacter;
923
924        const RegisterID character = regT0;
925        const RegisterID countRegister = regT1;
926
927        move(TrustedImm32(0), countRegister);
928
929        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
930        if (!((ch > 0xff) && (m_charSize == Char8))) {
931            JumpList failures;
932            Label loop(this);
933            failures.append(atEndOfInput());
934            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
935
936            add32(TrustedImm32(1), countRegister);
937            add32(TrustedImm32(1), index);
938            if (term->quantityCount == quantifyInfinite)
939                jump(loop);
940            else
941                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
942
943            failures.link(this);
944        }
945        op.m_reentry = label();
946
947        storeToFrame(countRegister, term->frameLocation);
948    }
949    void backtrackPatternCharacterGreedy(size_t opIndex)
950    {
951        YarrOp& op = m_ops[opIndex];
952        PatternTerm* term = op.m_term;
953
954        const RegisterID countRegister = regT1;
955
956        m_backtrackingState.link(this);
957
958        loadFromFrame(term->frameLocation, countRegister);
959        m_backtrackingState.append(branchTest32(Zero, countRegister));
960        sub32(TrustedImm32(1), countRegister);
961        sub32(TrustedImm32(1), index);
962        jump(op.m_reentry);
963    }
964
965    void generatePatternCharacterNonGreedy(size_t opIndex)
966    {
967        YarrOp& op = m_ops[opIndex];
968        PatternTerm* term = op.m_term;
969
970        const RegisterID countRegister = regT1;
971
972        move(TrustedImm32(0), countRegister);
973        op.m_reentry = label();
974        storeToFrame(countRegister, term->frameLocation);
975    }
976    void backtrackPatternCharacterNonGreedy(size_t opIndex)
977    {
978        YarrOp& op = m_ops[opIndex];
979        PatternTerm* term = op.m_term;
980        UChar ch = term->patternCharacter;
981
982        const RegisterID character = regT0;
983        const RegisterID countRegister = regT1;
984
985        m_backtrackingState.link(this);
986
987        loadFromFrame(term->frameLocation, countRegister);
988
989        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
990        if (!((ch > 0xff) && (m_charSize == Char8))) {
991            JumpList nonGreedyFailures;
992            nonGreedyFailures.append(atEndOfInput());
993            if (term->quantityCount != quantifyInfinite)
994                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
995            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
996
997            add32(TrustedImm32(1), countRegister);
998            add32(TrustedImm32(1), index);
999
1000            jump(op.m_reentry);
1001            nonGreedyFailures.link(this);
1002        }
1003
1004        sub32(countRegister, index);
1005        m_backtrackingState.fallthrough();
1006    }
1007
1008    void generateCharacterClassOnce(size_t opIndex)
1009    {
1010        YarrOp& op = m_ops[opIndex];
1011        PatternTerm* term = op.m_term;
1012
1013        const RegisterID character = regT0;
1014
1015        JumpList matchDest;
1016        readCharacter(term->inputPosition - m_checked, character);
1017        matchCharacterClass(character, matchDest, term->characterClass);
1018
1019        if (term->invert())
1020            op.m_jumps.append(matchDest);
1021        else {
1022            op.m_jumps.append(jump());
1023            matchDest.link(this);
1024        }
1025    }
1026    void backtrackCharacterClassOnce(size_t opIndex)
1027    {
1028        backtrackTermDefault(opIndex);
1029    }
1030
1031    void generateCharacterClassFixed(size_t opIndex)
1032    {
1033        YarrOp& op = m_ops[opIndex];
1034        PatternTerm* term = op.m_term;
1035
1036        const RegisterID character = regT0;
1037        const RegisterID countRegister = regT1;
1038
1039        move(index, countRegister);
1040        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
1041
1042        Label loop(this);
1043        JumpList matchDest;
1044        if (m_charSize == Char8)
1045            load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
1046        else
1047            load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
1048        matchCharacterClass(character, matchDest, term->characterClass);
1049
1050        if (term->invert())
1051            op.m_jumps.append(matchDest);
1052        else {
1053            op.m_jumps.append(jump());
1054            matchDest.link(this);
1055        }
1056
1057        add32(TrustedImm32(1), countRegister);
1058        branch32(NotEqual, countRegister, index).linkTo(loop, this);
1059    }
1060    void backtrackCharacterClassFixed(size_t opIndex)
1061    {
1062        backtrackTermDefault(opIndex);
1063    }
1064
1065    void generateCharacterClassGreedy(size_t opIndex)
1066    {
1067        YarrOp& op = m_ops[opIndex];
1068        PatternTerm* term = op.m_term;
1069
1070        const RegisterID character = regT0;
1071        const RegisterID countRegister = regT1;
1072
1073        move(TrustedImm32(0), countRegister);
1074
1075        JumpList failures;
1076        Label loop(this);
1077        failures.append(atEndOfInput());
1078
1079        if (term->invert()) {
1080            readCharacter(term->inputPosition - m_checked, character);
1081            matchCharacterClass(character, failures, term->characterClass);
1082        } else {
1083            JumpList matchDest;
1084            readCharacter(term->inputPosition - m_checked, character);
1085            matchCharacterClass(character, matchDest, term->characterClass);
1086            failures.append(jump());
1087            matchDest.link(this);
1088        }
1089
1090        add32(TrustedImm32(1), countRegister);
1091        add32(TrustedImm32(1), index);
1092        if (term->quantityCount != quantifyInfinite) {
1093            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
1094            failures.append(jump());
1095        } else
1096            jump(loop);
1097
1098        failures.link(this);
1099        op.m_reentry = label();
1100
1101        storeToFrame(countRegister, term->frameLocation);
1102    }
1103    void backtrackCharacterClassGreedy(size_t opIndex)
1104    {
1105        YarrOp& op = m_ops[opIndex];
1106        PatternTerm* term = op.m_term;
1107
1108        const RegisterID countRegister = regT1;
1109
1110        m_backtrackingState.link(this);
1111
1112        loadFromFrame(term->frameLocation, countRegister);
1113        m_backtrackingState.append(branchTest32(Zero, countRegister));
1114        sub32(TrustedImm32(1), countRegister);
1115        sub32(TrustedImm32(1), index);
1116        jump(op.m_reentry);
1117    }
1118
1119    void generateCharacterClassNonGreedy(size_t opIndex)
1120    {
1121        YarrOp& op = m_ops[opIndex];
1122        PatternTerm* term = op.m_term;
1123
1124        const RegisterID countRegister = regT1;
1125
1126        move(TrustedImm32(0), countRegister);
1127        op.m_reentry = label();
1128        storeToFrame(countRegister, term->frameLocation);
1129    }
1130    void backtrackCharacterClassNonGreedy(size_t opIndex)
1131    {
1132        YarrOp& op = m_ops[opIndex];
1133        PatternTerm* term = op.m_term;
1134
1135        const RegisterID character = regT0;
1136        const RegisterID countRegister = regT1;
1137
1138        JumpList nonGreedyFailures;
1139
1140        m_backtrackingState.link(this);
1141
1142        loadFromFrame(term->frameLocation, countRegister);
1143
1144        nonGreedyFailures.append(atEndOfInput());
1145        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
1146
1147        JumpList matchDest;
1148        readCharacter(term->inputPosition - m_checked, character);
1149        matchCharacterClass(character, matchDest, term->characterClass);
1150
1151        if (term->invert())
1152            nonGreedyFailures.append(matchDest);
1153        else {
1154            nonGreedyFailures.append(jump());
1155            matchDest.link(this);
1156        }
1157
1158        add32(TrustedImm32(1), countRegister);
1159        add32(TrustedImm32(1), index);
1160
1161        jump(op.m_reentry);
1162
1163        nonGreedyFailures.link(this);
1164        sub32(countRegister, index);
1165        m_backtrackingState.fallthrough();
1166    }
1167
1168    void generateDotStarEnclosure(size_t opIndex)
1169    {
1170        YarrOp& op = m_ops[opIndex];
1171        PatternTerm* term = op.m_term;
1172
1173        const RegisterID character = regT0;
1174        const RegisterID matchPos = regT1;
1175
1176        JumpList foundBeginningNewLine;
1177        JumpList saveStartIndex;
1178        JumpList foundEndingNewLine;
1179
1180        ASSERT(!m_pattern.m_body->m_hasFixedSize);
1181        getMatchStart(matchPos);
1182
1183        saveStartIndex.append(branchTest32(Zero, matchPos));
1184        Label findBOLLoop(this);
1185        sub32(TrustedImm32(1), matchPos);
1186        if (m_charSize == Char8)
1187            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1188        else
1189            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1190        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
1191        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
1192        saveStartIndex.append(jump());
1193
1194        foundBeginningNewLine.link(this);
1195        add32(TrustedImm32(1), matchPos); // Advance past newline
1196        saveStartIndex.link(this);
1197
1198        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
1199            op.m_jumps.append(branchTest32(NonZero, matchPos));
1200
1201        ASSERT(!m_pattern.m_body->m_hasFixedSize);
1202        setMatchStart(matchPos);
1203
1204        move(index, matchPos);
1205
1206        Label findEOLLoop(this);
1207        foundEndingNewLine.append(branch32(Equal, matchPos, length));
1208        if (m_charSize == Char8)
1209            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1210        else
1211            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1212        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
1213        add32(TrustedImm32(1), matchPos);
1214        jump(findEOLLoop);
1215
1216        foundEndingNewLine.link(this);
1217
1218        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
1219            op.m_jumps.append(branch32(NotEqual, matchPos, length));
1220
1221        move(matchPos, index);
1222    }
1223
1224    void backtrackDotStarEnclosure(size_t opIndex)
1225    {
1226        backtrackTermDefault(opIndex);
1227    }
1228
1229    // Code generation/backtracking for simple terms
1230    // (pattern characters, character classes, and assertions).
1231    // These methods farm out work to the set of functions above.
1232    void generateTerm(size_t opIndex)
1233    {
1234        YarrOp& op = m_ops[opIndex];
1235        PatternTerm* term = op.m_term;
1236
1237        switch (term->type) {
1238        case PatternTerm::TypePatternCharacter:
1239            switch (term->quantityType) {
1240            case QuantifierFixedCount:
1241                if (term->quantityCount == 1)
1242                    generatePatternCharacterOnce(opIndex);
1243                else
1244                    generatePatternCharacterFixed(opIndex);
1245                break;
1246            case QuantifierGreedy:
1247                generatePatternCharacterGreedy(opIndex);
1248                break;
1249            case QuantifierNonGreedy:
1250                generatePatternCharacterNonGreedy(opIndex);
1251                break;
1252            }
1253            break;
1254
1255        case PatternTerm::TypeCharacterClass:
1256            switch (term->quantityType) {
1257            case QuantifierFixedCount:
1258                if (term->quantityCount == 1)
1259                    generateCharacterClassOnce(opIndex);
1260                else
1261                    generateCharacterClassFixed(opIndex);
1262                break;
1263            case QuantifierGreedy:
1264                generateCharacterClassGreedy(opIndex);
1265                break;
1266            case QuantifierNonGreedy:
1267                generateCharacterClassNonGreedy(opIndex);
1268                break;
1269            }
1270            break;
1271
1272        case PatternTerm::TypeAssertionBOL:
1273            generateAssertionBOL(opIndex);
1274            break;
1275
1276        case PatternTerm::TypeAssertionEOL:
1277            generateAssertionEOL(opIndex);
1278            break;
1279
1280        case PatternTerm::TypeAssertionWordBoundary:
1281            generateAssertionWordBoundary(opIndex);
1282            break;
1283
1284        case PatternTerm::TypeForwardReference:
1285            break;
1286
1287        case PatternTerm::TypeParenthesesSubpattern:
1288        case PatternTerm::TypeParentheticalAssertion:
1289            RELEASE_ASSERT_NOT_REACHED();
1290        case PatternTerm::TypeBackReference:
1291            m_shouldFallBack = true;
1292            break;
1293        case PatternTerm::TypeDotStarEnclosure:
1294            generateDotStarEnclosure(opIndex);
1295            break;
1296        }
1297    }
1298    void backtrackTerm(size_t opIndex)
1299    {
1300        YarrOp& op = m_ops[opIndex];
1301        PatternTerm* term = op.m_term;
1302
1303        switch (term->type) {
1304        case PatternTerm::TypePatternCharacter:
1305            switch (term->quantityType) {
1306            case QuantifierFixedCount:
1307                if (term->quantityCount == 1)
1308                    backtrackPatternCharacterOnce(opIndex);
1309                else
1310                    backtrackPatternCharacterFixed(opIndex);
1311                break;
1312            case QuantifierGreedy:
1313                backtrackPatternCharacterGreedy(opIndex);
1314                break;
1315            case QuantifierNonGreedy:
1316                backtrackPatternCharacterNonGreedy(opIndex);
1317                break;
1318            }
1319            break;
1320
1321        case PatternTerm::TypeCharacterClass:
1322            switch (term->quantityType) {
1323            case QuantifierFixedCount:
1324                if (term->quantityCount == 1)
1325                    backtrackCharacterClassOnce(opIndex);
1326                else
1327                    backtrackCharacterClassFixed(opIndex);
1328                break;
1329            case QuantifierGreedy:
1330                backtrackCharacterClassGreedy(opIndex);
1331                break;
1332            case QuantifierNonGreedy:
1333                backtrackCharacterClassNonGreedy(opIndex);
1334                break;
1335            }
1336            break;
1337
1338        case PatternTerm::TypeAssertionBOL:
1339            backtrackAssertionBOL(opIndex);
1340            break;
1341
1342        case PatternTerm::TypeAssertionEOL:
1343            backtrackAssertionEOL(opIndex);
1344            break;
1345
1346        case PatternTerm::TypeAssertionWordBoundary:
1347            backtrackAssertionWordBoundary(opIndex);
1348            break;
1349
1350        case PatternTerm::TypeForwardReference:
1351            break;
1352
1353        case PatternTerm::TypeParenthesesSubpattern:
1354        case PatternTerm::TypeParentheticalAssertion:
1355            RELEASE_ASSERT_NOT_REACHED();
1356
1357        case PatternTerm::TypeDotStarEnclosure:
1358            backtrackDotStarEnclosure(opIndex);
1359            break;
1360
1361        case PatternTerm::TypeBackReference:
1362            m_shouldFallBack = true;
1363            break;
1364        }
1365    }
1366
1367    void generate()
1368    {
1369        // Forwards generate the matching code.
1370        ASSERT(m_ops.size());
1371        size_t opIndex = 0;
1372
1373        do {
1374            YarrOp& op = m_ops[opIndex];
1375            switch (op.m_op) {
1376
1377            case OpTerm:
1378                generateTerm(opIndex);
1379                break;
1380
1381            // OpBodyAlternativeBegin/Next/End
1382            //
1383            // These nodes wrap the set of alternatives in the body of the regular expression.
1384            // There may be either one or two chains of OpBodyAlternative nodes, one representing
1385            // the 'once through' sequence of alternatives (if any exist), and one representing
1386            // the repeating alternatives (again, if any exist).
1387            //
1388            // Upon normal entry to the Begin alternative, we will check that input is available.
1389            // Reentry to the Begin alternative will take place after the check has taken place,
1390            // and will assume that the input position has already been progressed as appropriate.
1391            //
1392            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1393            // successfully completed a match - return a success state from JIT code.
1394            //
1395            // Next alternatives allow for reentry optimized to suit backtracking from its
1396            // preceding alternative. It expects the input position to still be set to a position
1397            // appropriate to its predecessor, and it will only perform an input check if the
1398            // predecessor had a minimum size less than its own.
1399            //
1400            // In the case 'once through' expressions, the End node will also have a reentry
1401            // point to jump to when the last alternative fails. Again, this expects the input
1402            // position to still reflect that expected by the prior alternative.
1403            case OpBodyAlternativeBegin: {
1404                PatternAlternative* alternative = op.m_alternative;
1405
1406                // Upon entry at the head of the set of alternatives, check if input is available
1407                // to run the first alternative. (This progresses the input position).
1408                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1409                // We will reenter after the check, and assume the input position to have been
1410                // set as appropriate to this alternative.
1411                op.m_reentry = label();
1412
1413                m_checked += alternative->m_minimumSize;
1414                break;
1415            }
1416            case OpBodyAlternativeNext:
1417            case OpBodyAlternativeEnd: {
1418                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1419                PatternAlternative* alternative = op.m_alternative;
1420
1421                // If we get here, the prior alternative matched - return success.
1422
1423                // Adjust the stack pointer to remove the pattern's frame.
1424                removeCallFrame();
1425
1426                // Load appropriate values into the return register and the first output
1427                // slot, and return. In the case of pattern with a fixed size, we will
1428                // not have yet set the value in the first
1429                ASSERT(index != returnRegister);
1430                if (m_pattern.m_body->m_hasFixedSize) {
1431                    move(index, returnRegister);
1432                    if (priorAlternative->m_minimumSize)
1433                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1434                    if (compileMode == IncludeSubpatterns)
1435                        store32(returnRegister, output);
1436                } else
1437                    getMatchStart(returnRegister);
1438                if (compileMode == IncludeSubpatterns)
1439                    store32(index, Address(output, 4));
1440                move(index, returnRegister2);
1441
1442                generateReturn();
1443
1444                // This is the divide between the tail of the prior alternative, above, and
1445                // the head of the subsequent alternative, below.
1446
1447                if (op.m_op == OpBodyAlternativeNext) {
1448                    // This is the reentry point for the Next alternative. We expect any code
1449                    // that jumps here to do so with the input position matching that of the
1450                    // PRIOR alteranative, and we will only check input availability if we
1451                    // need to progress it forwards.
1452                    op.m_reentry = label();
1453                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1454                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1455                        op.m_jumps.append(jumpIfNoAvailableInput());
1456                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1457                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1458                } else if (op.m_nextOp == notFound) {
1459                    // This is the reentry point for the End of 'once through' alternatives,
1460                    // jumped to when the last alternative fails to match.
1461                    op.m_reentry = label();
1462                    sub32(Imm32(priorAlternative->m_minimumSize), index);
1463                }
1464
1465                if (op.m_op == OpBodyAlternativeNext)
1466                    m_checked += alternative->m_minimumSize;
1467                m_checked -= priorAlternative->m_minimumSize;
1468                break;
1469            }
1470
1471            // OpSimpleNestedAlternativeBegin/Next/End
1472            // OpNestedAlternativeBegin/Next/End
1473            //
1474            // These nodes are used to handle sets of alternatives that are nested within
1475            // subpatterns and parenthetical assertions. The 'simple' forms are used where
1476            // we do not need to be able to backtrack back into any alternative other than
1477            // the last, the normal forms allow backtracking into any alternative.
1478            //
1479            // Each Begin/Next node is responsible for planting an input check to ensure
1480            // sufficient input is available on entry. Next nodes additionally need to
1481            // jump to the end - Next nodes use the End node's m_jumps list to hold this
1482            // set of jumps.
1483            //
1484            // In the non-simple forms, successful alternative matches must store a
1485            // 'return address' using a DataLabelPtr, used to store the address to jump
1486            // to when backtracking, to get to the code for the appropriate alternative.
1487            case OpSimpleNestedAlternativeBegin:
1488            case OpNestedAlternativeBegin: {
1489                PatternTerm* term = op.m_term;
1490                PatternAlternative* alternative = op.m_alternative;
1491                PatternDisjunction* disjunction = term->parentheses.disjunction;
1492
1493                // Calculate how much input we need to check for, and if non-zero check.
1494                op.m_checkAdjust = alternative->m_minimumSize;
1495                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1496                    op.m_checkAdjust -= disjunction->m_minimumSize;
1497                if (op.m_checkAdjust)
1498                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1499
1500                m_checked += op.m_checkAdjust;
1501                break;
1502            }
1503            case OpSimpleNestedAlternativeNext:
1504            case OpNestedAlternativeNext: {
1505                PatternTerm* term = op.m_term;
1506                PatternAlternative* alternative = op.m_alternative;
1507                PatternDisjunction* disjunction = term->parentheses.disjunction;
1508
1509                // In the non-simple case, store a 'return address' so we can backtrack correctly.
1510                if (op.m_op == OpNestedAlternativeNext) {
1511                    unsigned parenthesesFrameLocation = term->frameLocation;
1512                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
1513                    if (term->quantityType != QuantifierFixedCount)
1514                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1515                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1516                }
1517
1518                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1519                    // If the previous alternative matched without consuming characters then
1520                    // backtrack to try to match while consumming some input.
1521                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1522                }
1523
1524                // If we reach here then the last alternative has matched - jump to the
1525                // End node, to skip over any further alternatives.
1526                //
1527                // FIXME: this is logically O(N^2) (though N can be expected to be very
1528                // small). We could avoid this either by adding an extra jump to the JIT
1529                // data structures, or by making backtracking code that jumps to Next
1530                // alternatives are responsible for checking that input is available (if
1531                // we didn't need to plant the input checks, then m_jumps would be free).
1532                YarrOp* endOp = &m_ops[op.m_nextOp];
1533                while (endOp->m_nextOp != notFound) {
1534                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1535                    endOp = &m_ops[endOp->m_nextOp];
1536                }
1537                ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1538                endOp->m_jumps.append(jump());
1539
1540                // This is the entry point for the next alternative.
1541                op.m_reentry = label();
1542
1543                // Calculate how much input we need to check for, and if non-zero check.
1544                op.m_checkAdjust = alternative->m_minimumSize;
1545                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1546                    op.m_checkAdjust -= disjunction->m_minimumSize;
1547                if (op.m_checkAdjust)
1548                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1549
1550                YarrOp& lastOp = m_ops[op.m_previousOp];
1551                m_checked -= lastOp.m_checkAdjust;
1552                m_checked += op.m_checkAdjust;
1553                break;
1554            }
1555            case OpSimpleNestedAlternativeEnd:
1556            case OpNestedAlternativeEnd: {
1557                PatternTerm* term = op.m_term;
1558
1559                // In the non-simple case, store a 'return address' so we can backtrack correctly.
1560                if (op.m_op == OpNestedAlternativeEnd) {
1561                    unsigned parenthesesFrameLocation = term->frameLocation;
1562                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
1563                    if (term->quantityType != QuantifierFixedCount)
1564                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1565                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1566                }
1567
1568                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1569                    // If the previous alternative matched without consuming characters then
1570                    // backtrack to try to match while consumming some input.
1571                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1572                }
1573
1574                // If this set of alternatives contains more than one alternative,
1575                // then the Next nodes will have planted jumps to the End, and added
1576                // them to this node's m_jumps list.
1577                op.m_jumps.link(this);
1578                op.m_jumps.clear();
1579
1580                YarrOp& lastOp = m_ops[op.m_previousOp];
1581                m_checked -= lastOp.m_checkAdjust;
1582                break;
1583            }
1584
1585            // OpParenthesesSubpatternOnceBegin/End
1586            //
1587            // These nodes support (optionally) capturing subpatterns, that have a
1588            // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1589            case OpParenthesesSubpatternOnceBegin: {
1590                PatternTerm* term = op.m_term;
1591                unsigned parenthesesFrameLocation = term->frameLocation;
1592                const RegisterID indexTemporary = regT0;
1593                ASSERT(term->quantityCount == 1);
1594
1595                // Upon entry to a Greedy quantified set of parenthese store the index.
1596                // We'll use this for two purposes:
1597                //  - To indicate which iteration we are on of mathing the remainder of
1598                //    the expression after the parentheses - the first, including the
1599                //    match within the parentheses, or the second having skipped over them.
1600                //  - To check for empty matches, which must be rejected.
1601                //
1602                // At the head of a NonGreedy set of parentheses we'll immediately set the
1603                // value on the stack to -1 (indicating a match skipping the subpattern),
1604                // and plant a jump to the end. We'll also plant a label to backtrack to
1605                // to reenter the subpattern later, with a store to set up index on the
1606                // second iteration.
1607                //
1608                // FIXME: for capturing parens, could use the index in the capture array?
1609                if (term->quantityType == QuantifierGreedy)
1610                    storeToFrame(index, parenthesesFrameLocation);
1611                else if (term->quantityType == QuantifierNonGreedy) {
1612                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1613                    op.m_jumps.append(jump());
1614                    op.m_reentry = label();
1615                    storeToFrame(index, parenthesesFrameLocation);
1616                }
1617
1618                // If the parenthese are capturing, store the starting index value to the
1619                // captures array, offsetting as necessary.
1620                //
1621                // FIXME: could avoid offsetting this value in JIT code, apply
1622                // offsets only afterwards, at the point the results array is
1623                // being accessed.
1624                if (term->capture() && compileMode == IncludeSubpatterns) {
1625                    int inputOffset = term->inputPosition - m_checked;
1626                    if (term->quantityType == QuantifierFixedCount)
1627                        inputOffset -= term->parentheses.disjunction->m_minimumSize;
1628                    if (inputOffset) {
1629                        move(index, indexTemporary);
1630                        add32(Imm32(inputOffset), indexTemporary);
1631                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
1632                    } else
1633                        setSubpatternStart(index, term->parentheses.subpatternId);
1634                }
1635                break;
1636            }
1637            case OpParenthesesSubpatternOnceEnd: {
1638                PatternTerm* term = op.m_term;
1639                const RegisterID indexTemporary = regT0;
1640                ASSERT(term->quantityCount == 1);
1641
1642                // Runtime ASSERT to make sure that the nested alternative handled the
1643                // "no input consumed" check.
1644                if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
1645                    Jump pastBreakpoint;
1646                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1647                    abortWithReason(YARRNoInputConsumed);
1648                    pastBreakpoint.link(this);
1649                }
1650
1651                // If the parenthese are capturing, store the ending index value to the
1652                // captures array, offsetting as necessary.
1653                //
1654                // FIXME: could avoid offsetting this value in JIT code, apply
1655                // offsets only afterwards, at the point the results array is
1656                // being accessed.
1657                if (term->capture() && compileMode == IncludeSubpatterns) {
1658                    int inputOffset = term->inputPosition - m_checked;
1659                    if (inputOffset) {
1660                        move(index, indexTemporary);
1661                        add32(Imm32(inputOffset), indexTemporary);
1662                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
1663                    } else
1664                        setSubpatternEnd(index, term->parentheses.subpatternId);
1665                }
1666
1667                // If the parentheses are quantified Greedy then add a label to jump back
1668                // to if get a failed match from after the parentheses. For NonGreedy
1669                // parentheses, link the jump from before the subpattern to here.
1670                if (term->quantityType == QuantifierGreedy)
1671                    op.m_reentry = label();
1672                else if (term->quantityType == QuantifierNonGreedy) {
1673                    YarrOp& beginOp = m_ops[op.m_previousOp];
1674                    beginOp.m_jumps.link(this);
1675                }
1676                break;
1677            }
1678
1679            // OpParenthesesSubpatternTerminalBegin/End
1680            case OpParenthesesSubpatternTerminalBegin: {
1681                PatternTerm* term = op.m_term;
1682                ASSERT(term->quantityType == QuantifierGreedy);
1683                ASSERT(term->quantityCount == quantifyInfinite);
1684                ASSERT(!term->capture());
1685
1686                // Upon entry set a label to loop back to.
1687                op.m_reentry = label();
1688
1689                // Store the start index of the current match; we need to reject zero
1690                // length matches.
1691                storeToFrame(index, term->frameLocation);
1692                break;
1693            }
1694            case OpParenthesesSubpatternTerminalEnd: {
1695                YarrOp& beginOp = m_ops[op.m_previousOp];
1696                if (!ASSERT_DISABLED) {
1697                    PatternTerm* term = op.m_term;
1698
1699                    // Runtime ASSERT to make sure that the nested alternative handled the
1700                    // "no input consumed" check.
1701                    Jump pastBreakpoint;
1702                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1703                    abortWithReason(YARRNoInputConsumed);
1704                    pastBreakpoint.link(this);
1705                }
1706
1707                // We know that the match is non-zero, we can accept it  and
1708                // loop back up to the head of the subpattern.
1709                jump(beginOp.m_reentry);
1710
1711                // This is the entry point to jump to when we stop matching - we will
1712                // do so once the subpattern cannot match any more.
1713                op.m_reentry = label();
1714                break;
1715            }
1716
1717            // OpParentheticalAssertionBegin/End
1718            case OpParentheticalAssertionBegin: {
1719                PatternTerm* term = op.m_term;
1720
1721                // Store the current index - assertions should not update index, so
1722                // we will need to restore it upon a successful match.
1723                unsigned parenthesesFrameLocation = term->frameLocation;
1724                storeToFrame(index, parenthesesFrameLocation);
1725
1726                // Check
1727                op.m_checkAdjust = m_checked - term->inputPosition;
1728                if (op.m_checkAdjust)
1729                    sub32(Imm32(op.m_checkAdjust), index);
1730
1731                m_checked -= op.m_checkAdjust;
1732                break;
1733            }
1734            case OpParentheticalAssertionEnd: {
1735                PatternTerm* term = op.m_term;
1736
1737                // Restore the input index value.
1738                unsigned parenthesesFrameLocation = term->frameLocation;
1739                loadFromFrame(parenthesesFrameLocation, index);
1740
1741                // If inverted, a successful match of the assertion must be treated
1742                // as a failure, so jump to backtracking.
1743                if (term->invert()) {
1744                    op.m_jumps.append(jump());
1745                    op.m_reentry = label();
1746                }
1747
1748                YarrOp& lastOp = m_ops[op.m_previousOp];
1749                m_checked += lastOp.m_checkAdjust;
1750                break;
1751            }
1752
1753            case OpMatchFailed:
1754                removeCallFrame();
1755                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1756                move(TrustedImm32(0), returnRegister2);
1757                generateReturn();
1758                break;
1759            }
1760
1761            ++opIndex;
1762        } while (opIndex < m_ops.size());
1763    }
1764
1765    void backtrack()
1766    {
1767        // Backwards generate the backtracking code.
1768        size_t opIndex = m_ops.size();
1769        ASSERT(opIndex);
1770
1771        do {
1772            --opIndex;
1773            YarrOp& op = m_ops[opIndex];
1774            switch (op.m_op) {
1775
1776            case OpTerm:
1777                backtrackTerm(opIndex);
1778                break;
1779
1780            // OpBodyAlternativeBegin/Next/End
1781            //
1782            // For each Begin/Next node representing an alternative, we need to decide what to do
1783            // in two circumstances:
1784            //  - If we backtrack back into this node, from within the alternative.
1785            //  - If the input check at the head of the alternative fails (if this exists).
1786            //
1787            // We treat these two cases differently since in the former case we have slightly
1788            // more information - since we are backtracking out of a prior alternative we know
1789            // that at least enough input was available to run it. For example, given the regular
1790            // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1791            // character match of 'a'), then we need not perform an additional input availability
1792            // check before running the second alternative.
1793            //
1794            // Backtracking required differs for the last alternative, which in the case of the
1795            // repeating set of alternatives must loop. The code generated for the last alternative
1796            // will also be used to handle all input check failures from any prior alternatives -
1797            // these require similar functionality, in seeking the next available alternative for
1798            // which there is sufficient input.
1799            //
1800            // Since backtracking of all other alternatives simply requires us to link backtracks
1801            // to the reentry point for the subsequent alternative, we will only be generating any
1802            // code when backtracking the last alternative.
1803            case OpBodyAlternativeBegin:
1804            case OpBodyAlternativeNext: {
1805                PatternAlternative* alternative = op.m_alternative;
1806
1807                if (op.m_op == OpBodyAlternativeNext) {
1808                    PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1809                    m_checked += priorAlternative->m_minimumSize;
1810                }
1811                m_checked -= alternative->m_minimumSize;
1812
1813                // Is this the last alternative? If not, then if we backtrack to this point we just
1814                // need to jump to try to match the next alternative.
1815                if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1816                    m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1817                    break;
1818                }
1819                YarrOp& endOp = m_ops[op.m_nextOp];
1820
1821                YarrOp* beginOp = &op;
1822                while (beginOp->m_op != OpBodyAlternativeBegin) {
1823                    ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1824                    beginOp = &m_ops[beginOp->m_previousOp];
1825                }
1826
1827                bool onceThrough = endOp.m_nextOp == notFound;
1828
1829                // First, generate code to handle cases where we backtrack out of an attempted match
1830                // of the last alternative. If this is a 'once through' set of alternatives then we
1831                // have nothing to do - link this straight through to the End.
1832                if (onceThrough)
1833                    m_backtrackingState.linkTo(endOp.m_reentry, this);
1834                else {
1835                    // If we don't need to move the input poistion, and the pattern has a fixed size
1836                    // (in which case we omit the store of the start index until the pattern has matched)
1837                    // then we can just link the backtrack out of the last alternative straight to the
1838                    // head of the first alternative.
1839                    if (m_pattern.m_body->m_hasFixedSize
1840                        && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1841                        && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1842                        m_backtrackingState.linkTo(beginOp->m_reentry, this);
1843                    else {
1844                        // We need to generate a trampoline of code to execute before looping back
1845                        // around to the first alternative.
1846                        m_backtrackingState.link(this);
1847
1848                        // If the pattern size is not fixed, then store the start index, for use if we match.
1849                        if (!m_pattern.m_body->m_hasFixedSize) {
1850                            if (alternative->m_minimumSize == 1)
1851                                setMatchStart(index);
1852                            else {
1853                                move(index, regT0);
1854                                if (alternative->m_minimumSize)
1855                                    sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1856                                else
1857                                    add32(TrustedImm32(1), regT0);
1858                                setMatchStart(regT0);
1859                            }
1860                        }
1861
1862                        // Generate code to loop. Check whether the last alternative is longer than the
1863                        // first (e.g. /a|xy/ or /a|xyz/).
1864                        if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1865                            // We want to loop, and increment input position. If the delta is 1, it is
1866                            // already correctly incremented, if more than one then decrement as appropriate.
1867                            unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1868                            ASSERT(delta);
1869                            if (delta != 1)
1870                                sub32(Imm32(delta - 1), index);
1871                            jump(beginOp->m_reentry);
1872                        } else {
1873                            // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1874                            // be sufficent input available to handle this, so just fall through.
1875                            unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1876                            if (delta != 0xFFFFFFFFu) {
1877                                // We need to check input because we are incrementing the input.
1878                                add32(Imm32(delta + 1), index);
1879                                checkInput().linkTo(beginOp->m_reentry, this);
1880                            }
1881                        }
1882                    }
1883                }
1884
1885                // We can reach this point in the code in two ways:
1886                //  - Fallthrough from the code above (a repeating alternative backtracked out of its
1887                //    last alternative, and did not have sufficent input to run the first).
1888                //  - We will loop back up to the following label when a releating alternative loops,
1889                //    following a failed input check.
1890                //
1891                // Either way, we have just failed the input check for the first alternative.
1892                Label firstInputCheckFailed(this);
1893
1894                // Generate code to handle input check failures from alternatives except the last.
1895                // prevOp is the alternative we're handling a bail out from (initially Begin), and
1896                // nextOp is the alternative we will be attempting to reenter into.
1897                //
1898                // We will link input check failures from the forwards matching path back to the code
1899                // that can handle them.
1900                YarrOp* prevOp = beginOp;
1901                YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1902                while (nextOp->m_op != OpBodyAlternativeEnd) {
1903                    prevOp->m_jumps.link(this);
1904
1905                    // We only get here if an input check fails, it is only worth checking again
1906                    // if the next alternative has a minimum size less than the last.
1907                    if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1908                        // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1909                        // subtract delta back out, and reduce this code. Should performance test
1910                        // the benefit of this.
1911                        unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1912                        sub32(Imm32(delta), index);
1913                        Jump fail = jumpIfNoAvailableInput();
1914                        add32(Imm32(delta), index);
1915                        jump(nextOp->m_reentry);
1916                        fail.link(this);
1917                    } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1918                        add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1919                    prevOp = nextOp;
1920                    nextOp = &m_ops[nextOp->m_nextOp];
1921                }
1922
1923                // We fall through to here if there is insufficient input to run the last alternative.
1924
1925                // If there is insufficient input to run the last alternative, then for 'once through'
1926                // alternatives we are done - just jump back up into the forwards matching path at the End.
1927                if (onceThrough) {
1928                    op.m_jumps.linkTo(endOp.m_reentry, this);
1929                    jump(endOp.m_reentry);
1930                    break;
1931                }
1932
1933                // For repeating alternatives, link any input check failure from the last alternative to
1934                // this point.
1935                op.m_jumps.link(this);
1936
1937                bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1938
1939                // Check for cases where input position is already incremented by 1 for the last
1940                // alternative (this is particularly useful where the minimum size of the body
1941                // disjunction is 0, e.g. /a*|b/).
1942                if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1943                    // index is already incremented by 1, so just store it now!
1944                    setMatchStart(index);
1945                    needsToUpdateMatchStart = false;
1946                }
1947
1948                // Check whether there is sufficient input to loop. Increment the input position by
1949                // one, and check. Also add in the minimum disjunction size before checking - there
1950                // is no point in looping if we're just going to fail all the input checks around
1951                // the next iteration.
1952                ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1953                if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1954                    // If the last alternative had the same minimum size as the disjunction,
1955                    // just simply increment input pos by 1, no adjustment based on minimum size.
1956                    add32(TrustedImm32(1), index);
1957                } else {
1958                    // If the minumum for the last alternative was one greater than than that
1959                    // for the disjunction, we're already progressed by 1, nothing to do!
1960                    unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1961                    if (delta)
1962                        sub32(Imm32(delta), index);
1963                }
1964                Jump matchFailed = jumpIfNoAvailableInput();
1965
1966                if (needsToUpdateMatchStart) {
1967                    if (!m_pattern.m_body->m_minimumSize)
1968                        setMatchStart(index);
1969                    else {
1970                        move(index, regT0);
1971                        sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1972                        setMatchStart(regT0);
1973                    }
1974                }
1975
1976                // Calculate how much more input the first alternative requires than the minimum
1977                // for the body as a whole. If no more is needed then we dont need an additional
1978                // input check here - jump straight back up to the start of the first alternative.
1979                if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1980                    jump(beginOp->m_reentry);
1981                else {
1982                    if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1983                        add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1984                    else
1985                        sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1986                    checkInput().linkTo(beginOp->m_reentry, this);
1987                    jump(firstInputCheckFailed);
1988                }
1989
1990                // We jump to here if we iterate to the point that there is insufficient input to
1991                // run any matches, and need to return a failure state from JIT code.
1992                matchFailed.link(this);
1993
1994                removeCallFrame();
1995                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1996                move(TrustedImm32(0), returnRegister2);
1997                generateReturn();
1998                break;
1999            }
2000            case OpBodyAlternativeEnd: {
2001                // We should never backtrack back into a body disjunction.
2002                ASSERT(m_backtrackingState.isEmpty());
2003
2004                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2005                m_checked += priorAlternative->m_minimumSize;
2006                break;
2007            }
2008
2009            // OpSimpleNestedAlternativeBegin/Next/End
2010            // OpNestedAlternativeBegin/Next/End
2011            //
2012            // Generate code for when we backtrack back out of an alternative into
2013            // a Begin or Next node, or when the entry input count check fails. If
2014            // there are more alternatives we need to jump to the next alternative,
2015            // if not we backtrack back out of the current set of parentheses.
2016            //
2017            // In the case of non-simple nested assertions we need to also link the
2018            // 'return address' appropriately to backtrack back out into the correct
2019            // alternative.
2020            case OpSimpleNestedAlternativeBegin:
2021            case OpSimpleNestedAlternativeNext:
2022            case OpNestedAlternativeBegin:
2023            case OpNestedAlternativeNext: {
2024                YarrOp& nextOp = m_ops[op.m_nextOp];
2025                bool isBegin = op.m_previousOp == notFound;
2026                bool isLastAlternative = nextOp.m_nextOp == notFound;
2027                ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
2028                ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
2029
2030                // Treat an input check failure the same as a failed match.
2031                m_backtrackingState.append(op.m_jumps);
2032
2033                // Set the backtracks to jump to the appropriate place. We may need
2034                // to link the backtracks in one of three different way depending on
2035                // the type of alternative we are dealing with:
2036                //  - A single alternative, with no simplings.
2037                //  - The last alternative of a set of two or more.
2038                //  - An alternative other than the last of a set of two or more.
2039                //
2040                // In the case of a single alternative on its own, we don't need to
2041                // jump anywhere - if the alternative fails to match we can just
2042                // continue to backtrack out of the parentheses without jumping.
2043                //
2044                // In the case of the last alternative in a set of more than one, we
2045                // need to jump to return back out to the beginning. We'll do so by
2046                // adding a jump to the End node's m_jumps list, and linking this
2047                // when we come to generate the Begin node. For alternatives other
2048                // than the last, we need to jump to the next alternative.
2049                //
2050                // If the alternative had adjusted the input position we must link
2051                // backtracking to here, correct, and then jump on. If not we can
2052                // link the backtracks directly to their destination.
2053                if (op.m_checkAdjust) {
2054                    // Handle the cases where we need to link the backtracks here.
2055                    m_backtrackingState.link(this);
2056                    sub32(Imm32(op.m_checkAdjust), index);
2057                    if (!isLastAlternative) {
2058                        // An alternative that is not the last should jump to its successor.
2059                        jump(nextOp.m_reentry);
2060                    } else if (!isBegin) {
2061                        // The last of more than one alternatives must jump back to the beginning.
2062                        nextOp.m_jumps.append(jump());
2063                    } else {
2064                        // A single alternative on its own can fall through.
2065                        m_backtrackingState.fallthrough();
2066                    }
2067                } else {
2068                    // Handle the cases where we can link the backtracks directly to their destinations.
2069                    if (!isLastAlternative) {
2070                        // An alternative that is not the last should jump to its successor.
2071                        m_backtrackingState.linkTo(nextOp.m_reentry, this);
2072                    } else if (!isBegin) {
2073                        // The last of more than one alternatives must jump back to the beginning.
2074                        m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
2075                    }
2076                    // In the case of a single alternative on its own do nothing - it can fall through.
2077                }
2078
2079                // If there is a backtrack jump from a zero length match link it here.
2080                if (op.m_zeroLengthMatch.isSet())
2081                    m_backtrackingState.append(op.m_zeroLengthMatch);
2082
2083                // At this point we've handled the backtracking back into this node.
2084                // Now link any backtracks that need to jump to here.
2085
2086                // For non-simple alternatives, link the alternative's 'return address'
2087                // so that we backtrack back out into the previous alternative.
2088                if (op.m_op == OpNestedAlternativeNext)
2089                    m_backtrackingState.append(op.m_returnAddress);
2090
2091                // If there is more than one alternative, then the last alternative will
2092                // have planted a jump to be linked to the end. This jump was added to the
2093                // End node's m_jumps list. If we are back at the beginning, link it here.
2094                if (isBegin) {
2095                    YarrOp* endOp = &m_ops[op.m_nextOp];
2096                    while (endOp->m_nextOp != notFound) {
2097                        ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2098                        endOp = &m_ops[endOp->m_nextOp];
2099                    }
2100                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2101                    m_backtrackingState.append(endOp->m_jumps);
2102                }
2103
2104                if (!isBegin) {
2105                    YarrOp& lastOp = m_ops[op.m_previousOp];
2106                    m_checked += lastOp.m_checkAdjust;
2107                }
2108                m_checked -= op.m_checkAdjust;
2109                break;
2110            }
2111            case OpSimpleNestedAlternativeEnd:
2112            case OpNestedAlternativeEnd: {
2113                PatternTerm* term = op.m_term;
2114
2115                // If there is a backtrack jump from a zero length match link it here.
2116                if (op.m_zeroLengthMatch.isSet())
2117                    m_backtrackingState.append(op.m_zeroLengthMatch);
2118
2119                // If we backtrack into the end of a simple subpattern do nothing;
2120                // just continue through into the last alternative. If we backtrack
2121                // into the end of a non-simple set of alterntives we need to jump
2122                // to the backtracking return address set up during generation.
2123                if (op.m_op == OpNestedAlternativeEnd) {
2124                    m_backtrackingState.link(this);
2125
2126                    // Plant a jump to the return address.
2127                    unsigned parenthesesFrameLocation = term->frameLocation;
2128                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
2129                    if (term->quantityType != QuantifierFixedCount)
2130                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2131                    loadFromFrameAndJump(alternativeFrameLocation);
2132
2133                    // Link the DataLabelPtr associated with the end of the last
2134                    // alternative to this point.
2135                    m_backtrackingState.append(op.m_returnAddress);
2136                }
2137
2138                YarrOp& lastOp = m_ops[op.m_previousOp];
2139                m_checked += lastOp.m_checkAdjust;
2140                break;
2141            }
2142
2143            // OpParenthesesSubpatternOnceBegin/End
2144            //
2145            // When we are backtracking back out of a capturing subpattern we need
2146            // to clear the start index in the matches output array, to record that
2147            // this subpattern has not been captured.
2148            //
2149            // When backtracking back out of a Greedy quantified subpattern we need
2150            // to catch this, and try running the remainder of the alternative after
2151            // the subpattern again, skipping the parentheses.
2152            //
2153            // Upon backtracking back into a quantified set of parentheses we need to
2154            // check whether we were currently skipping the subpattern. If not, we
2155            // can backtrack into them, if we were we need to either backtrack back
2156            // out of the start of the parentheses, or jump back to the forwards
2157            // matching start, depending of whether the match is Greedy or NonGreedy.
2158            case OpParenthesesSubpatternOnceBegin: {
2159                PatternTerm* term = op.m_term;
2160                ASSERT(term->quantityCount == 1);
2161
2162                // We only need to backtrack to thispoint if capturing or greedy.
2163                if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
2164                    m_backtrackingState.link(this);
2165
2166                    // If capturing, clear the capture (we only need to reset start).
2167                    if (term->capture() && compileMode == IncludeSubpatterns)
2168                        clearSubpatternStart(term->parentheses.subpatternId);
2169
2170                    // If Greedy, jump to the end.
2171                    if (term->quantityType == QuantifierGreedy) {
2172                        // Clear the flag in the stackframe indicating we ran through the subpattern.
2173                        unsigned parenthesesFrameLocation = term->frameLocation;
2174                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
2175                        // Jump to after the parentheses, skipping the subpattern.
2176                        jump(m_ops[op.m_nextOp].m_reentry);
2177                        // A backtrack from after the parentheses, when skipping the subpattern,
2178                        // will jump back to here.
2179                        op.m_jumps.link(this);
2180                    }
2181
2182                    m_backtrackingState.fallthrough();
2183                }
2184                break;
2185            }
2186            case OpParenthesesSubpatternOnceEnd: {
2187                PatternTerm* term = op.m_term;
2188
2189                if (term->quantityType != QuantifierFixedCount) {
2190                    m_backtrackingState.link(this);
2191
2192                    // Check whether we should backtrack back into the parentheses, or if we
2193                    // are currently in a state where we had skipped over the subpattern
2194                    // (in which case the flag value on the stack will be -1).
2195                    unsigned parenthesesFrameLocation = term->frameLocation;
2196                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2197
2198                    if (term->quantityType == QuantifierGreedy) {
2199                        // For Greedy parentheses, we skip after having already tried going
2200                        // through the subpattern, so if we get here we're done.
2201                        YarrOp& beginOp = m_ops[op.m_previousOp];
2202                        beginOp.m_jumps.append(hadSkipped);
2203                    } else {
2204                        // For NonGreedy parentheses, we try skipping the subpattern first,
2205                        // so if we get here we need to try running through the subpattern
2206                        // next. Jump back to the start of the parentheses in the forwards
2207                        // matching path.
2208                        ASSERT(term->quantityType == QuantifierNonGreedy);
2209                        YarrOp& beginOp = m_ops[op.m_previousOp];
2210                        hadSkipped.linkTo(beginOp.m_reentry, this);
2211                    }
2212
2213                    m_backtrackingState.fallthrough();
2214                }
2215
2216                m_backtrackingState.append(op.m_jumps);
2217                break;
2218            }
2219
2220            // OpParenthesesSubpatternTerminalBegin/End
2221            //
2222            // Terminal subpatterns will always match - there is nothing after them to
2223            // force a backtrack, and they have a minimum count of 0, and as such will
2224            // always produce an acceptable result.
2225            case OpParenthesesSubpatternTerminalBegin: {
2226                // We will backtrack to this point once the subpattern cannot match any
2227                // more. Since no match is accepted as a successful match (we are Greedy
2228                // quantified with a minimum of zero) jump back to the forwards matching
2229                // path at the end.
2230                YarrOp& endOp = m_ops[op.m_nextOp];
2231                m_backtrackingState.linkTo(endOp.m_reentry, this);
2232                break;
2233            }
2234            case OpParenthesesSubpatternTerminalEnd:
2235                // We should never be backtracking to here (hence the 'terminal' in the name).
2236                ASSERT(m_backtrackingState.isEmpty());
2237                m_backtrackingState.append(op.m_jumps);
2238                break;
2239
2240            // OpParentheticalAssertionBegin/End
2241            case OpParentheticalAssertionBegin: {
2242                PatternTerm* term = op.m_term;
2243                YarrOp& endOp = m_ops[op.m_nextOp];
2244
2245                // We need to handle the backtracks upon backtracking back out
2246                // of a parenthetical assertion if either we need to correct
2247                // the input index, or the assertion was inverted.
2248                if (op.m_checkAdjust || term->invert()) {
2249                     m_backtrackingState.link(this);
2250
2251                    if (op.m_checkAdjust)
2252                        add32(Imm32(op.m_checkAdjust), index);
2253
2254                    // In an inverted assertion failure to match the subpattern
2255                    // is treated as a successful match - jump to the end of the
2256                    // subpattern. We already have adjusted the input position
2257                    // back to that before the assertion, which is correct.
2258                    if (term->invert())
2259                        jump(endOp.m_reentry);
2260
2261                    m_backtrackingState.fallthrough();
2262                }
2263
2264                // The End node's jump list will contain any backtracks into
2265                // the end of the assertion. Also, if inverted, we will have
2266                // added the failure caused by a successful match to this.
2267                m_backtrackingState.append(endOp.m_jumps);
2268
2269                m_checked += op.m_checkAdjust;
2270                break;
2271            }
2272            case OpParentheticalAssertionEnd: {
2273                // FIXME: We should really be clearing any nested subpattern
2274                // matches on bailing out from after the pattern. Firefox has
2275                // this bug too (presumably because they use YARR!)
2276
2277                // Never backtrack into an assertion; later failures bail to before the begin.
2278                m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2279
2280                YarrOp& lastOp = m_ops[op.m_previousOp];
2281                m_checked -= lastOp.m_checkAdjust;
2282                break;
2283            }
2284
2285            case OpMatchFailed:
2286                break;
2287            }
2288
2289        } while (opIndex);
2290    }
2291
2292    // Compilation methods:
2293    // ====================
2294
2295    // opCompileParenthesesSubpattern
2296    // Emits ops for a subpattern (set of parentheses). These consist
2297    // of a set of alternatives wrapped in an outer set of nodes for
2298    // the parentheses.
2299    // Supported types of parentheses are 'Once' (quantityCount == 1)
2300    // and 'Terminal' (non-capturing parentheses quantified as greedy
2301    // and infinite).
2302    // Alternatives will use the 'Simple' set of ops if either the
2303    // subpattern is terminal (in which case we will never need to
2304    // backtrack), or if the subpattern only contains one alternative.
2305    void opCompileParenthesesSubpattern(PatternTerm* term)
2306    {
2307        YarrOpCode parenthesesBeginOpCode;
2308        YarrOpCode parenthesesEndOpCode;
2309        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2310        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2311        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2312
2313        // We can currently only compile quantity 1 subpatterns that are
2314        // not copies. We generate a copy in the case of a range quantifier,
2315        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2316        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2317        // comes where the subpattern is capturing, in which case we would
2318        // need to restore the capture from the first subpattern upon a
2319        // failure in the second.
2320        if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2321            // Select the 'Once' nodes.
2322            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2323            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2324
2325            // If there is more than one alternative we cannot use the 'simple' nodes.
2326            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2327                alternativeBeginOpCode = OpNestedAlternativeBegin;
2328                alternativeNextOpCode = OpNestedAlternativeNext;
2329                alternativeEndOpCode = OpNestedAlternativeEnd;
2330            }
2331        } else if (term->parentheses.isTerminal) {
2332            // Select the 'Terminal' nodes.
2333            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2334            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2335        } else {
2336            // This subpattern is not supported by the JIT.
2337            m_shouldFallBack = true;
2338            return;
2339        }
2340
2341        size_t parenBegin = m_ops.size();
2342        m_ops.append(parenthesesBeginOpCode);
2343
2344        m_ops.append(alternativeBeginOpCode);
2345        m_ops.last().m_previousOp = notFound;
2346        m_ops.last().m_term = term;
2347        Vector<OwnPtr<PatternAlternative>>& alternatives =  term->parentheses.disjunction->m_alternatives;
2348        for (unsigned i = 0; i < alternatives.size(); ++i) {
2349            size_t lastOpIndex = m_ops.size() - 1;
2350
2351            PatternAlternative* nestedAlternative = alternatives[i].get();
2352            opCompileAlternative(nestedAlternative);
2353
2354            size_t thisOpIndex = m_ops.size();
2355            m_ops.append(YarrOp(alternativeNextOpCode));
2356
2357            YarrOp& lastOp = m_ops[lastOpIndex];
2358            YarrOp& thisOp = m_ops[thisOpIndex];
2359
2360            lastOp.m_alternative = nestedAlternative;
2361            lastOp.m_nextOp = thisOpIndex;
2362            thisOp.m_previousOp = lastOpIndex;
2363            thisOp.m_term = term;
2364        }
2365        YarrOp& lastOp = m_ops.last();
2366        ASSERT(lastOp.m_op == alternativeNextOpCode);
2367        lastOp.m_op = alternativeEndOpCode;
2368        lastOp.m_alternative = 0;
2369        lastOp.m_nextOp = notFound;
2370
2371        size_t parenEnd = m_ops.size();
2372        m_ops.append(parenthesesEndOpCode);
2373
2374        m_ops[parenBegin].m_term = term;
2375        m_ops[parenBegin].m_previousOp = notFound;
2376        m_ops[parenBegin].m_nextOp = parenEnd;
2377        m_ops[parenEnd].m_term = term;
2378        m_ops[parenEnd].m_previousOp = parenBegin;
2379        m_ops[parenEnd].m_nextOp = notFound;
2380    }
2381
2382    // opCompileParentheticalAssertion
2383    // Emits ops for a parenthetical assertion. These consist of an
2384    // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2385    // the alternatives, with these wrapped by an outer pair of
2386    // OpParentheticalAssertionBegin/End nodes.
2387    // We can always use the OpSimpleNestedAlternative nodes in the
2388    // case of parenthetical assertions since these only ever match
2389    // once, and will never backtrack back into the assertion.
2390    void opCompileParentheticalAssertion(PatternTerm* term)
2391    {
2392        size_t parenBegin = m_ops.size();
2393        m_ops.append(OpParentheticalAssertionBegin);
2394
2395        m_ops.append(OpSimpleNestedAlternativeBegin);
2396        m_ops.last().m_previousOp = notFound;
2397        m_ops.last().m_term = term;
2398        Vector<OwnPtr<PatternAlternative>>& alternatives =  term->parentheses.disjunction->m_alternatives;
2399        for (unsigned i = 0; i < alternatives.size(); ++i) {
2400            size_t lastOpIndex = m_ops.size() - 1;
2401
2402            PatternAlternative* nestedAlternative = alternatives[i].get();
2403            opCompileAlternative(nestedAlternative);
2404
2405            size_t thisOpIndex = m_ops.size();
2406            m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2407
2408            YarrOp& lastOp = m_ops[lastOpIndex];
2409            YarrOp& thisOp = m_ops[thisOpIndex];
2410
2411            lastOp.m_alternative = nestedAlternative;
2412            lastOp.m_nextOp = thisOpIndex;
2413            thisOp.m_previousOp = lastOpIndex;
2414            thisOp.m_term = term;
2415        }
2416        YarrOp& lastOp = m_ops.last();
2417        ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2418        lastOp.m_op = OpSimpleNestedAlternativeEnd;
2419        lastOp.m_alternative = 0;
2420        lastOp.m_nextOp = notFound;
2421
2422        size_t parenEnd = m_ops.size();
2423        m_ops.append(OpParentheticalAssertionEnd);
2424
2425        m_ops[parenBegin].m_term = term;
2426        m_ops[parenBegin].m_previousOp = notFound;
2427        m_ops[parenBegin].m_nextOp = parenEnd;
2428        m_ops[parenEnd].m_term = term;
2429        m_ops[parenEnd].m_previousOp = parenBegin;
2430        m_ops[parenEnd].m_nextOp = notFound;
2431    }
2432
2433    // opCompileAlternative
2434    // Called to emit nodes for all terms in an alternative.
2435    void opCompileAlternative(PatternAlternative* alternative)
2436    {
2437        optimizeAlternative(alternative);
2438
2439        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2440            PatternTerm* term = &alternative->m_terms[i];
2441
2442            switch (term->type) {
2443            case PatternTerm::TypeParenthesesSubpattern:
2444                opCompileParenthesesSubpattern(term);
2445                break;
2446
2447            case PatternTerm::TypeParentheticalAssertion:
2448                opCompileParentheticalAssertion(term);
2449                break;
2450
2451            default:
2452                m_ops.append(term);
2453            }
2454        }
2455    }
2456
2457    // opCompileBody
2458    // This method compiles the body disjunction of the regular expression.
2459    // The body consists of two sets of alternatives - zero or more 'once
2460    // through' (BOL anchored) alternatives, followed by zero or more
2461    // repeated alternatives.
2462    // For each of these two sets of alteratives, if not empty they will be
2463    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2464    // 'begin' node referencing the first alternative, and 'next' nodes
2465    // referencing any further alternatives. The begin/next/end nodes are
2466    // linked together in a doubly linked list. In the case of repeating
2467    // alternatives, the end node is also linked back to the beginning.
2468    // If no repeating alternatives exist, then a OpMatchFailed node exists
2469    // to return the failing result.
2470    void opCompileBody(PatternDisjunction* disjunction)
2471    {
2472        Vector<OwnPtr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
2473        size_t currentAlternativeIndex = 0;
2474
2475        // Emit the 'once through' alternatives.
2476        if (alternatives.size() && alternatives[0]->onceThrough()) {
2477            m_ops.append(YarrOp(OpBodyAlternativeBegin));
2478            m_ops.last().m_previousOp = notFound;
2479
2480            do {
2481                size_t lastOpIndex = m_ops.size() - 1;
2482                PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
2483                opCompileAlternative(alternative);
2484
2485                size_t thisOpIndex = m_ops.size();
2486                m_ops.append(YarrOp(OpBodyAlternativeNext));
2487
2488                YarrOp& lastOp = m_ops[lastOpIndex];
2489                YarrOp& thisOp = m_ops[thisOpIndex];
2490
2491                lastOp.m_alternative = alternative;
2492                lastOp.m_nextOp = thisOpIndex;
2493                thisOp.m_previousOp = lastOpIndex;
2494
2495                ++currentAlternativeIndex;
2496            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2497
2498            YarrOp& lastOp = m_ops.last();
2499
2500            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2501            lastOp.m_op = OpBodyAlternativeEnd;
2502            lastOp.m_alternative = 0;
2503            lastOp.m_nextOp = notFound;
2504        }
2505
2506        if (currentAlternativeIndex == alternatives.size()) {
2507            m_ops.append(YarrOp(OpMatchFailed));
2508            return;
2509        }
2510
2511        // Emit the repeated alternatives.
2512        size_t repeatLoop = m_ops.size();
2513        m_ops.append(YarrOp(OpBodyAlternativeBegin));
2514        m_ops.last().m_previousOp = notFound;
2515        do {
2516            size_t lastOpIndex = m_ops.size() - 1;
2517            PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
2518            ASSERT(!alternative->onceThrough());
2519            opCompileAlternative(alternative);
2520
2521            size_t thisOpIndex = m_ops.size();
2522            m_ops.append(YarrOp(OpBodyAlternativeNext));
2523
2524            YarrOp& lastOp = m_ops[lastOpIndex];
2525            YarrOp& thisOp = m_ops[thisOpIndex];
2526
2527            lastOp.m_alternative = alternative;
2528            lastOp.m_nextOp = thisOpIndex;
2529            thisOp.m_previousOp = lastOpIndex;
2530
2531            ++currentAlternativeIndex;
2532        } while (currentAlternativeIndex < alternatives.size());
2533        YarrOp& lastOp = m_ops.last();
2534        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2535        lastOp.m_op = OpBodyAlternativeEnd;
2536        lastOp.m_alternative = 0;
2537        lastOp.m_nextOp = repeatLoop;
2538    }
2539
2540    void generateEnter()
2541    {
2542#if CPU(X86_64)
2543        push(X86Registers::ebp);
2544        move(stackPointerRegister, X86Registers::ebp);
2545        push(X86Registers::ebx);
2546        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
2547        zeroExtend32ToPtr(index, index);
2548        zeroExtend32ToPtr(length, length);
2549#if OS(WINDOWS)
2550        if (compileMode == IncludeSubpatterns)
2551            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
2552#endif
2553#elif CPU(X86)
2554        push(X86Registers::ebp);
2555        move(stackPointerRegister, X86Registers::ebp);
2556        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2557        push(X86Registers::ebx);
2558        push(X86Registers::edi);
2559        push(X86Registers::esi);
2560        // load output into edi (2 = saved ebp + return address).
2561    #if COMPILER(MSVC)
2562        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2563        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2564        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2565        if (compileMode == IncludeSubpatterns)
2566            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2567    #else
2568        if (compileMode == IncludeSubpatterns)
2569            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2570    #endif
2571#elif CPU(ARM64)
2572        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
2573        zeroExtend32ToPtr(index, index);
2574        zeroExtend32ToPtr(length, length);
2575#elif CPU(ARM)
2576        push(ARMRegisters::r4);
2577        push(ARMRegisters::r5);
2578        push(ARMRegisters::r6);
2579#elif CPU(SH4)
2580        push(SH4Registers::r11);
2581        push(SH4Registers::r13);
2582#elif CPU(MIPS)
2583        // Do nothing.
2584#endif
2585    }
2586
2587    void generateReturn()
2588    {
2589#if CPU(X86_64)
2590#if OS(WINDOWS)
2591        // Store the return value in the allocated space pointed by rcx.
2592        store64(returnRegister, Address(X86Registers::ecx));
2593        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
2594        move(X86Registers::ecx, returnRegister);
2595#endif
2596        pop(X86Registers::ebx);
2597        pop(X86Registers::ebp);
2598#elif CPU(X86)
2599        pop(X86Registers::esi);
2600        pop(X86Registers::edi);
2601        pop(X86Registers::ebx);
2602        pop(X86Registers::ebp);
2603#elif CPU(ARM)
2604        pop(ARMRegisters::r6);
2605        pop(ARMRegisters::r5);
2606        pop(ARMRegisters::r4);
2607#elif CPU(SH4)
2608        pop(SH4Registers::r13);
2609        pop(SH4Registers::r11);
2610#elif CPU(MIPS)
2611        // Do nothing
2612#endif
2613        ret();
2614    }
2615
2616public:
2617    YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
2618        : m_pattern(pattern)
2619        , m_charSize(charSize)
2620        , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
2621        , m_shouldFallBack(false)
2622        , m_checked(0)
2623    {
2624    }
2625
2626    void compile(VM* vm, YarrCodeBlock& jitObject)
2627    {
2628        generateEnter();
2629
2630        Jump hasInput = checkInput();
2631        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
2632        move(TrustedImm32(0), returnRegister2);
2633        generateReturn();
2634        hasInput.link(this);
2635
2636        if (compileMode == IncludeSubpatterns) {
2637            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
2638                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
2639        }
2640
2641        if (!m_pattern.m_body->m_hasFixedSize)
2642            setMatchStart(index);
2643
2644        initCallFrame();
2645
2646        // Compile the pattern to the internal 'YarrOp' representation.
2647        opCompileBody(m_pattern.m_body);
2648
2649        // If we encountered anything we can't handle in the JIT code
2650        // (e.g. backreferences) then return early.
2651        if (m_shouldFallBack) {
2652            jitObject.setFallBack(true);
2653            return;
2654        }
2655
2656        generate();
2657        backtrack();
2658
2659        // Link & finalize the code.
2660        LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID);
2661        m_backtrackingState.linkDataLabels(linkBuffer);
2662
2663        if (compileMode == MatchOnly) {
2664            if (m_charSize == Char8)
2665                jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
2666            else
2667                jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
2668        } else {
2669            if (m_charSize == Char8)
2670                jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
2671            else
2672                jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
2673        }
2674        jitObject.setFallBack(m_shouldFallBack);
2675    }
2676
2677private:
2678    YarrPattern& m_pattern;
2679
2680    YarrCharSize m_charSize;
2681
2682    Scale m_charScale;
2683
2684    // Used to detect regular expression constructs that are not currently
2685    // supported in the JIT; fall back to the interpreter when this is detected.
2686    bool m_shouldFallBack;
2687
2688    // The regular expression expressed as a linear sequence of operations.
2689    Vector<YarrOp, 128> m_ops;
2690
2691    // This records the current input offset being applied due to the current
2692    // set of alternatives we are nested within. E.g. when matching the
2693    // character 'b' within the regular expression /abc/, we will know that
2694    // the minimum size for the alternative is 3, checked upon entry to the
2695    // alternative, and that 'b' is at offset 1 from the start, and as such
2696    // when matching 'b' we need to apply an offset of -2 to the load.
2697    //
2698    // FIXME: This should go away. Rather than tracking this value throughout
2699    // code generation, we should gather this information up front & store it
2700    // on the YarrOp structure.
2701    int m_checked;
2702
2703    // This class records state whilst generating the backtracking path of code.
2704    BacktrackingState m_backtrackingState;
2705};
2706
2707void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
2708{
2709    if (mode == MatchOnly)
2710        YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject);
2711    else
2712        YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject);
2713}
2714
2715}}
2716
2717#endif
2718