1/*
2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "YarrJIT.h"
28
29#include <wtf/ASCIICType.h>
30#include "LinkBuffer.h"
31#include "Options.h"
32#include "Yarr.h"
33#include "YarrCanonicalizeUCS2.h"
34
35#if ENABLE(YARR_JIT)
36
37using namespace WTF;
38
39namespace JSC { namespace Yarr {
40
41template<YarrJITCompileMode compileMode>
42class YarrGenerator : private MacroAssembler {
43    friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
44
45#if CPU(ARM)
46    static const RegisterID input = ARMRegisters::r0;
47    static const RegisterID index = ARMRegisters::r1;
48    static const RegisterID length = ARMRegisters::r2;
49    static const RegisterID output = ARMRegisters::r4;
50
51    static const RegisterID regT0 = ARMRegisters::r5;
52    static const RegisterID regT1 = ARMRegisters::r6;
53
54    static const RegisterID returnRegister = ARMRegisters::r0;
55    static const RegisterID returnRegister2 = ARMRegisters::r1;
56#elif CPU(MIPS)
57    static const RegisterID input = MIPSRegisters::a0;
58    static const RegisterID index = MIPSRegisters::a1;
59    static const RegisterID length = MIPSRegisters::a2;
60    static const RegisterID output = MIPSRegisters::a3;
61
62    static const RegisterID regT0 = MIPSRegisters::t4;
63    static const RegisterID regT1 = MIPSRegisters::t5;
64
65    static const RegisterID returnRegister = MIPSRegisters::v0;
66    static const RegisterID returnRegister2 = MIPSRegisters::v1;
67#elif CPU(SH4)
68    static const RegisterID input = SH4Registers::r4;
69    static const RegisterID index = SH4Registers::r5;
70    static const RegisterID length = SH4Registers::r6;
71    static const RegisterID output = SH4Registers::r7;
72
73    static const RegisterID regT0 = SH4Registers::r0;
74    static const RegisterID regT1 = SH4Registers::r1;
75
76    static const RegisterID returnRegister = SH4Registers::r0;
77    static const RegisterID returnRegister2 = SH4Registers::r1;
78#elif CPU(X86)
79    static const RegisterID input = X86Registers::eax;
80    static const RegisterID index = X86Registers::edx;
81    static const RegisterID length = X86Registers::ecx;
82    static const RegisterID output = X86Registers::edi;
83
84    static const RegisterID regT0 = X86Registers::ebx;
85    static const RegisterID regT1 = X86Registers::esi;
86
87    static const RegisterID returnRegister = X86Registers::eax;
88    static const RegisterID returnRegister2 = X86Registers::edx;
89#elif CPU(X86_64)
90#if !OS(WINDOWS)
91    static const RegisterID input = X86Registers::edi;
92    static const RegisterID index = X86Registers::esi;
93    static const RegisterID length = X86Registers::edx;
94    static const RegisterID output = X86Registers::ecx;
95#else
96    // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
97    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
98    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
99    static const RegisterID input = X86Registers::edx;
100    static const RegisterID index = X86Registers::r8;
101    static const RegisterID length = X86Registers::r9;
102    static const RegisterID output = X86Registers::r10;
103#endif
104
105    static const RegisterID regT0 = X86Registers::eax;
106    static const RegisterID regT1 = X86Registers::ebx;
107
108    static const RegisterID returnRegister = X86Registers::eax;
109    static const RegisterID returnRegister2 = X86Registers::edx;
110#endif
111
112    void optimizeAlternative(PatternAlternative* alternative)
113    {
114        if (!alternative->m_terms.size())
115            return;
116
117        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
118            PatternTerm& term = alternative->m_terms[i];
119            PatternTerm& nextTerm = alternative->m_terms[i + 1];
120
121            if ((term.type == PatternTerm::TypeCharacterClass)
122                && (term.quantityType == QuantifierFixedCount)
123                && (nextTerm.type == PatternTerm::TypePatternCharacter)
124                && (nextTerm.quantityType == QuantifierFixedCount)) {
125                PatternTerm termCopy = term;
126                alternative->m_terms[i] = nextTerm;
127                alternative->m_terms[i + 1] = termCopy;
128            }
129        }
130    }
131
132    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
133    {
134        do {
135            // pick which range we're going to generate
136            int which = count >> 1;
137            char lo = ranges[which].begin;
138            char hi = ranges[which].end;
139
140            // check if there are any ranges or matches below lo.  If not, just jl to failure -
141            // if there is anything else to check, check that first, if it falls through jmp to failure.
142            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
143                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
144
145                // generate code for all ranges before this one
146                if (which)
147                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
148
149                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
150                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
151                    ++*matchIndex;
152                }
153                failures.append(jump());
154
155                loOrAbove.link(this);
156            } else if (which) {
157                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
158
159                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
160                failures.append(jump());
161
162                loOrAbove.link(this);
163            } else
164                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
165
166            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
167                ++*matchIndex;
168
169            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
170            // fall through to here, the value is above hi.
171
172            // shuffle along & loop around if there are any more matches to handle.
173            unsigned next = which + 1;
174            ranges += next;
175            count -= next;
176        } while (count);
177    }
178
179    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
180    {
181        if (charClass->m_table) {
182            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
183            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
184            return;
185        }
186        Jump unicodeFail;
187        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
188            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
189
190            if (charClass->m_matchesUnicode.size()) {
191                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
192                    UChar ch = charClass->m_matchesUnicode[i];
193                    matchDest.append(branch32(Equal, character, Imm32(ch)));
194                }
195            }
196
197            if (charClass->m_rangesUnicode.size()) {
198                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
199                    UChar lo = charClass->m_rangesUnicode[i].begin;
200                    UChar hi = charClass->m_rangesUnicode[i].end;
201
202                    Jump below = branch32(LessThan, character, Imm32(lo));
203                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
204                    below.link(this);
205                }
206            }
207
208            unicodeFail = jump();
209            isAscii.link(this);
210        }
211
212        if (charClass->m_ranges.size()) {
213            unsigned matchIndex = 0;
214            JumpList failures;
215            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
216            while (matchIndex < charClass->m_matches.size())
217                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
218
219            failures.link(this);
220        } else if (charClass->m_matches.size()) {
221            // optimization: gather 'a','A' etc back together, can mask & test once.
222            Vector<char> matchesAZaz;
223
224            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
225                char ch = charClass->m_matches[i];
226                if (m_pattern.m_ignoreCase) {
227                    if (isASCIILower(ch)) {
228                        matchesAZaz.append(ch);
229                        continue;
230                    }
231                    if (isASCIIUpper(ch))
232                        continue;
233                }
234                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
235            }
236
237            if (unsigned countAZaz = matchesAZaz.size()) {
238                or32(TrustedImm32(32), character);
239                for (unsigned i = 0; i < countAZaz; ++i)
240                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
241            }
242        }
243
244        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
245            unicodeFail.link(this);
246    }
247
248    // Jumps if input not available; will have (incorrectly) incremented already!
249    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
250    {
251        if (countToCheck)
252            add32(Imm32(countToCheck), index);
253        return branch32(Above, index, length);
254    }
255
256    Jump jumpIfAvailableInput(unsigned countToCheck)
257    {
258        add32(Imm32(countToCheck), index);
259        return branch32(BelowOrEqual, index, length);
260    }
261
262    Jump checkInput()
263    {
264        return branch32(BelowOrEqual, index, length);
265    }
266
267    Jump atEndOfInput()
268    {
269        return branch32(Equal, index, length);
270    }
271
272    Jump notAtEndOfInput()
273    {
274        return branch32(NotEqual, index, length);
275    }
276
277    Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
278    {
279        readCharacter(inputPosition, character);
280
281        // For case-insesitive compares, non-ascii characters that have different
282        // upper & lower case representations are converted to a character class.
283        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
284        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
285            or32(TrustedImm32(0x20), character);
286            ch |= 0x20;
287        }
288
289        return branch32(NotEqual, character, Imm32(ch));
290    }
291
292    void readCharacter(int inputPosition, RegisterID reg)
293    {
294        if (m_charSize == Char8)
295            load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
296        else
297            load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
298    }
299
300    void storeToFrame(RegisterID reg, unsigned frameLocation)
301    {
302        poke(reg, frameLocation);
303    }
304
305    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
306    {
307        poke(imm, frameLocation);
308    }
309
310    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
311    {
312        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
313    }
314
315    void loadFromFrame(unsigned frameLocation, RegisterID reg)
316    {
317        peek(reg, frameLocation);
318    }
319
320    void loadFromFrameAndJump(unsigned frameLocation)
321    {
322        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
323    }
324
325    void initCallFrame()
326    {
327        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
328        if (callFrameSize)
329            subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
330    }
331    void removeCallFrame()
332    {
333        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
334        if (callFrameSize)
335            addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
336    }
337
338    // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
339    void setSubpatternStart(RegisterID reg, unsigned subpattern)
340    {
341        ASSERT(subpattern);
342        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
343        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
344    }
345    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
346    {
347        ASSERT(subpattern);
348        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
349        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
350    }
351    void clearSubpatternStart(unsigned subpattern)
352    {
353        ASSERT(subpattern);
354        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
355        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
356    }
357
358    // We use one of three different strategies to track the start of the current match,
359    // while matching.
360    // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
361    //    at the end of matching. This is irrespective of compileMode, and in this case
362    //    these methods should never be called.
363    // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
364    //    vector, store the match start in the output vector.
365    // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
366    //    in this register.
367    void setMatchStart(RegisterID reg)
368    {
369        ASSERT(!m_pattern.m_body->m_hasFixedSize);
370        if (compileMode == IncludeSubpatterns)
371            store32(reg, output);
372        else
373            move(reg, output);
374    }
375    void getMatchStart(RegisterID reg)
376    {
377        ASSERT(!m_pattern.m_body->m_hasFixedSize);
378        if (compileMode == IncludeSubpatterns)
379            load32(output, reg);
380        else
381            move(output, reg);
382    }
383
384    enum YarrOpCode {
385        // These nodes wrap body alternatives - those in the main disjunction,
386        // rather than subpatterns or assertions. These are chained together in
387        // a doubly linked list, with a 'begin' node for the first alternative,
388        // a 'next' node for each subsequent alternative, and an 'end' node at
389        // the end. In the case of repeating alternatives, the 'end' node also
390        // has a reference back to 'begin'.
391        OpBodyAlternativeBegin,
392        OpBodyAlternativeNext,
393        OpBodyAlternativeEnd,
394        // Similar to the body alternatives, but used for subpatterns with two
395        // or more alternatives.
396        OpNestedAlternativeBegin,
397        OpNestedAlternativeNext,
398        OpNestedAlternativeEnd,
399        // Used for alternatives in subpatterns where there is only a single
400        // alternative (backtrackingis easier in these cases), or for alternatives
401        // which never need to be backtracked (those in parenthetical assertions,
402        // terminal subpatterns).
403        OpSimpleNestedAlternativeBegin,
404        OpSimpleNestedAlternativeNext,
405        OpSimpleNestedAlternativeEnd,
406        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
407        OpParenthesesSubpatternOnceBegin,
408        OpParenthesesSubpatternOnceEnd,
409        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
410        OpParenthesesSubpatternTerminalBegin,
411        OpParenthesesSubpatternTerminalEnd,
412        // Used to wrap parenthetical assertions.
413        OpParentheticalAssertionBegin,
414        OpParentheticalAssertionEnd,
415        // Wraps all simple terms (pattern characters, character classes).
416        OpTerm,
417        // Where an expression contains only 'once through' body alternatives
418        // and no repeating ones, this op is used to return match failure.
419        OpMatchFailed
420    };
421
422    // This structure is used to hold the compiled opcode information,
423    // including reference back to the original PatternTerm/PatternAlternatives,
424    // and JIT compilation data structures.
425    struct YarrOp {
426        explicit YarrOp(PatternTerm* term)
427            : m_op(OpTerm)
428            , m_term(term)
429            , m_isDeadCode(false)
430        {
431        }
432
433        explicit YarrOp(YarrOpCode op)
434            : m_op(op)
435            , m_isDeadCode(false)
436        {
437        }
438
439        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
440        YarrOpCode m_op;
441        PatternTerm* m_term;
442
443        // For alternatives, this holds the PatternAlternative and doubly linked
444        // references to this alternative's siblings. In the case of the
445        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
446        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
447        // repeating alternative.
448        PatternAlternative* m_alternative;
449        size_t m_previousOp;
450        size_t m_nextOp;
451
452        // Used to record a set of Jumps out of the generated code, typically
453        // used for jumps out to backtracking code, and a single reentry back
454        // into the code for a node (likely where a backtrack will trigger
455        // rematching).
456        Label m_reentry;
457        JumpList m_jumps;
458
459        // Used for backtracking when the prior alternative did not consume any
460        // characters but matched.
461        Jump m_zeroLengthMatch;
462
463        // This flag is used to null out the second pattern character, when
464        // two are fused to match a pair together.
465        bool m_isDeadCode;
466
467        // Currently used in the case of some of the more complex management of
468        // 'm_checked', to cache the offset used in this alternative, to avoid
469        // recalculating it.
470        int m_checkAdjust;
471
472        // Used by OpNestedAlternativeNext/End to hold the pointer to the
473        // value that will be pushed into the pattern's frame to return to,
474        // upon backtracking back into the disjunction.
475        DataLabelPtr m_returnAddress;
476    };
477
478    // BacktrackingState
479    // This class encapsulates information about the state of code generation
480    // whilst generating the code for backtracking, when a term fails to match.
481    // Upon entry to code generation of the backtracking code for a given node,
482    // the Backtracking state will hold references to all control flow sources
483    // that are outputs in need of further backtracking from the prior node
484    // generated (which is the subsequent operation in the regular expression,
485    // and in the m_ops Vector, since we generated backtracking backwards).
486    // These references to control flow take the form of:
487    //  - A jump list of jumps, to be linked to code that will backtrack them
488    //    further.
489    //  - A set of DataLabelPtr values, to be populated with values to be
490    //    treated effectively as return addresses backtracking into complex
491    //    subpatterns.
492    //  - A flag indicating that the current sequence of generated code up to
493    //    this point requires backtracking.
494    class BacktrackingState {
495    public:
496        BacktrackingState()
497            : m_pendingFallthrough(false)
498        {
499        }
500
501        // Add a jump or jumps, a return address, or set the flag indicating
502        // that the current 'fallthrough' control flow requires backtracking.
503        void append(const Jump& jump)
504        {
505            m_laterFailures.append(jump);
506        }
507        void append(JumpList& jumpList)
508        {
509            m_laterFailures.append(jumpList);
510        }
511        void append(const DataLabelPtr& returnAddress)
512        {
513            m_pendingReturns.append(returnAddress);
514        }
515        void fallthrough()
516        {
517            ASSERT(!m_pendingFallthrough);
518            m_pendingFallthrough = true;
519        }
520
521        // These methods clear the backtracking state, either linking to the
522        // current location, a provided label, or copying the backtracking out
523        // to a JumpList. All actions may require code generation to take place,
524        // and as such are passed a pointer to the assembler.
525        void link(MacroAssembler* assembler)
526        {
527            if (m_pendingReturns.size()) {
528                Label here(assembler);
529                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
530                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
531                m_pendingReturns.clear();
532            }
533            m_laterFailures.link(assembler);
534            m_laterFailures.clear();
535            m_pendingFallthrough = false;
536        }
537        void linkTo(Label label, MacroAssembler* assembler)
538        {
539            if (m_pendingReturns.size()) {
540                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
541                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
542                m_pendingReturns.clear();
543            }
544            if (m_pendingFallthrough)
545                assembler->jump(label);
546            m_laterFailures.linkTo(label, assembler);
547            m_laterFailures.clear();
548            m_pendingFallthrough = false;
549        }
550        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
551        {
552            if (m_pendingReturns.size()) {
553                Label here(assembler);
554                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
555                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
556                m_pendingReturns.clear();
557                m_pendingFallthrough = true;
558            }
559            if (m_pendingFallthrough)
560                jumpList.append(assembler->jump());
561            jumpList.append(m_laterFailures);
562            m_laterFailures.clear();
563            m_pendingFallthrough = false;
564        }
565
566        bool isEmpty()
567        {
568            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
569        }
570
571        // Called at the end of code generation to link all return addresses.
572        void linkDataLabels(LinkBuffer& linkBuffer)
573        {
574            ASSERT(isEmpty());
575            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
576                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
577        }
578
579    private:
580        struct ReturnAddressRecord {
581            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
582                : m_dataLabel(dataLabel)
583                , m_backtrackLocation(backtrackLocation)
584            {
585            }
586
587            DataLabelPtr m_dataLabel;
588            Label m_backtrackLocation;
589        };
590
591        JumpList m_laterFailures;
592        bool m_pendingFallthrough;
593        Vector<DataLabelPtr, 4> m_pendingReturns;
594        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
595    };
596
597    // Generation methods:
598    // ===================
599
600    // This method provides a default implementation of backtracking common
601    // to many terms; terms commonly jump out of the forwards  matching path
602    // on any failed conditions, and add these jumps to the m_jumps list. If
603    // no special handling is required we can often just backtrack to m_jumps.
604    void backtrackTermDefault(size_t opIndex)
605    {
606        YarrOp& op = m_ops[opIndex];
607        m_backtrackingState.append(op.m_jumps);
608    }
609
610    void generateAssertionBOL(size_t opIndex)
611    {
612        YarrOp& op = m_ops[opIndex];
613        PatternTerm* term = op.m_term;
614
615        if (m_pattern.m_multiline) {
616            const RegisterID character = regT0;
617
618            JumpList matchDest;
619            if (!term->inputPosition)
620                matchDest.append(branch32(Equal, index, Imm32(m_checked)));
621
622            readCharacter((term->inputPosition - m_checked) - 1, character);
623            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
624            op.m_jumps.append(jump());
625
626            matchDest.link(this);
627        } else {
628            // Erk, really should poison out these alternatives early. :-/
629            if (term->inputPosition)
630                op.m_jumps.append(jump());
631            else
632                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
633        }
634    }
635    void backtrackAssertionBOL(size_t opIndex)
636    {
637        backtrackTermDefault(opIndex);
638    }
639
640    void generateAssertionEOL(size_t opIndex)
641    {
642        YarrOp& op = m_ops[opIndex];
643        PatternTerm* term = op.m_term;
644
645        if (m_pattern.m_multiline) {
646            const RegisterID character = regT0;
647
648            JumpList matchDest;
649            if (term->inputPosition == m_checked)
650                matchDest.append(atEndOfInput());
651
652            readCharacter(term->inputPosition - m_checked, character);
653            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
654            op.m_jumps.append(jump());
655
656            matchDest.link(this);
657        } else {
658            if (term->inputPosition == m_checked)
659                op.m_jumps.append(notAtEndOfInput());
660            // Erk, really should poison out these alternatives early. :-/
661            else
662                op.m_jumps.append(jump());
663        }
664    }
665    void backtrackAssertionEOL(size_t opIndex)
666    {
667        backtrackTermDefault(opIndex);
668    }
669
670    // Also falls though on nextIsNotWordChar.
671    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
672    {
673        YarrOp& op = m_ops[opIndex];
674        PatternTerm* term = op.m_term;
675
676        const RegisterID character = regT0;
677
678        if (term->inputPosition == m_checked)
679            nextIsNotWordChar.append(atEndOfInput());
680
681        readCharacter((term->inputPosition - m_checked), character);
682        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
683    }
684
685    void generateAssertionWordBoundary(size_t opIndex)
686    {
687        YarrOp& op = m_ops[opIndex];
688        PatternTerm* term = op.m_term;
689
690        const RegisterID character = regT0;
691
692        Jump atBegin;
693        JumpList matchDest;
694        if (!term->inputPosition)
695            atBegin = branch32(Equal, index, Imm32(m_checked));
696        readCharacter((term->inputPosition - m_checked) - 1, character);
697        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
698        if (!term->inputPosition)
699            atBegin.link(this);
700
701        // We fall through to here if the last character was not a wordchar.
702        JumpList nonWordCharThenWordChar;
703        JumpList nonWordCharThenNonWordChar;
704        if (term->invert()) {
705            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
706            nonWordCharThenWordChar.append(jump());
707        } else {
708            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
709            nonWordCharThenNonWordChar.append(jump());
710        }
711        op.m_jumps.append(nonWordCharThenNonWordChar);
712
713        // We jump here if the last character was a wordchar.
714        matchDest.link(this);
715        JumpList wordCharThenWordChar;
716        JumpList wordCharThenNonWordChar;
717        if (term->invert()) {
718            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
719            wordCharThenWordChar.append(jump());
720        } else {
721            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
722            // This can fall-though!
723        }
724
725        op.m_jumps.append(wordCharThenWordChar);
726
727        nonWordCharThenWordChar.link(this);
728        wordCharThenNonWordChar.link(this);
729    }
730    void backtrackAssertionWordBoundary(size_t opIndex)
731    {
732        backtrackTermDefault(opIndex);
733    }
734
735    void generatePatternCharacterOnce(size_t opIndex)
736    {
737        YarrOp& op = m_ops[opIndex];
738
739        if (op.m_isDeadCode)
740            return;
741
742        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
743        // node, so there must always be at least one more node.
744        ASSERT(opIndex + 1 < m_ops.size());
745        YarrOp* nextOp = &m_ops[opIndex + 1];
746
747        PatternTerm* term = op.m_term;
748        UChar ch = term->patternCharacter;
749
750        if ((ch > 0xff) && (m_charSize == Char8)) {
751            // Have a 16 bit pattern character and an 8 bit string - short circuit
752            op.m_jumps.append(jump());
753            return;
754        }
755
756        const RegisterID character = regT0;
757        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
758        unsigned ignoreCaseMask = 0;
759#if CPU(BIG_ENDIAN)
760        int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
761#else
762        int allCharacters = ch;
763#endif
764        int numberCharacters;
765        int startTermPosition = term->inputPosition;
766
767        // For case-insesitive compares, non-ascii characters that have different
768        // upper & lower case representations are converted to a character class.
769        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
770
771        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
772#if CPU(BIG_ENDIAN)
773            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
774#else
775            ignoreCaseMask |= 32;
776#endif
777
778        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
779            PatternTerm* nextTerm = nextOp->m_term;
780
781            if (nextTerm->type != PatternTerm::TypePatternCharacter
782                || nextTerm->quantityType != QuantifierFixedCount
783                || nextTerm->quantityCount != 1
784                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
785                break;
786
787            nextOp->m_isDeadCode = true;
788
789#if CPU(BIG_ENDIAN)
790            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
791#else
792            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
793#endif
794
795            UChar currentCharacter = nextTerm->patternCharacter;
796
797            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
798                // Have a 16 bit pattern character and an 8 bit string - short circuit
799                op.m_jumps.append(jump());
800                return;
801            }
802
803            // For case-insesitive compares, non-ascii characters that have different
804            // upper & lower case representations are converted to a character class.
805            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));
806
807            allCharacters |= (currentCharacter << shiftAmount);
808
809            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
810                ignoreCaseMask |= 32 << shiftAmount;
811        }
812
813        if (m_charSize == Char8) {
814            switch (numberCharacters) {
815            case 1:
816                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
817                return;
818            case 2: {
819                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
820                load16Unaligned(address, character);
821                break;
822            }
823            case 3: {
824                BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
825                load16Unaligned(highAddress, character);
826                if (ignoreCaseMask)
827                    or32(Imm32(ignoreCaseMask), character);
828                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
829                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
830                return;
831            }
832            case 4: {
833                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
834                load32WithUnalignedHalfWords(address, character);
835                break;
836            }
837            }
838        } else {
839            switch (numberCharacters) {
840            case 1:
841                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
842                return;
843            case 2:
844                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
845                load32WithUnalignedHalfWords(address, character);
846                break;
847            }
848        }
849
850        if (ignoreCaseMask)
851            or32(Imm32(ignoreCaseMask), character);
852        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
853        return;
854    }
855    void backtrackPatternCharacterOnce(size_t opIndex)
856    {
857        backtrackTermDefault(opIndex);
858    }
859
860    void generatePatternCharacterFixed(size_t opIndex)
861    {
862        YarrOp& op = m_ops[opIndex];
863        PatternTerm* term = op.m_term;
864        UChar ch = term->patternCharacter;
865
866        const RegisterID character = regT0;
867        const RegisterID countRegister = regT1;
868
869        move(index, countRegister);
870        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
871
872        Label loop(this);
873        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
874
875        if (m_charSize == Char8)
876            load8(address, character);
877        else
878            load16(address, character);
879
880        // For case-insesitive compares, non-ascii characters that have different
881        // upper & lower case representations are converted to a character class.
882        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
883        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
884            or32(TrustedImm32(0x20), character);
885            ch |= 0x20;
886        }
887
888        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
889        add32(TrustedImm32(1), countRegister);
890        branch32(NotEqual, countRegister, index).linkTo(loop, this);
891    }
892    void backtrackPatternCharacterFixed(size_t opIndex)
893    {
894        backtrackTermDefault(opIndex);
895    }
896
897    void generatePatternCharacterGreedy(size_t opIndex)
898    {
899        YarrOp& op = m_ops[opIndex];
900        PatternTerm* term = op.m_term;
901        UChar ch = term->patternCharacter;
902
903        const RegisterID character = regT0;
904        const RegisterID countRegister = regT1;
905
906        move(TrustedImm32(0), countRegister);
907
908        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
909        if (!((ch > 0xff) && (m_charSize == Char8))) {
910            JumpList failures;
911            Label loop(this);
912            failures.append(atEndOfInput());
913            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
914
915            add32(TrustedImm32(1), countRegister);
916            add32(TrustedImm32(1), index);
917            if (term->quantityCount == quantifyInfinite)
918                jump(loop);
919            else
920                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
921
922            failures.link(this);
923        }
924        op.m_reentry = label();
925
926        storeToFrame(countRegister, term->frameLocation);
927    }
928    void backtrackPatternCharacterGreedy(size_t opIndex)
929    {
930        YarrOp& op = m_ops[opIndex];
931        PatternTerm* term = op.m_term;
932
933        const RegisterID countRegister = regT1;
934
935        m_backtrackingState.link(this);
936
937        loadFromFrame(term->frameLocation, countRegister);
938        m_backtrackingState.append(branchTest32(Zero, countRegister));
939        sub32(TrustedImm32(1), countRegister);
940        sub32(TrustedImm32(1), index);
941        jump(op.m_reentry);
942    }
943
944    void generatePatternCharacterNonGreedy(size_t opIndex)
945    {
946        YarrOp& op = m_ops[opIndex];
947        PatternTerm* term = op.m_term;
948
949        const RegisterID countRegister = regT1;
950
951        move(TrustedImm32(0), countRegister);
952        op.m_reentry = label();
953        storeToFrame(countRegister, term->frameLocation);
954    }
955    void backtrackPatternCharacterNonGreedy(size_t opIndex)
956    {
957        YarrOp& op = m_ops[opIndex];
958        PatternTerm* term = op.m_term;
959        UChar ch = term->patternCharacter;
960
961        const RegisterID character = regT0;
962        const RegisterID countRegister = regT1;
963
964        m_backtrackingState.link(this);
965
966        loadFromFrame(term->frameLocation, countRegister);
967
968        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
969        if (!((ch > 0xff) && (m_charSize == Char8))) {
970            JumpList nonGreedyFailures;
971            nonGreedyFailures.append(atEndOfInput());
972            if (term->quantityCount != quantifyInfinite)
973                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
974            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
975
976            add32(TrustedImm32(1), countRegister);
977            add32(TrustedImm32(1), index);
978
979            jump(op.m_reentry);
980            nonGreedyFailures.link(this);
981        }
982
983        sub32(countRegister, index);
984        m_backtrackingState.fallthrough();
985    }
986
987    void generateCharacterClassOnce(size_t opIndex)
988    {
989        YarrOp& op = m_ops[opIndex];
990        PatternTerm* term = op.m_term;
991
992        const RegisterID character = regT0;
993
994        JumpList matchDest;
995        readCharacter(term->inputPosition - m_checked, character);
996        matchCharacterClass(character, matchDest, term->characterClass);
997
998        if (term->invert())
999            op.m_jumps.append(matchDest);
1000        else {
1001            op.m_jumps.append(jump());
1002            matchDest.link(this);
1003        }
1004    }
1005    void backtrackCharacterClassOnce(size_t opIndex)
1006    {
1007        backtrackTermDefault(opIndex);
1008    }
1009
1010    void generateCharacterClassFixed(size_t opIndex)
1011    {
1012        YarrOp& op = m_ops[opIndex];
1013        PatternTerm* term = op.m_term;
1014
1015        const RegisterID character = regT0;
1016        const RegisterID countRegister = regT1;
1017
1018        move(index, countRegister);
1019        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
1020
1021        Label loop(this);
1022        JumpList matchDest;
1023        if (m_charSize == Char8)
1024            load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
1025        else
1026            load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
1027        matchCharacterClass(character, matchDest, term->characterClass);
1028
1029        if (term->invert())
1030            op.m_jumps.append(matchDest);
1031        else {
1032            op.m_jumps.append(jump());
1033            matchDest.link(this);
1034        }
1035
1036        add32(TrustedImm32(1), countRegister);
1037        branch32(NotEqual, countRegister, index).linkTo(loop, this);
1038    }
1039    void backtrackCharacterClassFixed(size_t opIndex)
1040    {
1041        backtrackTermDefault(opIndex);
1042    }
1043
1044    void generateCharacterClassGreedy(size_t opIndex)
1045    {
1046        YarrOp& op = m_ops[opIndex];
1047        PatternTerm* term = op.m_term;
1048
1049        const RegisterID character = regT0;
1050        const RegisterID countRegister = regT1;
1051
1052        move(TrustedImm32(0), countRegister);
1053
1054        JumpList failures;
1055        Label loop(this);
1056        failures.append(atEndOfInput());
1057
1058        if (term->invert()) {
1059            readCharacter(term->inputPosition - m_checked, character);
1060            matchCharacterClass(character, failures, term->characterClass);
1061        } else {
1062            JumpList matchDest;
1063            readCharacter(term->inputPosition - m_checked, character);
1064            matchCharacterClass(character, matchDest, term->characterClass);
1065            failures.append(jump());
1066            matchDest.link(this);
1067        }
1068
1069        add32(TrustedImm32(1), countRegister);
1070        add32(TrustedImm32(1), index);
1071        if (term->quantityCount != quantifyInfinite) {
1072            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
1073            failures.append(jump());
1074        } else
1075            jump(loop);
1076
1077        failures.link(this);
1078        op.m_reentry = label();
1079
1080        storeToFrame(countRegister, term->frameLocation);
1081    }
1082    void backtrackCharacterClassGreedy(size_t opIndex)
1083    {
1084        YarrOp& op = m_ops[opIndex];
1085        PatternTerm* term = op.m_term;
1086
1087        const RegisterID countRegister = regT1;
1088
1089        m_backtrackingState.link(this);
1090
1091        loadFromFrame(term->frameLocation, countRegister);
1092        m_backtrackingState.append(branchTest32(Zero, countRegister));
1093        sub32(TrustedImm32(1), countRegister);
1094        sub32(TrustedImm32(1), index);
1095        jump(op.m_reentry);
1096    }
1097
1098    void generateCharacterClassNonGreedy(size_t opIndex)
1099    {
1100        YarrOp& op = m_ops[opIndex];
1101        PatternTerm* term = op.m_term;
1102
1103        const RegisterID countRegister = regT1;
1104
1105        move(TrustedImm32(0), countRegister);
1106        op.m_reentry = label();
1107        storeToFrame(countRegister, term->frameLocation);
1108    }
1109    void backtrackCharacterClassNonGreedy(size_t opIndex)
1110    {
1111        YarrOp& op = m_ops[opIndex];
1112        PatternTerm* term = op.m_term;
1113
1114        const RegisterID character = regT0;
1115        const RegisterID countRegister = regT1;
1116
1117        JumpList nonGreedyFailures;
1118
1119        m_backtrackingState.link(this);
1120
1121        loadFromFrame(term->frameLocation, countRegister);
1122
1123        nonGreedyFailures.append(atEndOfInput());
1124        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
1125
1126        JumpList matchDest;
1127        readCharacter(term->inputPosition - m_checked, character);
1128        matchCharacterClass(character, matchDest, term->characterClass);
1129
1130        if (term->invert())
1131            nonGreedyFailures.append(matchDest);
1132        else {
1133            nonGreedyFailures.append(jump());
1134            matchDest.link(this);
1135        }
1136
1137        add32(TrustedImm32(1), countRegister);
1138        add32(TrustedImm32(1), index);
1139
1140        jump(op.m_reentry);
1141
1142        nonGreedyFailures.link(this);
1143        sub32(countRegister, index);
1144        m_backtrackingState.fallthrough();
1145    }
1146
1147    void generateDotStarEnclosure(size_t opIndex)
1148    {
1149        YarrOp& op = m_ops[opIndex];
1150        PatternTerm* term = op.m_term;
1151
1152        const RegisterID character = regT0;
1153        const RegisterID matchPos = regT1;
1154
1155        JumpList foundBeginningNewLine;
1156        JumpList saveStartIndex;
1157        JumpList foundEndingNewLine;
1158
1159        ASSERT(!m_pattern.m_body->m_hasFixedSize);
1160        getMatchStart(matchPos);
1161
1162        saveStartIndex.append(branchTest32(Zero, matchPos));
1163        Label findBOLLoop(this);
1164        sub32(TrustedImm32(1), matchPos);
1165        if (m_charSize == Char8)
1166            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1167        else
1168            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1169        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
1170        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
1171        saveStartIndex.append(jump());
1172
1173        foundBeginningNewLine.link(this);
1174        add32(TrustedImm32(1), matchPos); // Advance past newline
1175        saveStartIndex.link(this);
1176
1177        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
1178            op.m_jumps.append(branchTest32(NonZero, matchPos));
1179
1180        ASSERT(!m_pattern.m_body->m_hasFixedSize);
1181        setMatchStart(matchPos);
1182
1183        move(index, matchPos);
1184
1185        Label findEOLLoop(this);
1186        foundEndingNewLine.append(branch32(Equal, matchPos, length));
1187        if (m_charSize == Char8)
1188            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1189        else
1190            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1191        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
1192        add32(TrustedImm32(1), matchPos);
1193        jump(findEOLLoop);
1194
1195        foundEndingNewLine.link(this);
1196
1197        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
1198            op.m_jumps.append(branch32(NotEqual, matchPos, length));
1199
1200        move(matchPos, index);
1201    }
1202
1203    void backtrackDotStarEnclosure(size_t opIndex)
1204    {
1205        backtrackTermDefault(opIndex);
1206    }
1207
1208    // Code generation/backtracking for simple terms
1209    // (pattern characters, character classes, and assertions).
1210    // These methods farm out work to the set of functions above.
1211    void generateTerm(size_t opIndex)
1212    {
1213        YarrOp& op = m_ops[opIndex];
1214        PatternTerm* term = op.m_term;
1215
1216        switch (term->type) {
1217        case PatternTerm::TypePatternCharacter:
1218            switch (term->quantityType) {
1219            case QuantifierFixedCount:
1220                if (term->quantityCount == 1)
1221                    generatePatternCharacterOnce(opIndex);
1222                else
1223                    generatePatternCharacterFixed(opIndex);
1224                break;
1225            case QuantifierGreedy:
1226                generatePatternCharacterGreedy(opIndex);
1227                break;
1228            case QuantifierNonGreedy:
1229                generatePatternCharacterNonGreedy(opIndex);
1230                break;
1231            }
1232            break;
1233
1234        case PatternTerm::TypeCharacterClass:
1235            switch (term->quantityType) {
1236            case QuantifierFixedCount:
1237                if (term->quantityCount == 1)
1238                    generateCharacterClassOnce(opIndex);
1239                else
1240                    generateCharacterClassFixed(opIndex);
1241                break;
1242            case QuantifierGreedy:
1243                generateCharacterClassGreedy(opIndex);
1244                break;
1245            case QuantifierNonGreedy:
1246                generateCharacterClassNonGreedy(opIndex);
1247                break;
1248            }
1249            break;
1250
1251        case PatternTerm::TypeAssertionBOL:
1252            generateAssertionBOL(opIndex);
1253            break;
1254
1255        case PatternTerm::TypeAssertionEOL:
1256            generateAssertionEOL(opIndex);
1257            break;
1258
1259        case PatternTerm::TypeAssertionWordBoundary:
1260            generateAssertionWordBoundary(opIndex);
1261            break;
1262
1263        case PatternTerm::TypeForwardReference:
1264            break;
1265
1266        case PatternTerm::TypeParenthesesSubpattern:
1267        case PatternTerm::TypeParentheticalAssertion:
1268            RELEASE_ASSERT_NOT_REACHED();
1269        case PatternTerm::TypeBackReference:
1270            m_shouldFallBack = true;
1271            break;
1272        case PatternTerm::TypeDotStarEnclosure:
1273            generateDotStarEnclosure(opIndex);
1274            break;
1275        }
1276    }
1277    void backtrackTerm(size_t opIndex)
1278    {
1279        YarrOp& op = m_ops[opIndex];
1280        PatternTerm* term = op.m_term;
1281
1282        switch (term->type) {
1283        case PatternTerm::TypePatternCharacter:
1284            switch (term->quantityType) {
1285            case QuantifierFixedCount:
1286                if (term->quantityCount == 1)
1287                    backtrackPatternCharacterOnce(opIndex);
1288                else
1289                    backtrackPatternCharacterFixed(opIndex);
1290                break;
1291            case QuantifierGreedy:
1292                backtrackPatternCharacterGreedy(opIndex);
1293                break;
1294            case QuantifierNonGreedy:
1295                backtrackPatternCharacterNonGreedy(opIndex);
1296                break;
1297            }
1298            break;
1299
1300        case PatternTerm::TypeCharacterClass:
1301            switch (term->quantityType) {
1302            case QuantifierFixedCount:
1303                if (term->quantityCount == 1)
1304                    backtrackCharacterClassOnce(opIndex);
1305                else
1306                    backtrackCharacterClassFixed(opIndex);
1307                break;
1308            case QuantifierGreedy:
1309                backtrackCharacterClassGreedy(opIndex);
1310                break;
1311            case QuantifierNonGreedy:
1312                backtrackCharacterClassNonGreedy(opIndex);
1313                break;
1314            }
1315            break;
1316
1317        case PatternTerm::TypeAssertionBOL:
1318            backtrackAssertionBOL(opIndex);
1319            break;
1320
1321        case PatternTerm::TypeAssertionEOL:
1322            backtrackAssertionEOL(opIndex);
1323            break;
1324
1325        case PatternTerm::TypeAssertionWordBoundary:
1326            backtrackAssertionWordBoundary(opIndex);
1327            break;
1328
1329        case PatternTerm::TypeForwardReference:
1330            break;
1331
1332        case PatternTerm::TypeParenthesesSubpattern:
1333        case PatternTerm::TypeParentheticalAssertion:
1334            RELEASE_ASSERT_NOT_REACHED();
1335
1336        case PatternTerm::TypeDotStarEnclosure:
1337            backtrackDotStarEnclosure(opIndex);
1338            break;
1339
1340        case PatternTerm::TypeBackReference:
1341            m_shouldFallBack = true;
1342            break;
1343        }
1344    }
1345
1346    void generate()
1347    {
1348        // Forwards generate the matching code.
1349        ASSERT(m_ops.size());
1350        size_t opIndex = 0;
1351
1352        do {
1353            YarrOp& op = m_ops[opIndex];
1354            switch (op.m_op) {
1355
1356            case OpTerm:
1357                generateTerm(opIndex);
1358                break;
1359
1360            // OpBodyAlternativeBegin/Next/End
1361            //
1362            // These nodes wrap the set of alternatives in the body of the regular expression.
1363            // There may be either one or two chains of OpBodyAlternative nodes, one representing
1364            // the 'once through' sequence of alternatives (if any exist), and one representing
1365            // the repeating alternatives (again, if any exist).
1366            //
1367            // Upon normal entry to the Begin alternative, we will check that input is available.
1368            // Reentry to the Begin alternative will take place after the check has taken place,
1369            // and will assume that the input position has already been progressed as appropriate.
1370            //
1371            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1372            // successfully completed a match - return a success state from JIT code.
1373            //
1374            // Next alternatives allow for reentry optimized to suit backtracking from its
1375            // preceding alternative. It expects the input position to still be set to a position
1376            // appropriate to its predecessor, and it will only perform an input check if the
1377            // predecessor had a minimum size less than its own.
1378            //
1379            // In the case 'once through' expressions, the End node will also have a reentry
1380            // point to jump to when the last alternative fails. Again, this expects the input
1381            // position to still reflect that expected by the prior alternative.
1382            case OpBodyAlternativeBegin: {
1383                PatternAlternative* alternative = op.m_alternative;
1384
1385                // Upon entry at the head of the set of alternatives, check if input is available
1386                // to run the first alternative. (This progresses the input position).
1387                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1388                // We will reenter after the check, and assume the input position to have been
1389                // set as appropriate to this alternative.
1390                op.m_reentry = label();
1391
1392                m_checked += alternative->m_minimumSize;
1393                break;
1394            }
1395            case OpBodyAlternativeNext:
1396            case OpBodyAlternativeEnd: {
1397                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1398                PatternAlternative* alternative = op.m_alternative;
1399
1400                // If we get here, the prior alternative matched - return success.
1401
1402                // Adjust the stack pointer to remove the pattern's frame.
1403                removeCallFrame();
1404
1405                // Load appropriate values into the return register and the first output
1406                // slot, and return. In the case of pattern with a fixed size, we will
1407                // not have yet set the value in the first
1408                ASSERT(index != returnRegister);
1409                if (m_pattern.m_body->m_hasFixedSize) {
1410                    move(index, returnRegister);
1411                    if (priorAlternative->m_minimumSize)
1412                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1413                    if (compileMode == IncludeSubpatterns)
1414                        store32(returnRegister, output);
1415                } else
1416                    getMatchStart(returnRegister);
1417                if (compileMode == IncludeSubpatterns)
1418                    store32(index, Address(output, 4));
1419                move(index, returnRegister2);
1420
1421                generateReturn();
1422
1423                // This is the divide between the tail of the prior alternative, above, and
1424                // the head of the subsequent alternative, below.
1425
1426                if (op.m_op == OpBodyAlternativeNext) {
1427                    // This is the reentry point for the Next alternative. We expect any code
1428                    // that jumps here to do so with the input position matching that of the
1429                    // PRIOR alteranative, and we will only check input availability if we
1430                    // need to progress it forwards.
1431                    op.m_reentry = label();
1432                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1433                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1434                        op.m_jumps.append(jumpIfNoAvailableInput());
1435                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1436                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1437                } else if (op.m_nextOp == notFound) {
1438                    // This is the reentry point for the End of 'once through' alternatives,
1439                    // jumped to when the last alternative fails to match.
1440                    op.m_reentry = label();
1441                    sub32(Imm32(priorAlternative->m_minimumSize), index);
1442                }
1443
1444                if (op.m_op == OpBodyAlternativeNext)
1445                    m_checked += alternative->m_minimumSize;
1446                m_checked -= priorAlternative->m_minimumSize;
1447                break;
1448            }
1449
1450            // OpSimpleNestedAlternativeBegin/Next/End
1451            // OpNestedAlternativeBegin/Next/End
1452            //
1453            // These nodes are used to handle sets of alternatives that are nested within
1454            // subpatterns and parenthetical assertions. The 'simple' forms are used where
1455            // we do not need to be able to backtrack back into any alternative other than
1456            // the last, the normal forms allow backtracking into any alternative.
1457            //
1458            // Each Begin/Next node is responsible for planting an input check to ensure
1459            // sufficient input is available on entry. Next nodes additionally need to
1460            // jump to the end - Next nodes use the End node's m_jumps list to hold this
1461            // set of jumps.
1462            //
1463            // In the non-simple forms, successful alternative matches must store a
1464            // 'return address' using a DataLabelPtr, used to store the address to jump
1465            // to when backtracking, to get to the code for the appropriate alternative.
1466            case OpSimpleNestedAlternativeBegin:
1467            case OpNestedAlternativeBegin: {
1468                PatternTerm* term = op.m_term;
1469                PatternAlternative* alternative = op.m_alternative;
1470                PatternDisjunction* disjunction = term->parentheses.disjunction;
1471
1472                // Calculate how much input we need to check for, and if non-zero check.
1473                op.m_checkAdjust = alternative->m_minimumSize;
1474                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1475                    op.m_checkAdjust -= disjunction->m_minimumSize;
1476                if (op.m_checkAdjust)
1477                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1478
1479                m_checked += op.m_checkAdjust;
1480                break;
1481            }
1482            case OpSimpleNestedAlternativeNext:
1483            case OpNestedAlternativeNext: {
1484                PatternTerm* term = op.m_term;
1485                PatternAlternative* alternative = op.m_alternative;
1486                PatternDisjunction* disjunction = term->parentheses.disjunction;
1487
1488                // In the non-simple case, store a 'return address' so we can backtrack correctly.
1489                if (op.m_op == OpNestedAlternativeNext) {
1490                    unsigned parenthesesFrameLocation = term->frameLocation;
1491                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
1492                    if (term->quantityType != QuantifierFixedCount)
1493                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1494                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1495                }
1496
1497                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1498                    // If the previous alternative matched without consuming characters then
1499                    // backtrack to try to match while consumming some input.
1500                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1501                }
1502
1503                // If we reach here then the last alternative has matched - jump to the
1504                // End node, to skip over any further alternatives.
1505                //
1506                // FIXME: this is logically O(N^2) (though N can be expected to be very
1507                // small). We could avoid this either by adding an extra jump to the JIT
1508                // data structures, or by making backtracking code that jumps to Next
1509                // alternatives are responsible for checking that input is available (if
1510                // we didn't need to plant the input checks, then m_jumps would be free).
1511                YarrOp* endOp = &m_ops[op.m_nextOp];
1512                while (endOp->m_nextOp != notFound) {
1513                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1514                    endOp = &m_ops[endOp->m_nextOp];
1515                }
1516                ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1517                endOp->m_jumps.append(jump());
1518
1519                // This is the entry point for the next alternative.
1520                op.m_reentry = label();
1521
1522                // Calculate how much input we need to check for, and if non-zero check.
1523                op.m_checkAdjust = alternative->m_minimumSize;
1524                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1525                    op.m_checkAdjust -= disjunction->m_minimumSize;
1526                if (op.m_checkAdjust)
1527                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1528
1529                YarrOp& lastOp = m_ops[op.m_previousOp];
1530                m_checked -= lastOp.m_checkAdjust;
1531                m_checked += op.m_checkAdjust;
1532                break;
1533            }
1534            case OpSimpleNestedAlternativeEnd:
1535            case OpNestedAlternativeEnd: {
1536                PatternTerm* term = op.m_term;
1537
1538                // In the non-simple case, store a 'return address' so we can backtrack correctly.
1539                if (op.m_op == OpNestedAlternativeEnd) {
1540                    unsigned parenthesesFrameLocation = term->frameLocation;
1541                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
1542                    if (term->quantityType != QuantifierFixedCount)
1543                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1544                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1545                }
1546
1547                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1548                    // If the previous alternative matched without consuming characters then
1549                    // backtrack to try to match while consumming some input.
1550                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1551                }
1552
1553                // If this set of alternatives contains more than one alternative,
1554                // then the Next nodes will have planted jumps to the End, and added
1555                // them to this node's m_jumps list.
1556                op.m_jumps.link(this);
1557                op.m_jumps.clear();
1558
1559                YarrOp& lastOp = m_ops[op.m_previousOp];
1560                m_checked -= lastOp.m_checkAdjust;
1561                break;
1562            }
1563
1564            // OpParenthesesSubpatternOnceBegin/End
1565            //
1566            // These nodes support (optionally) capturing subpatterns, that have a
1567            // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1568            case OpParenthesesSubpatternOnceBegin: {
1569                PatternTerm* term = op.m_term;
1570                unsigned parenthesesFrameLocation = term->frameLocation;
1571                const RegisterID indexTemporary = regT0;
1572                ASSERT(term->quantityCount == 1);
1573
1574                // Upon entry to a Greedy quantified set of parenthese store the index.
1575                // We'll use this for two purposes:
1576                //  - To indicate which iteration we are on of mathing the remainder of
1577                //    the expression after the parentheses - the first, including the
1578                //    match within the parentheses, or the second having skipped over them.
1579                //  - To check for empty matches, which must be rejected.
1580                //
1581                // At the head of a NonGreedy set of parentheses we'll immediately set the
1582                // value on the stack to -1 (indicating a match skipping the subpattern),
1583                // and plant a jump to the end. We'll also plant a label to backtrack to
1584                // to reenter the subpattern later, with a store to set up index on the
1585                // second iteration.
1586                //
1587                // FIXME: for capturing parens, could use the index in the capture array?
1588                if (term->quantityType == QuantifierGreedy)
1589                    storeToFrame(index, parenthesesFrameLocation);
1590                else if (term->quantityType == QuantifierNonGreedy) {
1591                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1592                    op.m_jumps.append(jump());
1593                    op.m_reentry = label();
1594                    storeToFrame(index, parenthesesFrameLocation);
1595                }
1596
1597                // If the parenthese are capturing, store the starting index value to the
1598                // captures array, offsetting as necessary.
1599                //
1600                // FIXME: could avoid offsetting this value in JIT code, apply
1601                // offsets only afterwards, at the point the results array is
1602                // being accessed.
1603                if (term->capture() && compileMode == IncludeSubpatterns) {
1604                    int inputOffset = term->inputPosition - m_checked;
1605                    if (term->quantityType == QuantifierFixedCount)
1606                        inputOffset -= term->parentheses.disjunction->m_minimumSize;
1607                    if (inputOffset) {
1608                        move(index, indexTemporary);
1609                        add32(Imm32(inputOffset), indexTemporary);
1610                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
1611                    } else
1612                        setSubpatternStart(index, term->parentheses.subpatternId);
1613                }
1614                break;
1615            }
1616            case OpParenthesesSubpatternOnceEnd: {
1617                PatternTerm* term = op.m_term;
1618                const RegisterID indexTemporary = regT0;
1619                ASSERT(term->quantityCount == 1);
1620
1621#ifndef NDEBUG
1622                // Runtime ASSERT to make sure that the nested alternative handled the
1623                // "no input consumed" check.
1624                if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
1625                    Jump pastBreakpoint;
1626                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1627                    breakpoint();
1628                    pastBreakpoint.link(this);
1629                }
1630#endif
1631
1632                // If the parenthese are capturing, store the ending index value to the
1633                // captures array, offsetting as necessary.
1634                //
1635                // FIXME: could avoid offsetting this value in JIT code, apply
1636                // offsets only afterwards, at the point the results array is
1637                // being accessed.
1638                if (term->capture() && compileMode == IncludeSubpatterns) {
1639                    int inputOffset = term->inputPosition - m_checked;
1640                    if (inputOffset) {
1641                        move(index, indexTemporary);
1642                        add32(Imm32(inputOffset), indexTemporary);
1643                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
1644                    } else
1645                        setSubpatternEnd(index, term->parentheses.subpatternId);
1646                }
1647
1648                // If the parentheses are quantified Greedy then add a label to jump back
1649                // to if get a failed match from after the parentheses. For NonGreedy
1650                // parentheses, link the jump from before the subpattern to here.
1651                if (term->quantityType == QuantifierGreedy)
1652                    op.m_reentry = label();
1653                else if (term->quantityType == QuantifierNonGreedy) {
1654                    YarrOp& beginOp = m_ops[op.m_previousOp];
1655                    beginOp.m_jumps.link(this);
1656                }
1657                break;
1658            }
1659
1660            // OpParenthesesSubpatternTerminalBegin/End
1661            case OpParenthesesSubpatternTerminalBegin: {
1662                PatternTerm* term = op.m_term;
1663                ASSERT(term->quantityType == QuantifierGreedy);
1664                ASSERT(term->quantityCount == quantifyInfinite);
1665                ASSERT(!term->capture());
1666
1667                // Upon entry set a label to loop back to.
1668                op.m_reentry = label();
1669
1670                // Store the start index of the current match; we need to reject zero
1671                // length matches.
1672                storeToFrame(index, term->frameLocation);
1673                break;
1674            }
1675            case OpParenthesesSubpatternTerminalEnd: {
1676                YarrOp& beginOp = m_ops[op.m_previousOp];
1677#ifndef NDEBUG
1678                PatternTerm* term = op.m_term;
1679
1680                // Runtime ASSERT to make sure that the nested alternative handled the
1681                // "no input consumed" check.
1682                Jump pastBreakpoint;
1683                pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1684                breakpoint();
1685                pastBreakpoint.link(this);
1686#endif
1687
1688                // We know that the match is non-zero, we can accept it  and
1689                // loop back up to the head of the subpattern.
1690                jump(beginOp.m_reentry);
1691
1692                // This is the entry point to jump to when we stop matching - we will
1693                // do so once the subpattern cannot match any more.
1694                op.m_reentry = label();
1695                break;
1696            }
1697
1698            // OpParentheticalAssertionBegin/End
1699            case OpParentheticalAssertionBegin: {
1700                PatternTerm* term = op.m_term;
1701
1702                // Store the current index - assertions should not update index, so
1703                // we will need to restore it upon a successful match.
1704                unsigned parenthesesFrameLocation = term->frameLocation;
1705                storeToFrame(index, parenthesesFrameLocation);
1706
1707                // Check
1708                op.m_checkAdjust = m_checked - term->inputPosition;
1709                if (op.m_checkAdjust)
1710                    sub32(Imm32(op.m_checkAdjust), index);
1711
1712                m_checked -= op.m_checkAdjust;
1713                break;
1714            }
1715            case OpParentheticalAssertionEnd: {
1716                PatternTerm* term = op.m_term;
1717
1718                // Restore the input index value.
1719                unsigned parenthesesFrameLocation = term->frameLocation;
1720                loadFromFrame(parenthesesFrameLocation, index);
1721
1722                // If inverted, a successful match of the assertion must be treated
1723                // as a failure, so jump to backtracking.
1724                if (term->invert()) {
1725                    op.m_jumps.append(jump());
1726                    op.m_reentry = label();
1727                }
1728
1729                YarrOp& lastOp = m_ops[op.m_previousOp];
1730                m_checked += lastOp.m_checkAdjust;
1731                break;
1732            }
1733
1734            case OpMatchFailed:
1735                removeCallFrame();
1736                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1737                move(TrustedImm32(0), returnRegister2);
1738                generateReturn();
1739                break;
1740            }
1741
1742            ++opIndex;
1743        } while (opIndex < m_ops.size());
1744    }
1745
1746    void backtrack()
1747    {
1748        // Backwards generate the backtracking code.
1749        size_t opIndex = m_ops.size();
1750        ASSERT(opIndex);
1751
1752        do {
1753            --opIndex;
1754            YarrOp& op = m_ops[opIndex];
1755            switch (op.m_op) {
1756
1757            case OpTerm:
1758                backtrackTerm(opIndex);
1759                break;
1760
1761            // OpBodyAlternativeBegin/Next/End
1762            //
1763            // For each Begin/Next node representing an alternative, we need to decide what to do
1764            // in two circumstances:
1765            //  - If we backtrack back into this node, from within the alternative.
1766            //  - If the input check at the head of the alternative fails (if this exists).
1767            //
1768            // We treat these two cases differently since in the former case we have slightly
1769            // more information - since we are backtracking out of a prior alternative we know
1770            // that at least enough input was available to run it. For example, given the regular
1771            // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1772            // character match of 'a'), then we need not perform an additional input availability
1773            // check before running the second alternative.
1774            //
1775            // Backtracking required differs for the last alternative, which in the case of the
1776            // repeating set of alternatives must loop. The code generated for the last alternative
1777            // will also be used to handle all input check failures from any prior alternatives -
1778            // these require similar functionality, in seeking the next available alternative for
1779            // which there is sufficient input.
1780            //
1781            // Since backtracking of all other alternatives simply requires us to link backtracks
1782            // to the reentry point for the subsequent alternative, we will only be generating any
1783            // code when backtracking the last alternative.
1784            case OpBodyAlternativeBegin:
1785            case OpBodyAlternativeNext: {
1786                PatternAlternative* alternative = op.m_alternative;
1787
1788                if (op.m_op == OpBodyAlternativeNext) {
1789                    PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1790                    m_checked += priorAlternative->m_minimumSize;
1791                }
1792                m_checked -= alternative->m_minimumSize;
1793
1794                // Is this the last alternative? If not, then if we backtrack to this point we just
1795                // need to jump to try to match the next alternative.
1796                if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1797                    m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1798                    break;
1799                }
1800                YarrOp& endOp = m_ops[op.m_nextOp];
1801
1802                YarrOp* beginOp = &op;
1803                while (beginOp->m_op != OpBodyAlternativeBegin) {
1804                    ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1805                    beginOp = &m_ops[beginOp->m_previousOp];
1806                }
1807
1808                bool onceThrough = endOp.m_nextOp == notFound;
1809
1810                // First, generate code to handle cases where we backtrack out of an attempted match
1811                // of the last alternative. If this is a 'once through' set of alternatives then we
1812                // have nothing to do - link this straight through to the End.
1813                if (onceThrough)
1814                    m_backtrackingState.linkTo(endOp.m_reentry, this);
1815                else {
1816                    // If we don't need to move the input poistion, and the pattern has a fixed size
1817                    // (in which case we omit the store of the start index until the pattern has matched)
1818                    // then we can just link the backtrack out of the last alternative straight to the
1819                    // head of the first alternative.
1820                    if (m_pattern.m_body->m_hasFixedSize
1821                        && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1822                        && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1823                        m_backtrackingState.linkTo(beginOp->m_reentry, this);
1824                    else {
1825                        // We need to generate a trampoline of code to execute before looping back
1826                        // around to the first alternative.
1827                        m_backtrackingState.link(this);
1828
1829                        // If the pattern size is not fixed, then store the start index, for use if we match.
1830                        if (!m_pattern.m_body->m_hasFixedSize) {
1831                            if (alternative->m_minimumSize == 1)
1832                                setMatchStart(index);
1833                            else {
1834                                move(index, regT0);
1835                                if (alternative->m_minimumSize)
1836                                    sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1837                                else
1838                                    add32(TrustedImm32(1), regT0);
1839                                setMatchStart(regT0);
1840                            }
1841                        }
1842
1843                        // Generate code to loop. Check whether the last alternative is longer than the
1844                        // first (e.g. /a|xy/ or /a|xyz/).
1845                        if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1846                            // We want to loop, and increment input position. If the delta is 1, it is
1847                            // already correctly incremented, if more than one then decrement as appropriate.
1848                            unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1849                            ASSERT(delta);
1850                            if (delta != 1)
1851                                sub32(Imm32(delta - 1), index);
1852                            jump(beginOp->m_reentry);
1853                        } else {
1854                            // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1855                            // be sufficent input available to handle this, so just fall through.
1856                            unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1857                            if (delta != 0xFFFFFFFFu) {
1858                                // We need to check input because we are incrementing the input.
1859                                add32(Imm32(delta + 1), index);
1860                                checkInput().linkTo(beginOp->m_reentry, this);
1861                            }
1862                        }
1863                    }
1864                }
1865
1866                // We can reach this point in the code in two ways:
1867                //  - Fallthrough from the code above (a repeating alternative backtracked out of its
1868                //    last alternative, and did not have sufficent input to run the first).
1869                //  - We will loop back up to the following label when a releating alternative loops,
1870                //    following a failed input check.
1871                //
1872                // Either way, we have just failed the input check for the first alternative.
1873                Label firstInputCheckFailed(this);
1874
1875                // Generate code to handle input check failures from alternatives except the last.
1876                // prevOp is the alternative we're handling a bail out from (initially Begin), and
1877                // nextOp is the alternative we will be attempting to reenter into.
1878                //
1879                // We will link input check failures from the forwards matching path back to the code
1880                // that can handle them.
1881                YarrOp* prevOp = beginOp;
1882                YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1883                while (nextOp->m_op != OpBodyAlternativeEnd) {
1884                    prevOp->m_jumps.link(this);
1885
1886                    // We only get here if an input check fails, it is only worth checking again
1887                    // if the next alternative has a minimum size less than the last.
1888                    if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1889                        // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1890                        // subtract delta back out, and reduce this code. Should performance test
1891                        // the benefit of this.
1892                        unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1893                        sub32(Imm32(delta), index);
1894                        Jump fail = jumpIfNoAvailableInput();
1895                        add32(Imm32(delta), index);
1896                        jump(nextOp->m_reentry);
1897                        fail.link(this);
1898                    } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1899                        add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1900                    prevOp = nextOp;
1901                    nextOp = &m_ops[nextOp->m_nextOp];
1902                }
1903
1904                // We fall through to here if there is insufficient input to run the last alternative.
1905
1906                // If there is insufficient input to run the last alternative, then for 'once through'
1907                // alternatives we are done - just jump back up into the forwards matching path at the End.
1908                if (onceThrough) {
1909                    op.m_jumps.linkTo(endOp.m_reentry, this);
1910                    jump(endOp.m_reentry);
1911                    break;
1912                }
1913
1914                // For repeating alternatives, link any input check failure from the last alternative to
1915                // this point.
1916                op.m_jumps.link(this);
1917
1918                bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1919
1920                // Check for cases where input position is already incremented by 1 for the last
1921                // alternative (this is particularly useful where the minimum size of the body
1922                // disjunction is 0, e.g. /a*|b/).
1923                if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1924                    // index is already incremented by 1, so just store it now!
1925                    setMatchStart(index);
1926                    needsToUpdateMatchStart = false;
1927                }
1928
1929                // Check whether there is sufficient input to loop. Increment the input position by
1930                // one, and check. Also add in the minimum disjunction size before checking - there
1931                // is no point in looping if we're just going to fail all the input checks around
1932                // the next iteration.
1933                ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1934                if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1935                    // If the last alternative had the same minimum size as the disjunction,
1936                    // just simply increment input pos by 1, no adjustment based on minimum size.
1937                    add32(TrustedImm32(1), index);
1938                } else {
1939                    // If the minumum for the last alternative was one greater than than that
1940                    // for the disjunction, we're already progressed by 1, nothing to do!
1941                    unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1942                    if (delta)
1943                        sub32(Imm32(delta), index);
1944                }
1945                Jump matchFailed = jumpIfNoAvailableInput();
1946
1947                if (needsToUpdateMatchStart) {
1948                    if (!m_pattern.m_body->m_minimumSize)
1949                        setMatchStart(index);
1950                    else {
1951                        move(index, regT0);
1952                        sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1953                        setMatchStart(regT0);
1954                    }
1955                }
1956
1957                // Calculate how much more input the first alternative requires than the minimum
1958                // for the body as a whole. If no more is needed then we dont need an additional
1959                // input check here - jump straight back up to the start of the first alternative.
1960                if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1961                    jump(beginOp->m_reentry);
1962                else {
1963                    if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1964                        add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1965                    else
1966                        sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1967                    checkInput().linkTo(beginOp->m_reentry, this);
1968                    jump(firstInputCheckFailed);
1969                }
1970
1971                // We jump to here if we iterate to the point that there is insufficient input to
1972                // run any matches, and need to return a failure state from JIT code.
1973                matchFailed.link(this);
1974
1975                removeCallFrame();
1976                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1977                move(TrustedImm32(0), returnRegister2);
1978                generateReturn();
1979                break;
1980            }
1981            case OpBodyAlternativeEnd: {
1982                // We should never backtrack back into a body disjunction.
1983                ASSERT(m_backtrackingState.isEmpty());
1984
1985                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1986                m_checked += priorAlternative->m_minimumSize;
1987                break;
1988            }
1989
1990            // OpSimpleNestedAlternativeBegin/Next/End
1991            // OpNestedAlternativeBegin/Next/End
1992            //
1993            // Generate code for when we backtrack back out of an alternative into
1994            // a Begin or Next node, or when the entry input count check fails. If
1995            // there are more alternatives we need to jump to the next alternative,
1996            // if not we backtrack back out of the current set of parentheses.
1997            //
1998            // In the case of non-simple nested assertions we need to also link the
1999            // 'return address' appropriately to backtrack back out into the correct
2000            // alternative.
2001            case OpSimpleNestedAlternativeBegin:
2002            case OpSimpleNestedAlternativeNext:
2003            case OpNestedAlternativeBegin:
2004            case OpNestedAlternativeNext: {
2005                YarrOp& nextOp = m_ops[op.m_nextOp];
2006                bool isBegin = op.m_previousOp == notFound;
2007                bool isLastAlternative = nextOp.m_nextOp == notFound;
2008                ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
2009                ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
2010
2011                // Treat an input check failure the same as a failed match.
2012                m_backtrackingState.append(op.m_jumps);
2013
2014                // Set the backtracks to jump to the appropriate place. We may need
2015                // to link the backtracks in one of three different way depending on
2016                // the type of alternative we are dealing with:
2017                //  - A single alternative, with no simplings.
2018                //  - The last alternative of a set of two or more.
2019                //  - An alternative other than the last of a set of two or more.
2020                //
2021                // In the case of a single alternative on its own, we don't need to
2022                // jump anywhere - if the alternative fails to match we can just
2023                // continue to backtrack out of the parentheses without jumping.
2024                //
2025                // In the case of the last alternative in a set of more than one, we
2026                // need to jump to return back out to the beginning. We'll do so by
2027                // adding a jump to the End node's m_jumps list, and linking this
2028                // when we come to generate the Begin node. For alternatives other
2029                // than the last, we need to jump to the next alternative.
2030                //
2031                // If the alternative had adjusted the input position we must link
2032                // backtracking to here, correct, and then jump on. If not we can
2033                // link the backtracks directly to their destination.
2034                if (op.m_checkAdjust) {
2035                    // Handle the cases where we need to link the backtracks here.
2036                    m_backtrackingState.link(this);
2037                    sub32(Imm32(op.m_checkAdjust), index);
2038                    if (!isLastAlternative) {
2039                        // An alternative that is not the last should jump to its successor.
2040                        jump(nextOp.m_reentry);
2041                    } else if (!isBegin) {
2042                        // The last of more than one alternatives must jump back to the beginning.
2043                        nextOp.m_jumps.append(jump());
2044                    } else {
2045                        // A single alternative on its own can fall through.
2046                        m_backtrackingState.fallthrough();
2047                    }
2048                } else {
2049                    // Handle the cases where we can link the backtracks directly to their destinations.
2050                    if (!isLastAlternative) {
2051                        // An alternative that is not the last should jump to its successor.
2052                        m_backtrackingState.linkTo(nextOp.m_reentry, this);
2053                    } else if (!isBegin) {
2054                        // The last of more than one alternatives must jump back to the beginning.
2055                        m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
2056                    }
2057                    // In the case of a single alternative on its own do nothing - it can fall through.
2058                }
2059
2060                // If there is a backtrack jump from a zero length match link it here.
2061                if (op.m_zeroLengthMatch.isSet())
2062                    m_backtrackingState.append(op.m_zeroLengthMatch);
2063
2064                // At this point we've handled the backtracking back into this node.
2065                // Now link any backtracks that need to jump to here.
2066
2067                // For non-simple alternatives, link the alternative's 'return address'
2068                // so that we backtrack back out into the previous alternative.
2069                if (op.m_op == OpNestedAlternativeNext)
2070                    m_backtrackingState.append(op.m_returnAddress);
2071
2072                // If there is more than one alternative, then the last alternative will
2073                // have planted a jump to be linked to the end. This jump was added to the
2074                // End node's m_jumps list. If we are back at the beginning, link it here.
2075                if (isBegin) {
2076                    YarrOp* endOp = &m_ops[op.m_nextOp];
2077                    while (endOp->m_nextOp != notFound) {
2078                        ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2079                        endOp = &m_ops[endOp->m_nextOp];
2080                    }
2081                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2082                    m_backtrackingState.append(endOp->m_jumps);
2083                }
2084
2085                if (!isBegin) {
2086                    YarrOp& lastOp = m_ops[op.m_previousOp];
2087                    m_checked += lastOp.m_checkAdjust;
2088                }
2089                m_checked -= op.m_checkAdjust;
2090                break;
2091            }
2092            case OpSimpleNestedAlternativeEnd:
2093            case OpNestedAlternativeEnd: {
2094                PatternTerm* term = op.m_term;
2095
2096                // If there is a backtrack jump from a zero length match link it here.
2097                if (op.m_zeroLengthMatch.isSet())
2098                    m_backtrackingState.append(op.m_zeroLengthMatch);
2099
2100                // If we backtrack into the end of a simple subpattern do nothing;
2101                // just continue through into the last alternative. If we backtrack
2102                // into the end of a non-simple set of alterntives we need to jump
2103                // to the backtracking return address set up during generation.
2104                if (op.m_op == OpNestedAlternativeEnd) {
2105                    m_backtrackingState.link(this);
2106
2107                    // Plant a jump to the return address.
2108                    unsigned parenthesesFrameLocation = term->frameLocation;
2109                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
2110                    if (term->quantityType != QuantifierFixedCount)
2111                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2112                    loadFromFrameAndJump(alternativeFrameLocation);
2113
2114                    // Link the DataLabelPtr associated with the end of the last
2115                    // alternative to this point.
2116                    m_backtrackingState.append(op.m_returnAddress);
2117                }
2118
2119                YarrOp& lastOp = m_ops[op.m_previousOp];
2120                m_checked += lastOp.m_checkAdjust;
2121                break;
2122            }
2123
2124            // OpParenthesesSubpatternOnceBegin/End
2125            //
2126            // When we are backtracking back out of a capturing subpattern we need
2127            // to clear the start index in the matches output array, to record that
2128            // this subpattern has not been captured.
2129            //
2130            // When backtracking back out of a Greedy quantified subpattern we need
2131            // to catch this, and try running the remainder of the alternative after
2132            // the subpattern again, skipping the parentheses.
2133            //
2134            // Upon backtracking back into a quantified set of parentheses we need to
2135            // check whether we were currently skipping the subpattern. If not, we
2136            // can backtrack into them, if we were we need to either backtrack back
2137            // out of the start of the parentheses, or jump back to the forwards
2138            // matching start, depending of whether the match is Greedy or NonGreedy.
2139            case OpParenthesesSubpatternOnceBegin: {
2140                PatternTerm* term = op.m_term;
2141                ASSERT(term->quantityCount == 1);
2142
2143                // We only need to backtrack to thispoint if capturing or greedy.
2144                if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
2145                    m_backtrackingState.link(this);
2146
2147                    // If capturing, clear the capture (we only need to reset start).
2148                    if (term->capture() && compileMode == IncludeSubpatterns)
2149                        clearSubpatternStart(term->parentheses.subpatternId);
2150
2151                    // If Greedy, jump to the end.
2152                    if (term->quantityType == QuantifierGreedy) {
2153                        // Clear the flag in the stackframe indicating we ran through the subpattern.
2154                        unsigned parenthesesFrameLocation = term->frameLocation;
2155                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
2156                        // Jump to after the parentheses, skipping the subpattern.
2157                        jump(m_ops[op.m_nextOp].m_reentry);
2158                        // A backtrack from after the parentheses, when skipping the subpattern,
2159                        // will jump back to here.
2160                        op.m_jumps.link(this);
2161                    }
2162
2163                    m_backtrackingState.fallthrough();
2164                }
2165                break;
2166            }
2167            case OpParenthesesSubpatternOnceEnd: {
2168                PatternTerm* term = op.m_term;
2169
2170                if (term->quantityType != QuantifierFixedCount) {
2171                    m_backtrackingState.link(this);
2172
2173                    // Check whether we should backtrack back into the parentheses, or if we
2174                    // are currently in a state where we had skipped over the subpattern
2175                    // (in which case the flag value on the stack will be -1).
2176                    unsigned parenthesesFrameLocation = term->frameLocation;
2177                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2178
2179                    if (term->quantityType == QuantifierGreedy) {
2180                        // For Greedy parentheses, we skip after having already tried going
2181                        // through the subpattern, so if we get here we're done.
2182                        YarrOp& beginOp = m_ops[op.m_previousOp];
2183                        beginOp.m_jumps.append(hadSkipped);
2184                    } else {
2185                        // For NonGreedy parentheses, we try skipping the subpattern first,
2186                        // so if we get here we need to try running through the subpattern
2187                        // next. Jump back to the start of the parentheses in the forwards
2188                        // matching path.
2189                        ASSERT(term->quantityType == QuantifierNonGreedy);
2190                        YarrOp& beginOp = m_ops[op.m_previousOp];
2191                        hadSkipped.linkTo(beginOp.m_reentry, this);
2192                    }
2193
2194                    m_backtrackingState.fallthrough();
2195                }
2196
2197                m_backtrackingState.append(op.m_jumps);
2198                break;
2199            }
2200
2201            // OpParenthesesSubpatternTerminalBegin/End
2202            //
2203            // Terminal subpatterns will always match - there is nothing after them to
2204            // force a backtrack, and they have a minimum count of 0, and as such will
2205            // always produce an acceptable result.
2206            case OpParenthesesSubpatternTerminalBegin: {
2207                // We will backtrack to this point once the subpattern cannot match any
2208                // more. Since no match is accepted as a successful match (we are Greedy
2209                // quantified with a minimum of zero) jump back to the forwards matching
2210                // path at the end.
2211                YarrOp& endOp = m_ops[op.m_nextOp];
2212                m_backtrackingState.linkTo(endOp.m_reentry, this);
2213                break;
2214            }
2215            case OpParenthesesSubpatternTerminalEnd:
2216                // We should never be backtracking to here (hence the 'terminal' in the name).
2217                ASSERT(m_backtrackingState.isEmpty());
2218                m_backtrackingState.append(op.m_jumps);
2219                break;
2220
2221            // OpParentheticalAssertionBegin/End
2222            case OpParentheticalAssertionBegin: {
2223                PatternTerm* term = op.m_term;
2224                YarrOp& endOp = m_ops[op.m_nextOp];
2225
2226                // We need to handle the backtracks upon backtracking back out
2227                // of a parenthetical assertion if either we need to correct
2228                // the input index, or the assertion was inverted.
2229                if (op.m_checkAdjust || term->invert()) {
2230                     m_backtrackingState.link(this);
2231
2232                    if (op.m_checkAdjust)
2233                        add32(Imm32(op.m_checkAdjust), index);
2234
2235                    // In an inverted assertion failure to match the subpattern
2236                    // is treated as a successful match - jump to the end of the
2237                    // subpattern. We already have adjusted the input position
2238                    // back to that before the assertion, which is correct.
2239                    if (term->invert())
2240                        jump(endOp.m_reentry);
2241
2242                    m_backtrackingState.fallthrough();
2243                }
2244
2245                // The End node's jump list will contain any backtracks into
2246                // the end of the assertion. Also, if inverted, we will have
2247                // added the failure caused by a successful match to this.
2248                m_backtrackingState.append(endOp.m_jumps);
2249
2250                m_checked += op.m_checkAdjust;
2251                break;
2252            }
2253            case OpParentheticalAssertionEnd: {
2254                // FIXME: We should really be clearing any nested subpattern
2255                // matches on bailing out from after the pattern. Firefox has
2256                // this bug too (presumably because they use YARR!)
2257
2258                // Never backtrack into an assertion; later failures bail to before the begin.
2259                m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2260
2261                YarrOp& lastOp = m_ops[op.m_previousOp];
2262                m_checked -= lastOp.m_checkAdjust;
2263                break;
2264            }
2265
2266            case OpMatchFailed:
2267                break;
2268            }
2269
2270        } while (opIndex);
2271    }
2272
2273    // Compilation methods:
2274    // ====================
2275
2276    // opCompileParenthesesSubpattern
2277    // Emits ops for a subpattern (set of parentheses). These consist
2278    // of a set of alternatives wrapped in an outer set of nodes for
2279    // the parentheses.
2280    // Supported types of parentheses are 'Once' (quantityCount == 1)
2281    // and 'Terminal' (non-capturing parentheses quantified as greedy
2282    // and infinite).
2283    // Alternatives will use the 'Simple' set of ops if either the
2284    // subpattern is terminal (in which case we will never need to
2285    // backtrack), or if the subpattern only contains one alternative.
2286    void opCompileParenthesesSubpattern(PatternTerm* term)
2287    {
2288        YarrOpCode parenthesesBeginOpCode;
2289        YarrOpCode parenthesesEndOpCode;
2290        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2291        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2292        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2293
2294        // We can currently only compile quantity 1 subpatterns that are
2295        // not copies. We generate a copy in the case of a range quantifier,
2296        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2297        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2298        // comes where the subpattern is capturing, in which case we would
2299        // need to restore the capture from the first subpattern upon a
2300        // failure in the second.
2301        if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2302            // Select the 'Once' nodes.
2303            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2304            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2305
2306            // If there is more than one alternative we cannot use the 'simple' nodes.
2307            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2308                alternativeBeginOpCode = OpNestedAlternativeBegin;
2309                alternativeNextOpCode = OpNestedAlternativeNext;
2310                alternativeEndOpCode = OpNestedAlternativeEnd;
2311            }
2312        } else if (term->parentheses.isTerminal) {
2313            // Select the 'Terminal' nodes.
2314            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2315            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2316        } else {
2317            // This subpattern is not supported by the JIT.
2318            m_shouldFallBack = true;
2319            return;
2320        }
2321
2322        size_t parenBegin = m_ops.size();
2323        m_ops.append(parenthesesBeginOpCode);
2324
2325        m_ops.append(alternativeBeginOpCode);
2326        m_ops.last().m_previousOp = notFound;
2327        m_ops.last().m_term = term;
2328        Vector<OwnPtr<PatternAlternative> >& alternatives =  term->parentheses.disjunction->m_alternatives;
2329        for (unsigned i = 0; i < alternatives.size(); ++i) {
2330            size_t lastOpIndex = m_ops.size() - 1;
2331
2332            PatternAlternative* nestedAlternative = alternatives[i].get();
2333            opCompileAlternative(nestedAlternative);
2334
2335            size_t thisOpIndex = m_ops.size();
2336            m_ops.append(YarrOp(alternativeNextOpCode));
2337
2338            YarrOp& lastOp = m_ops[lastOpIndex];
2339            YarrOp& thisOp = m_ops[thisOpIndex];
2340
2341            lastOp.m_alternative = nestedAlternative;
2342            lastOp.m_nextOp = thisOpIndex;
2343            thisOp.m_previousOp = lastOpIndex;
2344            thisOp.m_term = term;
2345        }
2346        YarrOp& lastOp = m_ops.last();
2347        ASSERT(lastOp.m_op == alternativeNextOpCode);
2348        lastOp.m_op = alternativeEndOpCode;
2349        lastOp.m_alternative = 0;
2350        lastOp.m_nextOp = notFound;
2351
2352        size_t parenEnd = m_ops.size();
2353        m_ops.append(parenthesesEndOpCode);
2354
2355        m_ops[parenBegin].m_term = term;
2356        m_ops[parenBegin].m_previousOp = notFound;
2357        m_ops[parenBegin].m_nextOp = parenEnd;
2358        m_ops[parenEnd].m_term = term;
2359        m_ops[parenEnd].m_previousOp = parenBegin;
2360        m_ops[parenEnd].m_nextOp = notFound;
2361    }
2362
2363    // opCompileParentheticalAssertion
2364    // Emits ops for a parenthetical assertion. These consist of an
2365    // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2366    // the alternatives, with these wrapped by an outer pair of
2367    // OpParentheticalAssertionBegin/End nodes.
2368    // We can always use the OpSimpleNestedAlternative nodes in the
2369    // case of parenthetical assertions since these only ever match
2370    // once, and will never backtrack back into the assertion.
2371    void opCompileParentheticalAssertion(PatternTerm* term)
2372    {
2373        size_t parenBegin = m_ops.size();
2374        m_ops.append(OpParentheticalAssertionBegin);
2375
2376        m_ops.append(OpSimpleNestedAlternativeBegin);
2377        m_ops.last().m_previousOp = notFound;
2378        m_ops.last().m_term = term;
2379        Vector<OwnPtr<PatternAlternative> >& alternatives =  term->parentheses.disjunction->m_alternatives;
2380        for (unsigned i = 0; i < alternatives.size(); ++i) {
2381            size_t lastOpIndex = m_ops.size() - 1;
2382
2383            PatternAlternative* nestedAlternative = alternatives[i].get();
2384            opCompileAlternative(nestedAlternative);
2385
2386            size_t thisOpIndex = m_ops.size();
2387            m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2388
2389            YarrOp& lastOp = m_ops[lastOpIndex];
2390            YarrOp& thisOp = m_ops[thisOpIndex];
2391
2392            lastOp.m_alternative = nestedAlternative;
2393            lastOp.m_nextOp = thisOpIndex;
2394            thisOp.m_previousOp = lastOpIndex;
2395            thisOp.m_term = term;
2396        }
2397        YarrOp& lastOp = m_ops.last();
2398        ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2399        lastOp.m_op = OpSimpleNestedAlternativeEnd;
2400        lastOp.m_alternative = 0;
2401        lastOp.m_nextOp = notFound;
2402
2403        size_t parenEnd = m_ops.size();
2404        m_ops.append(OpParentheticalAssertionEnd);
2405
2406        m_ops[parenBegin].m_term = term;
2407        m_ops[parenBegin].m_previousOp = notFound;
2408        m_ops[parenBegin].m_nextOp = parenEnd;
2409        m_ops[parenEnd].m_term = term;
2410        m_ops[parenEnd].m_previousOp = parenBegin;
2411        m_ops[parenEnd].m_nextOp = notFound;
2412    }
2413
2414    // opCompileAlternative
2415    // Called to emit nodes for all terms in an alternative.
2416    void opCompileAlternative(PatternAlternative* alternative)
2417    {
2418        optimizeAlternative(alternative);
2419
2420        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2421            PatternTerm* term = &alternative->m_terms[i];
2422
2423            switch (term->type) {
2424            case PatternTerm::TypeParenthesesSubpattern:
2425                opCompileParenthesesSubpattern(term);
2426                break;
2427
2428            case PatternTerm::TypeParentheticalAssertion:
2429                opCompileParentheticalAssertion(term);
2430                break;
2431
2432            default:
2433                m_ops.append(term);
2434            }
2435        }
2436    }
2437
2438    // opCompileBody
2439    // This method compiles the body disjunction of the regular expression.
2440    // The body consists of two sets of alternatives - zero or more 'once
2441    // through' (BOL anchored) alternatives, followed by zero or more
2442    // repeated alternatives.
2443    // For each of these two sets of alteratives, if not empty they will be
2444    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2445    // 'begin' node referencing the first alternative, and 'next' nodes
2446    // referencing any further alternatives. The begin/next/end nodes are
2447    // linked together in a doubly linked list. In the case of repeating
2448    // alternatives, the end node is also linked back to the beginning.
2449    // If no repeating alternatives exist, then a OpMatchFailed node exists
2450    // to return the failing result.
2451    void opCompileBody(PatternDisjunction* disjunction)
2452    {
2453        Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives;
2454        size_t currentAlternativeIndex = 0;
2455
2456        // Emit the 'once through' alternatives.
2457        if (alternatives.size() && alternatives[0]->onceThrough()) {
2458            m_ops.append(YarrOp(OpBodyAlternativeBegin));
2459            m_ops.last().m_previousOp = notFound;
2460
2461            do {
2462                size_t lastOpIndex = m_ops.size() - 1;
2463                PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
2464                opCompileAlternative(alternative);
2465
2466                size_t thisOpIndex = m_ops.size();
2467                m_ops.append(YarrOp(OpBodyAlternativeNext));
2468
2469                YarrOp& lastOp = m_ops[lastOpIndex];
2470                YarrOp& thisOp = m_ops[thisOpIndex];
2471
2472                lastOp.m_alternative = alternative;
2473                lastOp.m_nextOp = thisOpIndex;
2474                thisOp.m_previousOp = lastOpIndex;
2475
2476                ++currentAlternativeIndex;
2477            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2478
2479            YarrOp& lastOp = m_ops.last();
2480
2481            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2482            lastOp.m_op = OpBodyAlternativeEnd;
2483            lastOp.m_alternative = 0;
2484            lastOp.m_nextOp = notFound;
2485        }
2486
2487        if (currentAlternativeIndex == alternatives.size()) {
2488            m_ops.append(YarrOp(OpMatchFailed));
2489            return;
2490        }
2491
2492        // Emit the repeated alternatives.
2493        size_t repeatLoop = m_ops.size();
2494        m_ops.append(YarrOp(OpBodyAlternativeBegin));
2495        m_ops.last().m_previousOp = notFound;
2496        do {
2497            size_t lastOpIndex = m_ops.size() - 1;
2498            PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
2499            ASSERT(!alternative->onceThrough());
2500            opCompileAlternative(alternative);
2501
2502            size_t thisOpIndex = m_ops.size();
2503            m_ops.append(YarrOp(OpBodyAlternativeNext));
2504
2505            YarrOp& lastOp = m_ops[lastOpIndex];
2506            YarrOp& thisOp = m_ops[thisOpIndex];
2507
2508            lastOp.m_alternative = alternative;
2509            lastOp.m_nextOp = thisOpIndex;
2510            thisOp.m_previousOp = lastOpIndex;
2511
2512            ++currentAlternativeIndex;
2513        } while (currentAlternativeIndex < alternatives.size());
2514        YarrOp& lastOp = m_ops.last();
2515        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2516        lastOp.m_op = OpBodyAlternativeEnd;
2517        lastOp.m_alternative = 0;
2518        lastOp.m_nextOp = repeatLoop;
2519    }
2520
2521    void generateEnter()
2522    {
2523#if CPU(X86_64)
2524        push(X86Registers::ebp);
2525        move(stackPointerRegister, X86Registers::ebp);
2526        push(X86Registers::ebx);
2527        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
2528        zeroExtend32ToPtr(index, index);
2529        zeroExtend32ToPtr(length, length);
2530#if OS(WINDOWS)
2531        if (compileMode == IncludeSubpatterns)
2532            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
2533#endif
2534#elif CPU(X86)
2535        push(X86Registers::ebp);
2536        move(stackPointerRegister, X86Registers::ebp);
2537        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2538        push(X86Registers::ebx);
2539        push(X86Registers::edi);
2540        push(X86Registers::esi);
2541        // load output into edi (2 = saved ebp + return address).
2542    #if COMPILER(MSVC)
2543        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2544        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2545        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2546        if (compileMode == IncludeSubpatterns)
2547            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2548    #else
2549        if (compileMode == IncludeSubpatterns)
2550            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2551    #endif
2552#elif CPU(ARM)
2553        push(ARMRegisters::r4);
2554        push(ARMRegisters::r5);
2555        push(ARMRegisters::r6);
2556#if CPU(ARM_TRADITIONAL)
2557        push(ARMRegisters::r8); // scratch register
2558#endif
2559        if (compileMode == IncludeSubpatterns)
2560            move(ARMRegisters::r3, output);
2561#elif CPU(SH4)
2562        push(SH4Registers::r11);
2563        push(SH4Registers::r13);
2564#elif CPU(MIPS)
2565        // Do nothing.
2566#endif
2567    }
2568
2569    void generateReturn()
2570    {
2571#if CPU(X86_64)
2572#if OS(WINDOWS)
2573        // Store the return value in the allocated space pointed by rcx.
2574        store64(returnRegister, Address(X86Registers::ecx));
2575        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
2576        move(X86Registers::ecx, returnRegister);
2577#endif
2578        pop(X86Registers::ebx);
2579        pop(X86Registers::ebp);
2580#elif CPU(X86)
2581        pop(X86Registers::esi);
2582        pop(X86Registers::edi);
2583        pop(X86Registers::ebx);
2584        pop(X86Registers::ebp);
2585#elif CPU(ARM)
2586#if CPU(ARM_TRADITIONAL)
2587        pop(ARMRegisters::r8); // scratch register
2588#endif
2589        pop(ARMRegisters::r6);
2590        pop(ARMRegisters::r5);
2591        pop(ARMRegisters::r4);
2592#elif CPU(SH4)
2593        pop(SH4Registers::r13);
2594        pop(SH4Registers::r11);
2595#elif CPU(MIPS)
2596        // Do nothing
2597#endif
2598        ret();
2599    }
2600
2601public:
2602    YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
2603        : m_pattern(pattern)
2604        , m_charSize(charSize)
2605        , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
2606        , m_shouldFallBack(false)
2607        , m_checked(0)
2608    {
2609    }
2610
2611    void compile(VM* vm, YarrCodeBlock& jitObject)
2612    {
2613        generateEnter();
2614
2615        Jump hasInput = checkInput();
2616        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
2617        move(TrustedImm32(0), returnRegister2);
2618        generateReturn();
2619        hasInput.link(this);
2620
2621        if (compileMode == IncludeSubpatterns) {
2622            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
2623                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
2624        }
2625
2626        if (!m_pattern.m_body->m_hasFixedSize)
2627            setMatchStart(index);
2628
2629        initCallFrame();
2630
2631        // Compile the pattern to the internal 'YarrOp' representation.
2632        opCompileBody(m_pattern.m_body);
2633
2634        // If we encountered anything we can't handle in the JIT code
2635        // (e.g. backreferences) then return early.
2636        if (m_shouldFallBack) {
2637            jitObject.setFallBack(true);
2638            return;
2639        }
2640
2641        generate();
2642        backtrack();
2643
2644        // Link & finalize the code.
2645        LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID);
2646        m_backtrackingState.linkDataLabels(linkBuffer);
2647
2648        if (compileMode == MatchOnly) {
2649            if (m_charSize == Char8)
2650                jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
2651            else
2652                jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
2653        } else {
2654            if (m_charSize == Char8)
2655                jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
2656            else
2657                jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
2658        }
2659        jitObject.setFallBack(m_shouldFallBack);
2660    }
2661
2662private:
2663    YarrPattern& m_pattern;
2664
2665    YarrCharSize m_charSize;
2666
2667    Scale m_charScale;
2668
2669    // Used to detect regular expression constructs that are not currently
2670    // supported in the JIT; fall back to the interpreter when this is detected.
2671    bool m_shouldFallBack;
2672
2673    // The regular expression expressed as a linear sequence of operations.
2674    Vector<YarrOp, 128> m_ops;
2675
2676    // This records the current input offset being applied due to the current
2677    // set of alternatives we are nested within. E.g. when matching the
2678    // character 'b' within the regular expression /abc/, we will know that
2679    // the minimum size for the alternative is 3, checked upon entry to the
2680    // alternative, and that 'b' is at offset 1 from the start, and as such
2681    // when matching 'b' we need to apply an offset of -2 to the load.
2682    //
2683    // FIXME: This should go away. Rather than tracking this value throughout
2684    // code generation, we should gather this information up front & store it
2685    // on the YarrOp structure.
2686    int m_checked;
2687
2688    // This class records state whilst generating the backtracking path of code.
2689    BacktrackingState m_backtrackingState;
2690};
2691
2692void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
2693{
2694    if (mode == MatchOnly)
2695        YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject);
2696    else
2697        YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject);
2698}
2699
2700}}
2701
2702#endif
2703