1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "YarrInterpreter.h"
29
30#include "Yarr.h"
31#include "YarrCanonicalizeUCS2.h"
32#include <wtf/BumpPointerAllocator.h>
33#include <wtf/DataLog.h>
34#include <wtf/text/CString.h>
35#include <wtf/text/WTFString.h>
36
37using namespace WTF;
38
39namespace JSC { namespace Yarr {
40
41template<typename CharType>
42class Interpreter {
43public:
44    struct ParenthesesDisjunctionContext;
45
46    struct BackTrackInfoPatternCharacter {
47        uintptr_t matchAmount;
48    };
49    struct BackTrackInfoCharacterClass {
50        uintptr_t matchAmount;
51    };
52    struct BackTrackInfoBackReference {
53        uintptr_t begin; // Not really needed for greedy quantifiers.
54        uintptr_t matchAmount; // Not really needed for fixed quantifiers.
55    };
56    struct BackTrackInfoAlternative {
57        uintptr_t offset;
58    };
59    struct BackTrackInfoParentheticalAssertion {
60        uintptr_t begin;
61    };
62    struct BackTrackInfoParenthesesOnce {
63        uintptr_t begin;
64    };
65    struct BackTrackInfoParenthesesTerminal {
66        uintptr_t begin;
67    };
68    struct BackTrackInfoParentheses {
69        uintptr_t matchAmount;
70        ParenthesesDisjunctionContext* lastContext;
71    };
72
73    static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
74    {
75        context->next = backTrack->lastContext;
76        backTrack->lastContext = context;
77        ++backTrack->matchAmount;
78    }
79
80    static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
81    {
82        RELEASE_ASSERT(backTrack->matchAmount);
83        RELEASE_ASSERT(backTrack->lastContext);
84        backTrack->lastContext = backTrack->lastContext->next;
85        --backTrack->matchAmount;
86    }
87
88    struct DisjunctionContext
89    {
90        DisjunctionContext()
91            : term(0)
92        {
93        }
94
95        void* operator new(size_t, void* where)
96        {
97            return where;
98        }
99
100        int term;
101        unsigned matchBegin;
102        unsigned matchEnd;
103        uintptr_t frame[1];
104    };
105
106    DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
107    {
108        size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
109        allocatorPool = allocatorPool->ensureCapacity(size);
110        RELEASE_ASSERT(allocatorPool);
111        return new (allocatorPool->alloc(size)) DisjunctionContext();
112    }
113
114    void freeDisjunctionContext(DisjunctionContext* context)
115    {
116        allocatorPool = allocatorPool->dealloc(context);
117    }
118
119    struct ParenthesesDisjunctionContext
120    {
121        ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
122            : next(0)
123        {
124            unsigned firstSubpatternId = term.atom.subpatternId;
125            unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
126
127            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
128                subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
129                output[(firstSubpatternId << 1) + i] = offsetNoMatch;
130            }
131
132            new (getDisjunctionContext(term)) DisjunctionContext();
133        }
134
135        void* operator new(size_t, void* where)
136        {
137            return where;
138        }
139
140        void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
141        {
142            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
143                output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
144        }
145
146        DisjunctionContext* getDisjunctionContext(ByteTerm& term)
147        {
148            return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
149        }
150
151        ParenthesesDisjunctionContext* next;
152        unsigned subpatternBackup[1];
153    };
154
155    ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
156    {
157        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
158        allocatorPool = allocatorPool->ensureCapacity(size);
159        RELEASE_ASSERT(allocatorPool);
160        return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
161    }
162
163    void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
164    {
165        allocatorPool = allocatorPool->dealloc(context);
166    }
167
168    class InputStream {
169    public:
170        InputStream(const CharType* input, unsigned start, unsigned length)
171            : input(input)
172            , pos(start)
173            , length(length)
174        {
175        }
176
177        void next()
178        {
179            ++pos;
180        }
181
182        void rewind(unsigned amount)
183        {
184            ASSERT(pos >= amount);
185            pos -= amount;
186        }
187
188        int read()
189        {
190            ASSERT(pos < length);
191            if (pos < length)
192                return input[pos];
193            return -1;
194        }
195
196        int readPair()
197        {
198            ASSERT(pos + 1 < length);
199            return input[pos] | input[pos + 1] << 16;
200        }
201
202        int readChecked(unsigned negativePositionOffest)
203        {
204            RELEASE_ASSERT(pos >= negativePositionOffest);
205            unsigned p = pos - negativePositionOffest;
206            ASSERT(p < length);
207            return input[p];
208        }
209
210        int reread(unsigned from)
211        {
212            ASSERT(from < length);
213            return input[from];
214        }
215
216        int prev()
217        {
218            ASSERT(!(pos > length));
219            if (pos && length)
220                return input[pos - 1];
221            return -1;
222        }
223
224        unsigned getPos()
225        {
226            return pos;
227        }
228
229        void setPos(unsigned p)
230        {
231            pos = p;
232        }
233
234        bool atStart()
235        {
236            return pos == 0;
237        }
238
239        bool atEnd()
240        {
241            return pos == length;
242        }
243
244        unsigned end()
245        {
246            return length;
247        }
248
249        bool checkInput(unsigned count)
250        {
251            if (((pos + count) <= length) && ((pos + count) >= pos)) {
252                pos += count;
253                return true;
254            }
255            return false;
256        }
257
258        void uncheckInput(unsigned count)
259        {
260            RELEASE_ASSERT(pos >= count);
261            pos -= count;
262        }
263
264        bool atStart(unsigned negativePositionOffest)
265        {
266            return pos == negativePositionOffest;
267        }
268
269        bool atEnd(unsigned negativePositionOffest)
270        {
271            RELEASE_ASSERT(pos >= negativePositionOffest);
272            return (pos - negativePositionOffest) == length;
273        }
274
275        bool isAvailableInput(unsigned offset)
276        {
277            return (((pos + offset) <= length) && ((pos + offset) >= pos));
278        }
279
280    private:
281        const CharType* input;
282        unsigned pos;
283        unsigned length;
284    };
285
286    bool testCharacterClass(CharacterClass* characterClass, int ch)
287    {
288        if (ch & 0xFF80) {
289            for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
290                if (ch == characterClass->m_matchesUnicode[i])
291                    return true;
292            for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
293                if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
294                    return true;
295        } else {
296            for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
297                if (ch == characterClass->m_matches[i])
298                    return true;
299            for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
300                if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
301                    return true;
302        }
303
304        return false;
305    }
306
307    bool checkCharacter(int testChar, unsigned negativeInputOffset)
308    {
309        return testChar == input.readChecked(negativeInputOffset);
310    }
311
312    bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
313    {
314        int ch = input.readChecked(negativeInputOffset);
315        return (loChar == ch) || (hiChar == ch);
316    }
317
318    bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
319    {
320        bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
321        return invert ? !match : match;
322    }
323
324    bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
325    {
326        unsigned matchSize = (unsigned)(matchEnd - matchBegin);
327
328        if (!input.checkInput(matchSize))
329            return false;
330
331        if (pattern->m_ignoreCase) {
332            for (unsigned i = 0; i < matchSize; ++i) {
333                int oldCh = input.reread(matchBegin + i);
334                int ch = input.readChecked(negativeInputOffset + matchSize - i);
335
336                if (oldCh == ch)
337                    continue;
338
339                // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
340                // unicode values are never allowed to match against ascii ones.
341                if (isASCII(oldCh) || isASCII(ch)) {
342                    if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
343                        continue;
344                } else if (areCanonicallyEquivalent(oldCh, ch))
345                    continue;
346
347                input.uncheckInput(matchSize);
348                return false;
349            }
350        } else {
351            for (unsigned i = 0; i < matchSize; ++i) {
352                if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
353                    input.uncheckInput(matchSize);
354                    return false;
355                }
356            }
357        }
358
359        return true;
360    }
361
362    bool matchAssertionBOL(ByteTerm& term)
363    {
364        return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
365    }
366
367    bool matchAssertionEOL(ByteTerm& term)
368    {
369        if (term.inputPosition)
370            return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
371
372        return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
373    }
374
375    bool matchAssertionWordBoundary(ByteTerm& term)
376    {
377        bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
378        bool readIsWordchar;
379        if (term.inputPosition)
380            readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
381        else
382            readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
383
384        bool wordBoundary = prevIsWordchar != readIsWordchar;
385        return term.invert() ? !wordBoundary : wordBoundary;
386    }
387
388    bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
389    {
390        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
391
392        switch (term.atom.quantityType) {
393        case QuantifierFixedCount:
394            break;
395
396        case QuantifierGreedy:
397            if (backTrack->matchAmount) {
398                --backTrack->matchAmount;
399                input.uncheckInput(1);
400                return true;
401            }
402            break;
403
404        case QuantifierNonGreedy:
405            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
406                ++backTrack->matchAmount;
407                if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
408                    return true;
409            }
410            input.uncheckInput(backTrack->matchAmount);
411            break;
412        }
413
414        return false;
415    }
416
417    bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
418    {
419        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
420
421        switch (term.atom.quantityType) {
422        case QuantifierFixedCount:
423            break;
424
425        case QuantifierGreedy:
426            if (backTrack->matchAmount) {
427                --backTrack->matchAmount;
428                input.uncheckInput(1);
429                return true;
430            }
431            break;
432
433        case QuantifierNonGreedy:
434            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
435                ++backTrack->matchAmount;
436                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
437                    return true;
438            }
439            input.uncheckInput(backTrack->matchAmount);
440            break;
441        }
442
443        return false;
444    }
445
446    bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
447    {
448        ASSERT(term.type == ByteTerm::TypeCharacterClass);
449        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
450
451        switch (term.atom.quantityType) {
452        case QuantifierFixedCount: {
453            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
454                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
455                    return false;
456            }
457            return true;
458        }
459
460        case QuantifierGreedy: {
461            unsigned matchAmount = 0;
462            while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
463                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
464                    input.uncheckInput(1);
465                    break;
466                }
467                ++matchAmount;
468            }
469            backTrack->matchAmount = matchAmount;
470
471            return true;
472        }
473
474        case QuantifierNonGreedy:
475            backTrack->matchAmount = 0;
476            return true;
477        }
478
479        RELEASE_ASSERT_NOT_REACHED();
480        return false;
481    }
482
483    bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
484    {
485        ASSERT(term.type == ByteTerm::TypeCharacterClass);
486        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
487
488        switch (term.atom.quantityType) {
489        case QuantifierFixedCount:
490            break;
491
492        case QuantifierGreedy:
493            if (backTrack->matchAmount) {
494                --backTrack->matchAmount;
495                input.uncheckInput(1);
496                return true;
497            }
498            break;
499
500        case QuantifierNonGreedy:
501            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
502                ++backTrack->matchAmount;
503                if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
504                    return true;
505            }
506            input.uncheckInput(backTrack->matchAmount);
507            break;
508        }
509
510        return false;
511    }
512
513    bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
514    {
515        ASSERT(term.type == ByteTerm::TypeBackReference);
516        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
517
518        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
519        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
520
521        // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
522        // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
523        // Eg.: /(a\1)/
524        if (matchEnd == offsetNoMatch)
525            return true;
526
527        if (matchBegin == offsetNoMatch)
528            return true;
529
530        ASSERT(matchBegin <= matchEnd);
531
532        if (matchBegin == matchEnd)
533            return true;
534
535        switch (term.atom.quantityType) {
536        case QuantifierFixedCount: {
537            backTrack->begin = input.getPos();
538            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
539                if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
540                    input.setPos(backTrack->begin);
541                    return false;
542                }
543            }
544            return true;
545        }
546
547        case QuantifierGreedy: {
548            unsigned matchAmount = 0;
549            while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
550                ++matchAmount;
551            backTrack->matchAmount = matchAmount;
552            return true;
553        }
554
555        case QuantifierNonGreedy:
556            backTrack->begin = input.getPos();
557            backTrack->matchAmount = 0;
558            return true;
559        }
560
561        RELEASE_ASSERT_NOT_REACHED();
562        return false;
563    }
564
565    bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
566    {
567        ASSERT(term.type == ByteTerm::TypeBackReference);
568        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
569
570        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
571        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
572
573        if (matchBegin == offsetNoMatch)
574            return false;
575
576        ASSERT(matchBegin <= matchEnd);
577
578        if (matchBegin == matchEnd)
579            return false;
580
581        switch (term.atom.quantityType) {
582        case QuantifierFixedCount:
583            // for quantityCount == 1, could rewind.
584            input.setPos(backTrack->begin);
585            break;
586
587        case QuantifierGreedy:
588            if (backTrack->matchAmount) {
589                --backTrack->matchAmount;
590                input.rewind(matchEnd - matchBegin);
591                return true;
592            }
593            break;
594
595        case QuantifierNonGreedy:
596            if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
597                ++backTrack->matchAmount;
598                return true;
599            }
600            input.setPos(backTrack->begin);
601            break;
602        }
603
604        return false;
605    }
606
607    void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
608    {
609        if (term.capture()) {
610            unsigned subpatternId = term.atom.subpatternId;
611            output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
612            output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
613        }
614    }
615    void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
616    {
617        unsigned firstSubpatternId = term.atom.subpatternId;
618        unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
619        context->restoreOutput(output, firstSubpatternId, count);
620    }
621    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
622    {
623        while (backTrack->matchAmount) {
624            ParenthesesDisjunctionContext* context = backTrack->lastContext;
625
626            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
627            if (result == JSRegExpMatch)
628                return JSRegExpMatch;
629
630            resetMatches(term, context);
631            popParenthesesDisjunctionContext(backTrack);
632            freeParenthesesDisjunctionContext(context);
633
634            if (result != JSRegExpNoMatch)
635                return result;
636        }
637
638        return JSRegExpNoMatch;
639    }
640
641    bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
642    {
643        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
644        ASSERT(term.atom.quantityCount == 1);
645
646        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
647
648        switch (term.atom.quantityType) {
649        case QuantifierGreedy: {
650            // set this speculatively; if we get to the parens end this will be true.
651            backTrack->begin = input.getPos();
652            break;
653        }
654        case QuantifierNonGreedy: {
655            backTrack->begin = notFound;
656            context->term += term.atom.parenthesesWidth;
657            return true;
658        }
659        case QuantifierFixedCount:
660            break;
661        }
662
663        if (term.capture()) {
664            unsigned subpatternId = term.atom.subpatternId;
665            output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
666        }
667
668        return true;
669    }
670
671    bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
672    {
673        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
674        ASSERT(term.atom.quantityCount == 1);
675
676        if (term.capture()) {
677            unsigned subpatternId = term.atom.subpatternId;
678            output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
679        }
680
681        if (term.atom.quantityType == QuantifierFixedCount)
682            return true;
683
684        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
685        return backTrack->begin != input.getPos();
686    }
687
688    bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
689    {
690        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
691        ASSERT(term.atom.quantityCount == 1);
692
693        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
694
695        if (term.capture()) {
696            unsigned subpatternId = term.atom.subpatternId;
697            output[(subpatternId << 1)] = offsetNoMatch;
698            output[(subpatternId << 1) + 1] = offsetNoMatch;
699        }
700
701        switch (term.atom.quantityType) {
702        case QuantifierGreedy:
703            // if we backtrack to this point, there is another chance - try matching nothing.
704            ASSERT(backTrack->begin != notFound);
705            backTrack->begin = notFound;
706            context->term += term.atom.parenthesesWidth;
707            return true;
708        case QuantifierNonGreedy:
709            ASSERT(backTrack->begin != notFound);
710            FALLTHROUGH;
711        case QuantifierFixedCount:
712            break;
713        }
714
715        return false;
716    }
717
718    bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
719    {
720        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
721        ASSERT(term.atom.quantityCount == 1);
722
723        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
724
725        switch (term.atom.quantityType) {
726        case QuantifierGreedy:
727            if (backTrack->begin == notFound) {
728                context->term -= term.atom.parenthesesWidth;
729                return false;
730            }
731            FALLTHROUGH;
732        case QuantifierNonGreedy:
733            if (backTrack->begin == notFound) {
734                backTrack->begin = input.getPos();
735                if (term.capture()) {
736                    // Technically this access to inputPosition should be accessing the begin term's
737                    // inputPosition, but for repeats other than fixed these values should be
738                    // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
739                    ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
740                    ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
741                    unsigned subpatternId = term.atom.subpatternId;
742                    output[subpatternId << 1] = input.getPos() + term.inputPosition;
743                }
744                context->term -= term.atom.parenthesesWidth;
745                return true;
746            }
747            FALLTHROUGH;
748        case QuantifierFixedCount:
749            break;
750        }
751
752        return false;
753    }
754
755    bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
756    {
757        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
758        ASSERT(term.atom.quantityType == QuantifierGreedy);
759        ASSERT(term.atom.quantityCount == quantifyInfinite);
760        ASSERT(!term.capture());
761
762        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
763        backTrack->begin = input.getPos();
764        return true;
765    }
766
767    bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
768    {
769        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
770
771        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
772        // Empty match is a failed match.
773        if (backTrack->begin == input.getPos())
774            return false;
775
776        // Successful match! Okay, what's next? - loop around and try to match moar!
777        context->term -= (term.atom.parenthesesWidth + 1);
778        return true;
779    }
780
781    bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
782    {
783        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
784        ASSERT(term.atom.quantityType == QuantifierGreedy);
785        ASSERT(term.atom.quantityCount == quantifyInfinite);
786        ASSERT(!term.capture());
787
788        // If we backtrack to this point, we have failed to match this iteration of the parens.
789        // Since this is greedy / zero minimum a failed is also accepted as a match!
790        context->term += term.atom.parenthesesWidth;
791        return true;
792    }
793
794    bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
795    {
796        // 'Terminal' parentheses are at the end of the regex, and as such a match past end
797        // should always be returned as a successful match - we should never backtrack to here.
798        RELEASE_ASSERT_NOT_REACHED();
799        return false;
800    }
801
802    bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
803    {
804        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
805        ASSERT(term.atom.quantityCount == 1);
806
807        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
808
809        backTrack->begin = input.getPos();
810        return true;
811    }
812
813    bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
814    {
815        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
816        ASSERT(term.atom.quantityCount == 1);
817
818        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
819
820        input.setPos(backTrack->begin);
821
822        // We've reached the end of the parens; if they are inverted, this is failure.
823        if (term.invert()) {
824            context->term -= term.atom.parenthesesWidth;
825            return false;
826        }
827
828        return true;
829    }
830
831    bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
832    {
833        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
834        ASSERT(term.atom.quantityCount == 1);
835
836        // We've failed to match parens; if they are inverted, this is win!
837        if (term.invert()) {
838            context->term += term.atom.parenthesesWidth;
839            return true;
840        }
841
842        return false;
843    }
844
845    bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
846    {
847        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
848        ASSERT(term.atom.quantityCount == 1);
849
850        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
851
852        input.setPos(backTrack->begin);
853
854        context->term -= term.atom.parenthesesWidth;
855        return false;
856    }
857
858    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
859    {
860        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
861
862        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
863        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
864
865        backTrack->matchAmount = 0;
866        backTrack->lastContext = 0;
867
868        switch (term.atom.quantityType) {
869        case QuantifierFixedCount: {
870            // While we haven't yet reached our fixed limit,
871            while (backTrack->matchAmount < term.atom.quantityCount) {
872                // Try to do a match, and it it succeeds, add it to the list.
873                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
874                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
875                if (result == JSRegExpMatch)
876                    appendParenthesesDisjunctionContext(backTrack, context);
877                else {
878                    // The match failed; try to find an alternate point to carry on from.
879                    resetMatches(term, context);
880                    freeParenthesesDisjunctionContext(context);
881
882                    if (result != JSRegExpNoMatch)
883                        return result;
884                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
885                    if (backtrackResult != JSRegExpMatch)
886                        return backtrackResult;
887                }
888            }
889
890            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
891            ParenthesesDisjunctionContext* context = backTrack->lastContext;
892            recordParenthesesMatch(term, context);
893            return JSRegExpMatch;
894        }
895
896        case QuantifierGreedy: {
897            while (backTrack->matchAmount < term.atom.quantityCount) {
898                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
899                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
900                if (result == JSRegExpMatch)
901                    appendParenthesesDisjunctionContext(backTrack, context);
902                else {
903                    resetMatches(term, context);
904                    freeParenthesesDisjunctionContext(context);
905
906                    if (result != JSRegExpNoMatch)
907                        return result;
908
909                    break;
910                }
911            }
912
913            if (backTrack->matchAmount) {
914                ParenthesesDisjunctionContext* context = backTrack->lastContext;
915                recordParenthesesMatch(term, context);
916            }
917            return JSRegExpMatch;
918        }
919
920        case QuantifierNonGreedy:
921            return JSRegExpMatch;
922        }
923
924        RELEASE_ASSERT_NOT_REACHED();
925        return JSRegExpErrorNoMatch;
926    }
927
928    // Rules for backtracking differ depending on whether this is greedy or non-greedy.
929    //
930    // Greedy matches never should try just adding more - you should already have done
931    // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
932    // you backtrack an item off the list needs checking, since we'll never have matched
933    // the one less case.  Tracking forwards, still add as much as possible.
934    //
935    // Non-greedy, we've already done the one less case, so don't match on popping.
936    // We haven't done the one more case, so always try to add that.
937    //
938    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
939    {
940        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
941
942        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
943        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
944
945        switch (term.atom.quantityType) {
946        case QuantifierFixedCount: {
947            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
948
949            ParenthesesDisjunctionContext* context = 0;
950            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
951
952            if (result != JSRegExpMatch)
953                return result;
954
955            // While we haven't yet reached our fixed limit,
956            while (backTrack->matchAmount < term.atom.quantityCount) {
957                // Try to do a match, and it it succeeds, add it to the list.
958                context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
959                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
960
961                if (result == JSRegExpMatch)
962                    appendParenthesesDisjunctionContext(backTrack, context);
963                else {
964                    // The match failed; try to find an alternate point to carry on from.
965                    resetMatches(term, context);
966                    freeParenthesesDisjunctionContext(context);
967
968                    if (result != JSRegExpNoMatch)
969                        return result;
970                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
971                    if (backtrackResult != JSRegExpMatch)
972                        return backtrackResult;
973                }
974            }
975
976            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
977            context = backTrack->lastContext;
978            recordParenthesesMatch(term, context);
979            return JSRegExpMatch;
980        }
981
982        case QuantifierGreedy: {
983            if (!backTrack->matchAmount)
984                return JSRegExpNoMatch;
985
986            ParenthesesDisjunctionContext* context = backTrack->lastContext;
987            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
988            if (result == JSRegExpMatch) {
989                while (backTrack->matchAmount < term.atom.quantityCount) {
990                    ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
991                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
992                    if (parenthesesResult == JSRegExpMatch)
993                        appendParenthesesDisjunctionContext(backTrack, context);
994                    else {
995                        resetMatches(term, context);
996                        freeParenthesesDisjunctionContext(context);
997
998                        if (parenthesesResult != JSRegExpNoMatch)
999                            return parenthesesResult;
1000
1001                        break;
1002                    }
1003                }
1004            } else {
1005                resetMatches(term, context);
1006                popParenthesesDisjunctionContext(backTrack);
1007                freeParenthesesDisjunctionContext(context);
1008
1009                if (result != JSRegExpNoMatch)
1010                    return result;
1011            }
1012
1013            if (backTrack->matchAmount) {
1014                ParenthesesDisjunctionContext* context = backTrack->lastContext;
1015                recordParenthesesMatch(term, context);
1016            }
1017            return JSRegExpMatch;
1018        }
1019
1020        case QuantifierNonGreedy: {
1021            // If we've not reached the limit, try to add one more match.
1022            if (backTrack->matchAmount < term.atom.quantityCount) {
1023                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1024                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1025                if (result == JSRegExpMatch) {
1026                    appendParenthesesDisjunctionContext(backTrack, context);
1027                    recordParenthesesMatch(term, context);
1028                    return JSRegExpMatch;
1029                }
1030
1031                resetMatches(term, context);
1032                freeParenthesesDisjunctionContext(context);
1033
1034                if (result != JSRegExpNoMatch)
1035                    return result;
1036            }
1037
1038            // Nope - okay backtrack looking for an alternative.
1039            while (backTrack->matchAmount) {
1040                ParenthesesDisjunctionContext* context = backTrack->lastContext;
1041                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1042                if (result == JSRegExpMatch) {
1043                    // successful backtrack! we're back in the game!
1044                    if (backTrack->matchAmount) {
1045                        context = backTrack->lastContext;
1046                        recordParenthesesMatch(term, context);
1047                    }
1048                    return JSRegExpMatch;
1049                }
1050
1051                // pop a match off the stack
1052                resetMatches(term, context);
1053                popParenthesesDisjunctionContext(backTrack);
1054                freeParenthesesDisjunctionContext(context);
1055
1056                if (result != JSRegExpNoMatch)
1057                    return result;
1058            }
1059
1060            return JSRegExpNoMatch;
1061        }
1062        }
1063
1064        RELEASE_ASSERT_NOT_REACHED();
1065        return JSRegExpErrorNoMatch;
1066    }
1067
1068    bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1069    {
1070        UNUSED_PARAM(term);
1071        unsigned matchBegin = context->matchBegin;
1072
1073        if (matchBegin) {
1074            for (matchBegin--; true; matchBegin--) {
1075                if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1076                    ++matchBegin;
1077                    break;
1078                }
1079
1080                if (!matchBegin)
1081                    break;
1082            }
1083        }
1084
1085        unsigned matchEnd = input.getPos();
1086
1087        for (; (matchEnd != input.end())
1088             && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1089
1090        if (((matchBegin && term.anchors.m_bol)
1091             || ((matchEnd != input.end()) && term.anchors.m_eol))
1092            && !pattern->m_multiline)
1093            return false;
1094
1095        context->matchBegin = matchBegin;
1096        context->matchEnd = matchEnd;
1097        return true;
1098    }
1099
1100#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1101#define BACKTRACK() { --context->term; goto backtrack; }
1102#define currentTerm() (disjunction->terms[context->term])
1103    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1104    {
1105        if (!--remainingMatchCount)
1106            return JSRegExpErrorHitLimit;
1107
1108        if (btrack)
1109            BACKTRACK();
1110
1111        context->matchBegin = input.getPos();
1112        context->term = 0;
1113
1114    matchAgain:
1115        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1116
1117        switch (currentTerm().type) {
1118        case ByteTerm::TypeSubpatternBegin:
1119            MATCH_NEXT();
1120        case ByteTerm::TypeSubpatternEnd:
1121            context->matchEnd = input.getPos();
1122            return JSRegExpMatch;
1123
1124        case ByteTerm::TypeBodyAlternativeBegin:
1125            MATCH_NEXT();
1126        case ByteTerm::TypeBodyAlternativeDisjunction:
1127        case ByteTerm::TypeBodyAlternativeEnd:
1128            context->matchEnd = input.getPos();
1129            return JSRegExpMatch;
1130
1131        case ByteTerm::TypeAlternativeBegin:
1132            MATCH_NEXT();
1133        case ByteTerm::TypeAlternativeDisjunction:
1134        case ByteTerm::TypeAlternativeEnd: {
1135            int offset = currentTerm().alternative.end;
1136            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1137            backTrack->offset = offset;
1138            context->term += offset;
1139            MATCH_NEXT();
1140        }
1141
1142        case ByteTerm::TypeAssertionBOL:
1143            if (matchAssertionBOL(currentTerm()))
1144                MATCH_NEXT();
1145            BACKTRACK();
1146        case ByteTerm::TypeAssertionEOL:
1147            if (matchAssertionEOL(currentTerm()))
1148                MATCH_NEXT();
1149            BACKTRACK();
1150        case ByteTerm::TypeAssertionWordBoundary:
1151            if (matchAssertionWordBoundary(currentTerm()))
1152                MATCH_NEXT();
1153            BACKTRACK();
1154
1155        case ByteTerm::TypePatternCharacterOnce:
1156        case ByteTerm::TypePatternCharacterFixed: {
1157            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1158                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1159                    BACKTRACK();
1160            }
1161            MATCH_NEXT();
1162        }
1163        case ByteTerm::TypePatternCharacterGreedy: {
1164            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1165            unsigned matchAmount = 0;
1166            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1167                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1168                    input.uncheckInput(1);
1169                    break;
1170                }
1171                ++matchAmount;
1172            }
1173            backTrack->matchAmount = matchAmount;
1174
1175            MATCH_NEXT();
1176        }
1177        case ByteTerm::TypePatternCharacterNonGreedy: {
1178            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1179            backTrack->matchAmount = 0;
1180            MATCH_NEXT();
1181        }
1182
1183        case ByteTerm::TypePatternCasedCharacterOnce:
1184        case ByteTerm::TypePatternCasedCharacterFixed: {
1185            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1186                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1187                    BACKTRACK();
1188            }
1189            MATCH_NEXT();
1190        }
1191        case ByteTerm::TypePatternCasedCharacterGreedy: {
1192            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1193            unsigned matchAmount = 0;
1194            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1195                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1196                    input.uncheckInput(1);
1197                    break;
1198                }
1199                ++matchAmount;
1200            }
1201            backTrack->matchAmount = matchAmount;
1202
1203            MATCH_NEXT();
1204        }
1205        case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1206            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1207            backTrack->matchAmount = 0;
1208            MATCH_NEXT();
1209        }
1210
1211        case ByteTerm::TypeCharacterClass:
1212            if (matchCharacterClass(currentTerm(), context))
1213                MATCH_NEXT();
1214            BACKTRACK();
1215        case ByteTerm::TypeBackReference:
1216            if (matchBackReference(currentTerm(), context))
1217                MATCH_NEXT();
1218            BACKTRACK();
1219        case ByteTerm::TypeParenthesesSubpattern: {
1220            JSRegExpResult result = matchParentheses(currentTerm(), context);
1221
1222            if (result == JSRegExpMatch) {
1223                MATCH_NEXT();
1224            }  else if (result != JSRegExpNoMatch)
1225                return result;
1226
1227            BACKTRACK();
1228        }
1229        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1230            if (matchParenthesesOnceBegin(currentTerm(), context))
1231                MATCH_NEXT();
1232            BACKTRACK();
1233        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1234            if (matchParenthesesOnceEnd(currentTerm(), context))
1235                MATCH_NEXT();
1236            BACKTRACK();
1237        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1238            if (matchParenthesesTerminalBegin(currentTerm(), context))
1239                MATCH_NEXT();
1240            BACKTRACK();
1241        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1242            if (matchParenthesesTerminalEnd(currentTerm(), context))
1243                MATCH_NEXT();
1244            BACKTRACK();
1245        case ByteTerm::TypeParentheticalAssertionBegin:
1246            if (matchParentheticalAssertionBegin(currentTerm(), context))
1247                MATCH_NEXT();
1248            BACKTRACK();
1249        case ByteTerm::TypeParentheticalAssertionEnd:
1250            if (matchParentheticalAssertionEnd(currentTerm(), context))
1251                MATCH_NEXT();
1252            BACKTRACK();
1253
1254        case ByteTerm::TypeCheckInput:
1255            if (input.checkInput(currentTerm().checkInputCount))
1256                MATCH_NEXT();
1257            BACKTRACK();
1258
1259        case ByteTerm::TypeUncheckInput:
1260            input.uncheckInput(currentTerm().checkInputCount);
1261            MATCH_NEXT();
1262
1263        case ByteTerm::TypeDotStarEnclosure:
1264            if (matchDotStarEnclosure(currentTerm(), context))
1265                return JSRegExpMatch;
1266            BACKTRACK();
1267        }
1268
1269        // We should never fall-through to here.
1270        RELEASE_ASSERT_NOT_REACHED();
1271
1272    backtrack:
1273        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1274
1275        switch (currentTerm().type) {
1276        case ByteTerm::TypeSubpatternBegin:
1277            return JSRegExpNoMatch;
1278        case ByteTerm::TypeSubpatternEnd:
1279            RELEASE_ASSERT_NOT_REACHED();
1280
1281        case ByteTerm::TypeBodyAlternativeBegin:
1282        case ByteTerm::TypeBodyAlternativeDisjunction: {
1283            int offset = currentTerm().alternative.next;
1284            context->term += offset;
1285            if (offset > 0)
1286                MATCH_NEXT();
1287
1288            if (input.atEnd())
1289                return JSRegExpNoMatch;
1290
1291            input.next();
1292
1293            context->matchBegin = input.getPos();
1294
1295            if (currentTerm().alternative.onceThrough)
1296                context->term += currentTerm().alternative.next;
1297
1298            MATCH_NEXT();
1299        }
1300        case ByteTerm::TypeBodyAlternativeEnd:
1301            RELEASE_ASSERT_NOT_REACHED();
1302
1303        case ByteTerm::TypeAlternativeBegin:
1304        case ByteTerm::TypeAlternativeDisjunction: {
1305            int offset = currentTerm().alternative.next;
1306            context->term += offset;
1307            if (offset > 0)
1308                MATCH_NEXT();
1309            BACKTRACK();
1310        }
1311        case ByteTerm::TypeAlternativeEnd: {
1312            // We should never backtrack back into an alternative of the main body of the regex.
1313            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1314            unsigned offset = backTrack->offset;
1315            context->term -= offset;
1316            BACKTRACK();
1317        }
1318
1319        case ByteTerm::TypeAssertionBOL:
1320        case ByteTerm::TypeAssertionEOL:
1321        case ByteTerm::TypeAssertionWordBoundary:
1322            BACKTRACK();
1323
1324        case ByteTerm::TypePatternCharacterOnce:
1325        case ByteTerm::TypePatternCharacterFixed:
1326        case ByteTerm::TypePatternCharacterGreedy:
1327        case ByteTerm::TypePatternCharacterNonGreedy:
1328            if (backtrackPatternCharacter(currentTerm(), context))
1329                MATCH_NEXT();
1330            BACKTRACK();
1331        case ByteTerm::TypePatternCasedCharacterOnce:
1332        case ByteTerm::TypePatternCasedCharacterFixed:
1333        case ByteTerm::TypePatternCasedCharacterGreedy:
1334        case ByteTerm::TypePatternCasedCharacterNonGreedy:
1335            if (backtrackPatternCasedCharacter(currentTerm(), context))
1336                MATCH_NEXT();
1337            BACKTRACK();
1338        case ByteTerm::TypeCharacterClass:
1339            if (backtrackCharacterClass(currentTerm(), context))
1340                MATCH_NEXT();
1341            BACKTRACK();
1342        case ByteTerm::TypeBackReference:
1343            if (backtrackBackReference(currentTerm(), context))
1344                MATCH_NEXT();
1345            BACKTRACK();
1346        case ByteTerm::TypeParenthesesSubpattern: {
1347            JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1348
1349            if (result == JSRegExpMatch) {
1350                MATCH_NEXT();
1351            } else if (result != JSRegExpNoMatch)
1352                return result;
1353
1354            BACKTRACK();
1355        }
1356        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1357            if (backtrackParenthesesOnceBegin(currentTerm(), context))
1358                MATCH_NEXT();
1359            BACKTRACK();
1360        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1361            if (backtrackParenthesesOnceEnd(currentTerm(), context))
1362                MATCH_NEXT();
1363            BACKTRACK();
1364        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1365            if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1366                MATCH_NEXT();
1367            BACKTRACK();
1368        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1369            if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1370                MATCH_NEXT();
1371            BACKTRACK();
1372        case ByteTerm::TypeParentheticalAssertionBegin:
1373            if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1374                MATCH_NEXT();
1375            BACKTRACK();
1376        case ByteTerm::TypeParentheticalAssertionEnd:
1377            if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1378                MATCH_NEXT();
1379            BACKTRACK();
1380
1381        case ByteTerm::TypeCheckInput:
1382            input.uncheckInput(currentTerm().checkInputCount);
1383            BACKTRACK();
1384
1385        case ByteTerm::TypeUncheckInput:
1386            input.checkInput(currentTerm().checkInputCount);
1387            BACKTRACK();
1388
1389        case ByteTerm::TypeDotStarEnclosure:
1390            RELEASE_ASSERT_NOT_REACHED();
1391        }
1392
1393        RELEASE_ASSERT_NOT_REACHED();
1394        return JSRegExpErrorNoMatch;
1395    }
1396
1397    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1398    {
1399        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1400
1401        if (result == JSRegExpMatch) {
1402            while (context->matchBegin == context->matchEnd) {
1403                result = matchDisjunction(disjunction, context, true);
1404                if (result != JSRegExpMatch)
1405                    return result;
1406            }
1407            return JSRegExpMatch;
1408        }
1409
1410        return result;
1411    }
1412
1413    unsigned interpret()
1414    {
1415        if (!input.isAvailableInput(0))
1416            return offsetNoMatch;
1417
1418        for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1419            output[i << 1] = offsetNoMatch;
1420
1421        allocatorPool = pattern->m_allocator->startAllocator();
1422        RELEASE_ASSERT(allocatorPool);
1423
1424        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1425
1426        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1427        if (result == JSRegExpMatch) {
1428            output[0] = context->matchBegin;
1429            output[1] = context->matchEnd;
1430        }
1431
1432        freeDisjunctionContext(context);
1433
1434        pattern->m_allocator->stopAllocator();
1435
1436        ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1437        return output[0];
1438    }
1439
1440    Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1441        : pattern(pattern)
1442        , output(output)
1443        , input(input, start, length)
1444        , allocatorPool(0)
1445        , remainingMatchCount(matchLimit)
1446    {
1447    }
1448
1449private:
1450    BytecodePattern* pattern;
1451    unsigned* output;
1452    InputStream input;
1453    BumpPointerPool* allocatorPool;
1454    unsigned remainingMatchCount;
1455};
1456
1457class ByteCompiler {
1458    struct ParenthesesStackEntry {
1459        unsigned beginTerm;
1460        unsigned savedAlternativeIndex;
1461        ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1462            : beginTerm(beginTerm)
1463            , savedAlternativeIndex(savedAlternativeIndex)
1464        {
1465        }
1466    };
1467
1468public:
1469    ByteCompiler(YarrPattern& pattern)
1470        : m_pattern(pattern)
1471    {
1472        m_currentAlternativeIndex = 0;
1473    }
1474
1475    PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1476    {
1477        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1478        emitDisjunction(m_pattern.m_body);
1479        regexEnd();
1480
1481        return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1482    }
1483
1484    void checkInput(unsigned count)
1485    {
1486        m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1487    }
1488
1489    void uncheckInput(unsigned count)
1490    {
1491        m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1492    }
1493
1494    void assertionBOL(unsigned inputPosition)
1495    {
1496        m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1497    }
1498
1499    void assertionEOL(unsigned inputPosition)
1500    {
1501        m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1502    }
1503
1504    void assertionWordBoundary(bool invert, unsigned inputPosition)
1505    {
1506        m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1507    }
1508
1509    void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1510    {
1511        if (m_pattern.m_ignoreCase) {
1512            ASSERT(u_tolower(ch) <= 0xFFFF);
1513            ASSERT(u_toupper(ch) <= 0xFFFF);
1514
1515            UChar lo = u_tolower(ch);
1516            UChar hi = u_toupper(ch);
1517
1518            if (lo != hi) {
1519                m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1520                return;
1521            }
1522        }
1523
1524        m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1525    }
1526
1527    void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1528    {
1529        m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1530
1531        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1532        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1533        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1534    }
1535
1536    void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1537    {
1538        ASSERT(subpatternId);
1539
1540        m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1541
1542        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1543        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1544        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1545    }
1546
1547    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1548    {
1549        int beginTerm = m_bodyDisjunction->terms.size();
1550
1551        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1552        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1553        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1554        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1555
1556        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1557        m_currentAlternativeIndex = beginTerm + 1;
1558    }
1559
1560    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1561    {
1562        int beginTerm = m_bodyDisjunction->terms.size();
1563
1564        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1565        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1566        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1567        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1568
1569        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1570        m_currentAlternativeIndex = beginTerm + 1;
1571    }
1572
1573    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1574    {
1575        // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1576        // then fix this up at the end! - simplifying this should make it much clearer.
1577        // https://bugs.webkit.org/show_bug.cgi?id=50136
1578
1579        int beginTerm = m_bodyDisjunction->terms.size();
1580
1581        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1582        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1583        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1584        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1585
1586        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1587        m_currentAlternativeIndex = beginTerm + 1;
1588    }
1589
1590    void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1591    {
1592        int beginTerm = m_bodyDisjunction->terms.size();
1593
1594        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1595        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1596        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1597        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1598
1599        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1600        m_currentAlternativeIndex = beginTerm + 1;
1601    }
1602
1603    void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1604    {
1605        unsigned beginTerm = popParenthesesStack();
1606        closeAlternative(beginTerm + 1);
1607        unsigned endTerm = m_bodyDisjunction->terms.size();
1608
1609        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1610
1611        bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1612        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1613
1614        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1615        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1616        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1617        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1618
1619        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1620        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1621        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1622        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1623    }
1624
1625    void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1626    {
1627        m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1628    }
1629
1630    unsigned popParenthesesStack()
1631    {
1632        ASSERT(m_parenthesesStack.size());
1633        int stackEnd = m_parenthesesStack.size() - 1;
1634        unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1635        m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1636        m_parenthesesStack.shrink(stackEnd);
1637
1638        ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1639        ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1640
1641        return beginTerm;
1642    }
1643
1644#ifndef NDEBUG
1645    void dumpDisjunction(ByteDisjunction* disjunction)
1646    {
1647        dataLogF("ByteDisjunction(%p):\n\t", disjunction);
1648        for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1649            dataLogF("{ %d } ", disjunction->terms[i].type);
1650        dataLogF("\n");
1651    }
1652#endif
1653
1654    void closeAlternative(int beginTerm)
1655    {
1656        int origBeginTerm = beginTerm;
1657        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1658        int endIndex = m_bodyDisjunction->terms.size();
1659
1660        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1661
1662        if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1663            m_bodyDisjunction->terms.remove(beginTerm);
1664        else {
1665            while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1666                beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1667                ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1668                m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1669                m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1670            }
1671
1672            m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1673
1674            m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1675            m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1676        }
1677    }
1678
1679    void closeBodyAlternative()
1680    {
1681        int beginTerm = 0;
1682        int origBeginTerm = 0;
1683        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1684        int endIndex = m_bodyDisjunction->terms.size();
1685
1686        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1687
1688        while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1689            beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1690            ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1691            m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1692            m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1693        }
1694
1695        m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1696
1697        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1698        m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1699    }
1700
1701    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1702    {
1703        unsigned beginTerm = popParenthesesStack();
1704        closeAlternative(beginTerm + 1);
1705        unsigned endTerm = m_bodyDisjunction->terms.size();
1706
1707        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1708
1709        ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1710
1711        bool capture = parenthesesBegin.capture();
1712        unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1713
1714        unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1715        OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1716
1717        unsigned firstTermInParentheses = beginTerm + 1;
1718        parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1719
1720        parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1721        for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1722            parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1723        parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1724
1725        m_bodyDisjunction->terms.shrink(beginTerm);
1726
1727        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1728        m_allParenthesesInfo.append(parenthesesDisjunction.release());
1729
1730        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1731        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1732        m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1733    }
1734
1735    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1736    {
1737        unsigned beginTerm = popParenthesesStack();
1738        closeAlternative(beginTerm + 1);
1739        unsigned endTerm = m_bodyDisjunction->terms.size();
1740
1741        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1742
1743        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1744        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1745
1746        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1747        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1748        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1749        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1750
1751        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1752        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1753        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1754        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1755    }
1756
1757    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1758    {
1759        unsigned beginTerm = popParenthesesStack();
1760        closeAlternative(beginTerm + 1);
1761        unsigned endTerm = m_bodyDisjunction->terms.size();
1762
1763        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1764
1765        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1766        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1767
1768        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1769        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1770        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1771        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1772
1773        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1774        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1775        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1776        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1777    }
1778
1779    void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1780    {
1781        m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1782        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1783        m_bodyDisjunction->terms[0].frameLocation = 0;
1784        m_currentAlternativeIndex = 0;
1785    }
1786
1787    void regexEnd()
1788    {
1789        closeBodyAlternative();
1790    }
1791
1792    void alternativeBodyDisjunction(bool onceThrough)
1793    {
1794        int newAlternativeIndex = m_bodyDisjunction->terms.size();
1795        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1796        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1797
1798        m_currentAlternativeIndex = newAlternativeIndex;
1799    }
1800
1801    void alternativeDisjunction()
1802    {
1803        int newAlternativeIndex = m_bodyDisjunction->terms.size();
1804        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1805        m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1806
1807        m_currentAlternativeIndex = newAlternativeIndex;
1808    }
1809
1810    void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1811    {
1812        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1813            unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1814
1815            PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
1816
1817            if (alt) {
1818                if (disjunction == m_pattern.m_body)
1819                    alternativeBodyDisjunction(alternative->onceThrough());
1820                else
1821                    alternativeDisjunction();
1822            }
1823
1824            unsigned minimumSize = alternative->m_minimumSize;
1825            ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1826            unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1827
1828            if (countToCheck) {
1829                checkInput(countToCheck);
1830                currentCountAlreadyChecked += countToCheck;
1831            }
1832
1833            for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1834                PatternTerm& term = alternative->m_terms[i];
1835
1836                switch (term.type) {
1837                case PatternTerm::TypeAssertionBOL:
1838                    assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1839                    break;
1840
1841                case PatternTerm::TypeAssertionEOL:
1842                    assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1843                    break;
1844
1845                case PatternTerm::TypeAssertionWordBoundary:
1846                    assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1847                    break;
1848
1849                case PatternTerm::TypePatternCharacter:
1850                    atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1851                    break;
1852
1853                case PatternTerm::TypeCharacterClass:
1854                    atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1855                    break;
1856
1857                case PatternTerm::TypeBackReference:
1858                    atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1859                        break;
1860
1861                case PatternTerm::TypeForwardReference:
1862                    break;
1863
1864                case PatternTerm::TypeParenthesesSubpattern: {
1865                    unsigned disjunctionAlreadyCheckedCount = 0;
1866                    if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1867                        unsigned alternativeFrameLocation = term.frameLocation;
1868                        // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1869                        if (term.quantityType == QuantifierFixedCount)
1870                            disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1871                        else
1872                            alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1873                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1874                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1875                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1876                        atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1877                    } else if (term.parentheses.isTerminal) {
1878                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1879                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1880                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1881                        atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1882                    } else {
1883                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1884                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1885                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1886                        atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1887                    }
1888                    break;
1889                }
1890
1891                case PatternTerm::TypeParentheticalAssertion: {
1892                    unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1893
1894                    ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1895                    unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1896                    unsigned uncheckAmount = 0;
1897                    if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1898                        uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1899                        uncheckInput(uncheckAmount);
1900                        currentCountAlreadyChecked -= uncheckAmount;
1901                    }
1902
1903                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1904                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1905                    atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1906                    if (uncheckAmount) {
1907                        checkInput(uncheckAmount);
1908                        currentCountAlreadyChecked += uncheckAmount;
1909                    }
1910                    break;
1911                }
1912
1913                case PatternTerm::TypeDotStarEnclosure:
1914                    assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1915                    break;
1916                }
1917            }
1918        }
1919    }
1920
1921private:
1922    YarrPattern& m_pattern;
1923    OwnPtr<ByteDisjunction> m_bodyDisjunction;
1924    unsigned m_currentAlternativeIndex;
1925    Vector<ParenthesesStackEntry> m_parenthesesStack;
1926    Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo;
1927};
1928
1929PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1930{
1931    return ByteCompiler(pattern).compile(allocator);
1932}
1933
1934unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
1935{
1936    if (input.is8Bit())
1937        return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1938    return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1939}
1940
1941unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1942{
1943    return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1944}
1945
1946unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1947{
1948    return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
1949}
1950
1951// These should be the same for both UChar & LChar.
1952COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1953COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1954COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1955COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1956COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1957COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1958COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1959
1960
1961} }
1962