/*
 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "YarrJIT.h"

#include <wtf/ASCIICType.h>
#include "LinkBuffer.h"
#include "Options.h"
#include "Yarr.h"
#include "YarrCanonicalizeUCS2.h"

#if ENABLE(YARR_JIT)

using namespace WTF;

namespace JSC { namespace Yarr {

// JIT code generator for Yarr regular expressions. Walks the compiled
// YarrPattern and emits machine code via the inherited MacroAssembler.
// compileMode selects whether subpattern (capture group) positions are
// recorded (IncludeSubpatterns) or only overall match bounds (MatchOnly).
template<YarrJITCompileMode compileMode>
class YarrGenerator : private MacroAssembler {
    friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);

    // Fixed register assignments, per architecture:
    //   input  - pointer to the subject string's characters
    //   index  - current position within the input (in characters)
    //   length - length of the input (in characters)
    //   output - pointer to the output vector (IncludeSubpatterns), or
    //            scratch for the match start (MatchOnly)
    //   regT0/regT1 - general purpose temporaries
#if CPU(ARM)
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r4;

    static const RegisterID regT0 = ARMRegisters::r5;
    static const RegisterID regT1 = ARMRegisters::r6;

    static const RegisterID returnRegister = ARMRegisters::r0;
    static const RegisterID returnRegister2 = ARMRegisters::r1;
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;

    static const RegisterID returnRegister = MIPSRegisters::v0;
    static const RegisterID returnRegister2 = MIPSRegisters::v1;
#elif CPU(SH4)
    static const RegisterID input = SH4Registers::r4;
    static const RegisterID index = SH4Registers::r5;
    static const RegisterID length = SH4Registers::r6;
    static const RegisterID output = SH4Registers::r7;

    static const RegisterID regT0 = SH4Registers::r0;
    static const RegisterID regT1 = SH4Registers::r1;

    static const RegisterID returnRegister = SH4Registers::r0;
    static const RegisterID returnRegister2 = SH4Registers::r1;
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
#if !OS(WINDOWS)
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;
#else
    // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
    static const RegisterID input = X86Registers::edx;
    static const RegisterID index = X86Registers::r8;
    static const RegisterID length = X86Registers::r9;
    static const RegisterID output = X86Registers::r10;
#endif

    static const RegisterID regT0 = X86Registers::eax;
    static const RegisterID regT1 = X86Registers::ebx;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#endif

    // Reorder adjacent fixed-count (character class, pattern character) pairs
    // so that the cheaper single-character test is generated first. Purely a
    // code-generation-order optimization; match semantics are unchanged
    // because both terms are fixed-count and must both match anyway.
    void optimizeAlternative(PatternAlternative* alternative)
    {
        if (!alternative->m_terms.size())
            return;

        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
            PatternTerm& term = alternative->m_terms[i];
            PatternTerm& nextTerm = alternative->m_terms[i + 1];

            if ((term.type == PatternTerm::TypeCharacterClass)
                && (term.quantityType == QuantifierFixedCount)
                && (nextTerm.type == PatternTerm::TypePatternCharacter)
                && (nextTerm.quantityType == QuantifierFixedCount)) {
                PatternTerm termCopy = term;
                alternative->m_terms[i] = nextTerm;
                alternative->m_terms[i + 1] = termCopy;
            }
        }
    }

    // Emits a binary-chop over the sorted 'ranges', interleaving tests for the
    // sorted singleton 'matches' that fall between ranges. Successful matches
    // jump to matchDest; jumps appended to 'failures' are taken when the
    // character cannot match. *matchIndex is advanced past singletons consumed
    // (either emitted, or subsumed by a range). Recursion depth is bounded by
    // log2 of the range count.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo. If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singletons inside [lo, hi] are already covered by the range test.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }

    // Emits code that jumps to matchDest if 'character' is a member of
    // charClass; falls through otherwise. Uses a precomputed 256-entry lookup
    // table when one is available, otherwise emits explicit compares: the
    // non-ASCII matches/ranges first (guarded by a <= 0x7f test), then the
    // ASCII ranges/singletons.
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        if (charClass->m_table) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
            return;
        }
        Jump unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar lo = charClass->m_rangesUnicode[i].begin;
                    UChar hi = charClass->m_rangesUnicode[i].end;

                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            // Any singletons above the last range still need explicit compares.
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.m_ignoreCase) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // NOTE: clobbers 'character' by or'ing in the lowercase bit.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }

    // Jumps if input not available; will have (incorrectly) incremented already!
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }

    // Increments index by countToCheck, then jumps if that much input exists.
    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }

    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }

    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }

    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }

    // Loads the character at inputPosition into 'character' and jumps if it is
    // not equal to ch (after case-folding via the 0x20 bit, if appropriate).
    Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
    {
        readCharacter(inputPosition, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        return branch32(NotEqual, character, Imm32(ch));
    }

    // Loads the input character at index + inputPosition into reg, using an
    // 8-bit or 16-bit load depending on the subject string's character size.
    void readCharacter(int inputPosition, RegisterID reg)
    {
        if (m_charSize == Char8)
            load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
        else
            load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
    }

    // The backtracking frame lives on the machine stack; frameLocation is an
    // index in pointer-sized slots.
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }

    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }

    // Stores a placeholder pointer to the frame slot; the returned label is
    // patched later (via LinkBuffer) with a backtracking return address.
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }

    // Indirect jump through a frame slot holding a backtrack return address.
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    // Reserve the pattern's backtracking frame on the machine stack.
    void initCallFrame()
    {
        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
        if (callFrameSize)
            subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
    }
    // Release the backtracking frame reserved by initCallFrame().
    void removeCallFrame()
    {
        unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
        if (callFrameSize)
            addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
    }

    // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
    // The output vector holds (start, end) int pairs, indexed by subpattern.
    void setSubpatternStart(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
    }
    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
    }
    // Marks a subpattern as unmatched by storing -1 as its start offset.
    void clearSubpatternStart(unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
    }

    // We use one of three different strategies to track the start of the current match,
    // while matching.
    // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
    //    at the end of matching. This is irrespective of compileMode, and in this case
    //    these methods should never be called.
    // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
    //    vector, store the match start in the output vector.
    // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
    //    in this register.
    void setMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        if (compileMode == IncludeSubpatterns)
            store32(reg, output);
        else
            move(reg, output);
    }
    void getMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        if (compileMode == IncludeSubpatterns)
            load32(output, reg);
        else
            move(output, reg);
    }

    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };

    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        // Constructs a YarrOp wrapping a simple term (op code OpTerm).
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        // Constructs a structural YarrOp (alternatives, parentheses, etc.);
        // m_term is left unset for these nodes.
        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checked', to cache the offset used in this alternative, to avoid
        // recalculating it.
        int m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };

    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.
        void link(MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                // The return addresses now land here; route them out via the
                // fallthrough jump below.
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(LinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        // Pairs a patchable pointer slot with the label of the code it should
        // point at once the LinkBuffer resolves final addresses.
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };

    // Generation methods:
    // ===================

    // This method provides a default implementation of backtracking common
    // to many terms; terms commonly jump out of the forwards matching path
    // on any failed conditions, and add these jumps to the m_jumps list. If
    // no special handling is required we can often just backtrack to m_jumps.
    void backtrackTermDefault(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        m_backtrackingState.append(op.m_jumps);
    }

    // Generates a '^' assertion. In multiline mode this matches at the start
    // of input or immediately after a newline; otherwise only at the start.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checked)));

            // Check whether the preceding character is a newline.
            readCharacter((term->inputPosition - m_checked) - 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
        }
    }
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Generates a '$' assertion. In multiline mode this matches at the end of
    // input or immediately before a newline; otherwise only at the end.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checked)
                matchDest.append(atEndOfInput());

            // Check whether the following character is a newline.
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checked)
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Also falls through on nextIsNotWordChar.
    // Tests whether the character at the term's input position is a word
    // character: jumps to nextIsWordChar if so, to nextIsNotWordChar if at the
    // end of input, and otherwise falls through (character is not a wordchar).
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (term->inputPosition == m_checked)
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter((term->inputPosition - m_checked), character);
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
    }

    // Generates a '\b' (or inverted '\B') assertion: tests whether the
    // characters on either side of the current position differ in
    // word-character-ness. Begin-of-input counts as a non-wordchar on the left.
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checked));
        // Classify the character before the current position.
        readCharacter((term->inputPosition - m_checked) - 1, character);
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-through!
        }

        op.m_jumps.append(wordCharThenWordChar);

        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Generates a single fixed pattern character. Where the following terms
    // are also single fixed characters they are fused into this check (up to
    // 4 chars for 8-bit input, 2 for 16-bit) and their ops marked dead, so a
    // run of characters is matched with one wide (possibly unaligned) load
    // and compare.
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        if (op.m_isDeadCode)
            return;

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
        unsigned ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
        int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
        int allCharacters = ch;
#endif
        int numberCharacters;
        int startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
#if CPU(BIG_ENDIAN)
            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
            ignoreCaseMask |= 32;
#endif

        // Fuse as many subsequent single fixed characters as fit in one load.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;

            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
                break;

            nextOp->m_isDeadCode = true;

#if CPU(BIG_ENDIAN)
            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif

            UChar currentCharacter = nextTerm->patternCharacter;

            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ascii characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));

            allCharacters |= (currentCharacter << shiftAmount);

            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32 << shiftAmount;
        }

        if (m_charSize == Char8) {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
                return;
            case 2: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(address, character);
                break;
            }
            case 3: {
                // Compare the first two characters as a pair, then the third alone.
                BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(highAddress, character);
                if (ignoreCaseMask)
                    or32(Imm32(ignoreCaseMask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
                return;
            }
            case 4: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
            }
        } else {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
                return;
            case 2:
                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
        }

        if (ignoreCaseMask)
            or32(Imm32(ignoreCaseMask), character);
        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
        return;
    }
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Generates a pattern character with a fixed-count quantifier: a counted
    // loop from (index - quantityCount) up to index, comparing each position.
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        // Checked<> arithmetic guards the offset computation against overflow.
        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());

        if (m_charSize == Char8)
            load8(address, character);
        else
            load16(address, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Generates a greedy-quantified pattern character: consumes as many
    // occurrences as possible (up to quantityCount), recording how many were
    // matched in the term's frame slot so backtracking can give them back.
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);
            if (term->quantityCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracking for a greedy character: give back one matched occurrence
    // and re-enter forwards matching; fail onwards once the count hits zero.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }

    // Generates a non-greedy pattern character: initially matches zero
    // occurrences; occurrences are only consumed on backtrack (below).
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracking for a non-greedy character: try to consume one more
    // occurrence (if input remains, the limit isn't hit, and the character
    // matches), then re-enter forwards matching.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

            jump(op.m_reentry);
980 nonGreedyFailures.link(this); 981 } 982 983 sub32(countRegister, index); 984 m_backtrackingState.fallthrough(); 985 } 986 987 void generateCharacterClassOnce(size_t opIndex) 988 { 989 YarrOp& op = m_ops[opIndex]; 990 PatternTerm* term = op.m_term; 991 992 const RegisterID character = regT0; 993 994 JumpList matchDest; 995 readCharacter(term->inputPosition - m_checked, character); 996 matchCharacterClass(character, matchDest, term->characterClass); 997 998 if (term->invert()) 999 op.m_jumps.append(matchDest); 1000 else { 1001 op.m_jumps.append(jump()); 1002 matchDest.link(this); 1003 } 1004 } 1005 void backtrackCharacterClassOnce(size_t opIndex) 1006 { 1007 backtrackTermDefault(opIndex); 1008 } 1009 1010 void generateCharacterClassFixed(size_t opIndex) 1011 { 1012 YarrOp& op = m_ops[opIndex]; 1013 PatternTerm* term = op.m_term; 1014 1015 const RegisterID character = regT0; 1016 const RegisterID countRegister = regT1; 1017 1018 move(index, countRegister); 1019 sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); 1020 1021 Label loop(this); 1022 JumpList matchDest; 1023 if (m_charSize == Char8) 1024 load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character); 1025 else 1026 load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character); 1027 matchCharacterClass(character, matchDest, term->characterClass); 1028 1029 if (term->invert()) 1030 op.m_jumps.append(matchDest); 1031 else { 1032 op.m_jumps.append(jump()); 1033 matchDest.link(this); 1034 } 1035 1036 add32(TrustedImm32(1), countRegister); 1037 branch32(NotEqual, countRegister, index).linkTo(loop, this); 1038 } 1039 void backtrackCharacterClassFixed(size_t opIndex) 1040 { 1041 backtrackTermDefault(opIndex); 1042 } 1043 1044 void 
generateCharacterClassGreedy(size_t opIndex) 1045 { 1046 YarrOp& op = m_ops[opIndex]; 1047 PatternTerm* term = op.m_term; 1048 1049 const RegisterID character = regT0; 1050 const RegisterID countRegister = regT1; 1051 1052 move(TrustedImm32(0), countRegister); 1053 1054 JumpList failures; 1055 Label loop(this); 1056 failures.append(atEndOfInput()); 1057 1058 if (term->invert()) { 1059 readCharacter(term->inputPosition - m_checked, character); 1060 matchCharacterClass(character, failures, term->characterClass); 1061 } else { 1062 JumpList matchDest; 1063 readCharacter(term->inputPosition - m_checked, character); 1064 matchCharacterClass(character, matchDest, term->characterClass); 1065 failures.append(jump()); 1066 matchDest.link(this); 1067 } 1068 1069 add32(TrustedImm32(1), countRegister); 1070 add32(TrustedImm32(1), index); 1071 if (term->quantityCount != quantifyInfinite) { 1072 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); 1073 failures.append(jump()); 1074 } else 1075 jump(loop); 1076 1077 failures.link(this); 1078 op.m_reentry = label(); 1079 1080 storeToFrame(countRegister, term->frameLocation); 1081 } 1082 void backtrackCharacterClassGreedy(size_t opIndex) 1083 { 1084 YarrOp& op = m_ops[opIndex]; 1085 PatternTerm* term = op.m_term; 1086 1087 const RegisterID countRegister = regT1; 1088 1089 m_backtrackingState.link(this); 1090 1091 loadFromFrame(term->frameLocation, countRegister); 1092 m_backtrackingState.append(branchTest32(Zero, countRegister)); 1093 sub32(TrustedImm32(1), countRegister); 1094 sub32(TrustedImm32(1), index); 1095 jump(op.m_reentry); 1096 } 1097 1098 void generateCharacterClassNonGreedy(size_t opIndex) 1099 { 1100 YarrOp& op = m_ops[opIndex]; 1101 PatternTerm* term = op.m_term; 1102 1103 const RegisterID countRegister = regT1; 1104 1105 move(TrustedImm32(0), countRegister); 1106 op.m_reentry = label(); 1107 storeToFrame(countRegister, term->frameLocation); 1108 } 1109 void 
backtrackCharacterClassNonGreedy(size_t opIndex) 1110 { 1111 YarrOp& op = m_ops[opIndex]; 1112 PatternTerm* term = op.m_term; 1113 1114 const RegisterID character = regT0; 1115 const RegisterID countRegister = regT1; 1116 1117 JumpList nonGreedyFailures; 1118 1119 m_backtrackingState.link(this); 1120 1121 loadFromFrame(term->frameLocation, countRegister); 1122 1123 nonGreedyFailures.append(atEndOfInput()); 1124 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); 1125 1126 JumpList matchDest; 1127 readCharacter(term->inputPosition - m_checked, character); 1128 matchCharacterClass(character, matchDest, term->characterClass); 1129 1130 if (term->invert()) 1131 nonGreedyFailures.append(matchDest); 1132 else { 1133 nonGreedyFailures.append(jump()); 1134 matchDest.link(this); 1135 } 1136 1137 add32(TrustedImm32(1), countRegister); 1138 add32(TrustedImm32(1), index); 1139 1140 jump(op.m_reentry); 1141 1142 nonGreedyFailures.link(this); 1143 sub32(countRegister, index); 1144 m_backtrackingState.fallthrough(); 1145 } 1146 1147 void generateDotStarEnclosure(size_t opIndex) 1148 { 1149 YarrOp& op = m_ops[opIndex]; 1150 PatternTerm* term = op.m_term; 1151 1152 const RegisterID character = regT0; 1153 const RegisterID matchPos = regT1; 1154 1155 JumpList foundBeginningNewLine; 1156 JumpList saveStartIndex; 1157 JumpList foundEndingNewLine; 1158 1159 ASSERT(!m_pattern.m_body->m_hasFixedSize); 1160 getMatchStart(matchPos); 1161 1162 saveStartIndex.append(branchTest32(Zero, matchPos)); 1163 Label findBOLLoop(this); 1164 sub32(TrustedImm32(1), matchPos); 1165 if (m_charSize == Char8) 1166 load8(BaseIndex(input, matchPos, TimesOne, 0), character); 1167 else 1168 load16(BaseIndex(input, matchPos, TimesTwo, 0), character); 1169 matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); 1170 branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); 1171 saveStartIndex.append(jump()); 1172 1173 
foundBeginningNewLine.link(this); 1174 add32(TrustedImm32(1), matchPos); // Advance past newline 1175 saveStartIndex.link(this); 1176 1177 if (!m_pattern.m_multiline && term->anchors.bolAnchor) 1178 op.m_jumps.append(branchTest32(NonZero, matchPos)); 1179 1180 ASSERT(!m_pattern.m_body->m_hasFixedSize); 1181 setMatchStart(matchPos); 1182 1183 move(index, matchPos); 1184 1185 Label findEOLLoop(this); 1186 foundEndingNewLine.append(branch32(Equal, matchPos, length)); 1187 if (m_charSize == Char8) 1188 load8(BaseIndex(input, matchPos, TimesOne, 0), character); 1189 else 1190 load16(BaseIndex(input, matchPos, TimesTwo, 0), character); 1191 matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass()); 1192 add32(TrustedImm32(1), matchPos); 1193 jump(findEOLLoop); 1194 1195 foundEndingNewLine.link(this); 1196 1197 if (!m_pattern.m_multiline && term->anchors.eolAnchor) 1198 op.m_jumps.append(branch32(NotEqual, matchPos, length)); 1199 1200 move(matchPos, index); 1201 } 1202 1203 void backtrackDotStarEnclosure(size_t opIndex) 1204 { 1205 backtrackTermDefault(opIndex); 1206 } 1207 1208 // Code generation/backtracking for simple terms 1209 // (pattern characters, character classes, and assertions). 1210 // These methods farm out work to the set of functions above. 
    // Dispatches match code generation for a single simple term (pattern
    // character, character class, or assertion) to the appropriate helper
    // above, based on the term's type and quantifier.
    void generateTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generatePatternCharacterOnce(opIndex);
                else
                    generatePatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                generatePatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generatePatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generateCharacterClassOnce(opIndex);
                else
                    generateCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                generateCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generateCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            generateAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            generateAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            generateAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        // Parentheses are represented by dedicated ops, not OpTerm.
        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            RELEASE_ASSERT_NOT_REACHED();
        case PatternTerm::TypeBackReference:
            // Back references are not handled by the JIT; fall back to the interpreter.
            m_shouldFallBack = true;
            break;
        case PatternTerm::TypeDotStarEnclosure:
            generateDotStarEnclosure(opIndex);
            break;
        }
    }
    // Dispatches backtracking code generation for a single simple term,
    // mirroring the dispatch in generateTerm above.
    void backtrackTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackPatternCharacterOnce(opIndex);
                else
                    backtrackPatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackPatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackPatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackCharacterClassOnce(opIndex);
                else
                    backtrackCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            backtrackAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            backtrackAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            backtrackAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            RELEASE_ASSERT_NOT_REACHED();

        case PatternTerm::TypeDotStarEnclosure:
            backtrackDotStarEnclosure(opIndex);
            break;

        case PatternTerm::TypeBackReference:
            // Back references are not handled by the JIT; fall back to the interpreter.
            m_shouldFallBack = true;
            break;
        }
    }

    void generate()
    {
        // Forwards generate the matching code.
        ASSERT(m_ops.size());
        size_t opIndex = 0;

        do {
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                generateTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // These nodes wrap the set of alternatives in the body of the regular expression.
            // There may be either one or two chains of OpBodyAlternative nodes, one representing
            // the 'once through' sequence of alternatives (if any exist), and one representing
            // the repeating alternatives (again, if any exist).
            //
            // Upon normal entry to the Begin alternative, we will check that input is available.
            // Reentry to the Begin alternative will take place after the check has taken place,
            // and will assume that the input position has already been progressed as appropriate.
            //
            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
            // successfully completed a match - return a success state from JIT code.
            //
            // Next alternatives allow for reentry optimized to suit backtracking from its
            // preceding alternative. It expects the input position to still be set to a position
            // appropriate to its predecessor, and it will only perform an input check if the
            // predecessor had a minimum size less than its own.
            //
            // In the case of 'once through' expressions, the End node will also have a reentry
            // point to jump to when the last alternative fails. Again, this expects the input
            // position to still reflect that expected by the prior alternative.
            case OpBodyAlternativeBegin: {
                PatternAlternative* alternative = op.m_alternative;

                // Upon entry at the head of the set of alternatives, check if input is available
                // to run the first alternative. (This progresses the input position).
                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
                // We will reenter after the check, and assume the input position to have been
                // set as appropriate to this alternative.
                op.m_reentry = label();

                m_checked += alternative->m_minimumSize;
                break;
            }
            case OpBodyAlternativeNext:
            case OpBodyAlternativeEnd: {
                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                PatternAlternative* alternative = op.m_alternative;

                // If we get here, the prior alternative matched - return success.

                // Adjust the stack pointer to remove the pattern's frame.
                removeCallFrame();

                // Load appropriate values into the return register and the first output
                // slot, and return. In the case of pattern with a fixed size, we will
                // not have yet set the value in the first output slot.
                ASSERT(index != returnRegister);
                if (m_pattern.m_body->m_hasFixedSize) {
                    move(index, returnRegister);
                    if (priorAlternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
                    if (compileMode == IncludeSubpatterns)
                        store32(returnRegister, output);
                } else
                    getMatchStart(returnRegister);
                if (compileMode == IncludeSubpatterns)
                    store32(index, Address(output, 4));
                move(index, returnRegister2);

                generateReturn();

                // This is the divide between the tail of the prior alternative, above, and
                // the head of the subsequent alternative, below.

                if (op.m_op == OpBodyAlternativeNext) {
                    // This is the reentry point for the Next alternative. We expect any code
                    // that jumps here to do so with the input position matching that of the
                    // PRIOR alternative, and we will only check input availability if we
                    // need to progress it forwards.
                    op.m_reentry = label();
                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
                        op.m_jumps.append(jumpIfNoAvailableInput());
                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
                } else if (op.m_nextOp == notFound) {
                    // This is the reentry point for the End of 'once through' alternatives,
                    // jumped to when the last alternative fails to match.
                    op.m_reentry = label();
                    sub32(Imm32(priorAlternative->m_minimumSize), index);
                }

                if (op.m_op == OpBodyAlternativeNext)
                    m_checked += alternative->m_minimumSize;
                m_checked -= priorAlternative->m_minimumSize;
                break;
            }

            // OpSimpleNestedAlternativeBegin/Next/End
            // OpNestedAlternativeBegin/Next/End
            //
            // These nodes are used to handle sets of alternatives that are nested within
            // subpatterns and parenthetical assertions. The 'simple' forms are used where
            // we do not need to be able to backtrack back into any alternative other than
            // the last, the normal forms allow backtracking into any alternative.
            //
            // Each Begin/Next node is responsible for planting an input check to ensure
            // sufficient input is available on entry. Next nodes additionally need to
            // jump to the end - Next nodes use the End node's m_jumps list to hold this
            // set of jumps.
            //
            // In the non-simple forms, successful alternative matches must store a
            // 'return address' using a DataLabelPtr, used to store the address to jump
            // to when backtracking, to get to the code for the appropriate alternative.
            case OpSimpleNestedAlternativeBegin:
            case OpNestedAlternativeBegin: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeNext:
            case OpNestedAlternativeNext: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeNext) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                }

                // If we reach here then the last alternative has matched - jump to the
                // End node, to skip over any further alternatives.
                //
                // FIXME: this is logically O(N^2) (though N can be expected to be very
                // small). We could avoid this either by adding an extra jump to the JIT
                // data structures, or by making backtracking code that jumps to Next
                // alternatives are responsible for checking that input is available (if
                // we didn't need to plant the input checks, then m_jumps would be free).
                YarrOp* endOp = &m_ops[op.m_nextOp];
                while (endOp->m_nextOp != notFound) {
                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
                    endOp = &m_ops[endOp->m_nextOp];
                }
                ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
                endOp->m_jumps.append(jump());

                // This is the entry point for the next alternative.
                op.m_reentry = label();

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeEnd:
            case OpNestedAlternativeEnd: {
                PatternTerm* term = op.m_term;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeEnd) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                }

                // If this set of alternatives contains more than one alternative,
                // then the Next nodes will have planted jumps to the End, and added
                // them to this node's m_jumps list.
                op.m_jumps.link(this);
                op.m_jumps.clear();

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                break;
            }

            // OpParenthesesSubpatternOnceBegin/End
            //
            // These nodes support (optionally) capturing subpatterns, that have a
            // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
            case OpParenthesesSubpatternOnceBegin: {
                PatternTerm* term = op.m_term;
                unsigned parenthesesFrameLocation = term->frameLocation;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

                // Upon entry to a Greedy quantified set of parentheses store the index.
                // We'll use this for two purposes:
                //  - To indicate which iteration we are on of matching the remainder of
                //    the expression after the parentheses - the first, including the
                //    match within the parentheses, or the second having skipped over them.
                //  - To check for empty matches, which must be rejected.
                //
                // At the head of a NonGreedy set of parentheses we'll immediately set the
                // value on the stack to -1 (indicating a match skipping the subpattern),
                // and plant a jump to the end. We'll also plant a label to backtrack to
                // to reenter the subpattern later, with a store to set up index on the
                // second iteration.
                //
                // FIXME: for capturing parens, could use the index in the capture array?
                if (term->quantityType == QuantifierGreedy)
                    storeToFrame(index, parenthesesFrameLocation);
                else if (term->quantityType == QuantifierNonGreedy) {
                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                    storeToFrame(index, parenthesesFrameLocation);
                }

                // If the parentheses are capturing, store the starting index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture() && compileMode == IncludeSubpatterns) {
                    int inputOffset = term->inputPosition - m_checked;
                    if (term->quantityType == QuantifierFixedCount)
                        inputOffset -= term->parentheses.disjunction->m_minimumSize;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternStart(index, term->parentheses.subpatternId);
                }
                break;
            }
            case OpParenthesesSubpatternOnceEnd: {
                PatternTerm* term = op.m_term;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

#ifndef NDEBUG
                // Runtime ASSERT to make sure that the nested alternative handled the
                // "no input consumed" check.
                if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
                    Jump pastBreakpoint;
                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                    breakpoint();
                    pastBreakpoint.link(this);
                }
#endif

                // If the parentheses are capturing, store the ending index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture() && compileMode == IncludeSubpatterns) {
                    int inputOffset = term->inputPosition - m_checked;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternEnd(index, term->parentheses.subpatternId);
                }

                // If the parentheses are quantified Greedy then add a label to jump back
                // to if we get a failed match from after the parentheses. For NonGreedy
                // parentheses, link the jump from before the subpattern to here.
                if (term->quantityType == QuantifierGreedy)
                    op.m_reentry = label();
                else if (term->quantityType == QuantifierNonGreedy) {
                    YarrOp& beginOp = m_ops[op.m_previousOp];
                    beginOp.m_jumps.link(this);
                }
                break;
            }

            // OpParenthesesSubpatternTerminalBegin/End
            case OpParenthesesSubpatternTerminalBegin: {
                PatternTerm* term = op.m_term;
                ASSERT(term->quantityType == QuantifierGreedy);
                ASSERT(term->quantityCount == quantifyInfinite);
                ASSERT(!term->capture());

                // Upon entry set a label to loop back to.
                op.m_reentry = label();

                // Store the start index of the current match; we need to reject zero
                // length matches.
                storeToFrame(index, term->frameLocation);
                break;
            }
            case OpParenthesesSubpatternTerminalEnd: {
                YarrOp& beginOp = m_ops[op.m_previousOp];
#ifndef NDEBUG
                PatternTerm* term = op.m_term;

                // Runtime ASSERT to make sure that the nested alternative handled the
                // "no input consumed" check.
                Jump pastBreakpoint;
                pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                breakpoint();
                pastBreakpoint.link(this);
#endif

                // We know that the match is non-zero, we can accept it and
                // loop back up to the head of the subpattern.
                jump(beginOp.m_reentry);

                // This is the entry point to jump to when we stop matching - we will
                // do so once the subpattern cannot match any more.
                op.m_reentry = label();
                break;
            }

            // OpParentheticalAssertionBegin/End
            case OpParentheticalAssertionBegin: {
                PatternTerm* term = op.m_term;

                // Store the current index - assertions should not update index, so
                // we will need to restore it upon a successful match.
                unsigned parenthesesFrameLocation = term->frameLocation;
                storeToFrame(index, parenthesesFrameLocation);

                // Check input availability for the assertion body.
                op.m_checkAdjust = m_checked - term->inputPosition;
                if (op.m_checkAdjust)
                    sub32(Imm32(op.m_checkAdjust), index);

                m_checked -= op.m_checkAdjust;
                break;
            }
            case OpParentheticalAssertionEnd: {
                PatternTerm* term = op.m_term;

                // Restore the input index value.
                unsigned parenthesesFrameLocation = term->frameLocation;
                loadFromFrame(parenthesesFrameLocation, index);

                // If inverted, a successful match of the assertion must be treated
                // as a failure, so jump to backtracking.
                if (term->invert()) {
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                }

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked += lastOp.m_checkAdjust;
                break;
            }

            case OpMatchFailed:
                removeCallFrame();
                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
                move(TrustedImm32(0), returnRegister2);
                generateReturn();
                break;
            }

            ++opIndex;
        } while (opIndex < m_ops.size());
    }

    void backtrack()
    {
        // Backwards generate the backtracking code.
        size_t opIndex = m_ops.size();
        ASSERT(opIndex);

        do {
            --opIndex;
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                backtrackTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // For each Begin/Next node representing an alternative, we need to decide what to do
            // in two circumstances:
            //  - If we backtrack back into this node, from within the alternative.
            //  - If the input check at the head of the alternative fails (if this exists).
            //
            // We treat these two cases differently since in the former case we have slightly
            // more information - since we are backtracking out of a prior alternative we know
            // that at least enough input was available to run it. For example, given the regular
            // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
            // character match of 'a'), then we need not perform an additional input availability
            // check before running the second alternative.
            //
            // Backtracking required differs for the last alternative, which in the case of the
            // repeating set of alternatives must loop. The code generated for the last alternative
            // will also be used to handle all input check failures from any prior alternatives -
            // these require similar functionality, in seeking the next available alternative for
            // which there is sufficient input.
            //
            // Since backtracking of all other alternatives simply requires us to link backtracks
            // to the reentry point for the subsequent alternative, we will only be generating any
            // code when backtracking the last alternative.
            case OpBodyAlternativeBegin:
            case OpBodyAlternativeNext: {
                PatternAlternative* alternative = op.m_alternative;

                if (op.m_op == OpBodyAlternativeNext) {
                    PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                    m_checked += priorAlternative->m_minimumSize;
                }
                m_checked -= alternative->m_minimumSize;

                // Is this the last alternative? If not, then if we backtrack to this point we just
                // need to jump to try to match the next alternative.
                if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
                    m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
                    break;
                }
                YarrOp& endOp = m_ops[op.m_nextOp];

                YarrOp* beginOp = &op;
                while (beginOp->m_op != OpBodyAlternativeBegin) {
                    ASSERT(beginOp->m_op == OpBodyAlternativeNext);
                    beginOp = &m_ops[beginOp->m_previousOp];
                }

                bool onceThrough = endOp.m_nextOp == notFound;

                // First, generate code to handle cases where we backtrack out of an attempted match
                // of the last alternative. If this is a 'once through' set of alternatives then we
                // have nothing to do - link this straight through to the End.
                if (onceThrough)
                    m_backtrackingState.linkTo(endOp.m_reentry, this);
                else {
                    // If we don't need to move the input position, and the pattern has a fixed size
                    // (in which case we omit the store of the start index until the pattern has matched)
                    // then we can just link the backtrack out of the last alternative straight to the
                    // head of the first alternative.
                    if (m_pattern.m_body->m_hasFixedSize
                        && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
                        && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
                        m_backtrackingState.linkTo(beginOp->m_reentry, this);
                    else {
                        // We need to generate a trampoline of code to execute before looping back
                        // around to the first alternative.
                        m_backtrackingState.link(this);

                        // If the pattern size is not fixed, then store the start index, for use if we match.
                        if (!m_pattern.m_body->m_hasFixedSize) {
                            if (alternative->m_minimumSize == 1)
                                setMatchStart(index);
                            else {
                                move(index, regT0);
                                if (alternative->m_minimumSize)
                                    sub32(Imm32(alternative->m_minimumSize - 1), regT0);
                                else
                                    add32(TrustedImm32(1), regT0);
                                setMatchStart(regT0);
                            }
                        }

                        // Generate code to loop. Check whether the last alternative is longer than the
                        // first (e.g. /a|xy/ or /a|xyz/).
                        if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
                            // We want to loop, and increment input position. If the delta is 1, it is
                            // already correctly incremented, if more than one then decrement as appropriate.
                            unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
                            ASSERT(delta);
                            if (delta != 1)
                                sub32(Imm32(delta - 1), index);
                            jump(beginOp->m_reentry);
                        } else {
                            // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
                            // be sufficient input available to handle this, so just fall through.
1856 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; 1857 if (delta != 0xFFFFFFFFu) { 1858 // We need to check input because we are incrementing the input. 1859 add32(Imm32(delta + 1), index); 1860 checkInput().linkTo(beginOp->m_reentry, this); 1861 } 1862 } 1863 } 1864 } 1865 1866 // We can reach this point in the code in two ways: 1867 // - Fallthrough from the code above (a repeating alternative backtracked out of its 1868 // last alternative, and did not have sufficent input to run the first). 1869 // - We will loop back up to the following label when a releating alternative loops, 1870 // following a failed input check. 1871 // 1872 // Either way, we have just failed the input check for the first alternative. 1873 Label firstInputCheckFailed(this); 1874 1875 // Generate code to handle input check failures from alternatives except the last. 1876 // prevOp is the alternative we're handling a bail out from (initially Begin), and 1877 // nextOp is the alternative we will be attempting to reenter into. 1878 // 1879 // We will link input check failures from the forwards matching path back to the code 1880 // that can handle them. 1881 YarrOp* prevOp = beginOp; 1882 YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; 1883 while (nextOp->m_op != OpBodyAlternativeEnd) { 1884 prevOp->m_jumps.link(this); 1885 1886 // We only get here if an input check fails, it is only worth checking again 1887 // if the next alternative has a minimum size less than the last. 1888 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) { 1889 // FIXME: if we added an extra label to YarrOp, we could avoid needing to 1890 // subtract delta back out, and reduce this code. Should performance test 1891 // the benefit of this. 
1892 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize; 1893 sub32(Imm32(delta), index); 1894 Jump fail = jumpIfNoAvailableInput(); 1895 add32(Imm32(delta), index); 1896 jump(nextOp->m_reentry); 1897 fail.link(this); 1898 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize) 1899 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index); 1900 prevOp = nextOp; 1901 nextOp = &m_ops[nextOp->m_nextOp]; 1902 } 1903 1904 // We fall through to here if there is insufficient input to run the last alternative. 1905 1906 // If there is insufficient input to run the last alternative, then for 'once through' 1907 // alternatives we are done - just jump back up into the forwards matching path at the End. 1908 if (onceThrough) { 1909 op.m_jumps.linkTo(endOp.m_reentry, this); 1910 jump(endOp.m_reentry); 1911 break; 1912 } 1913 1914 // For repeating alternatives, link any input check failure from the last alternative to 1915 // this point. 1916 op.m_jumps.link(this); 1917 1918 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; 1919 1920 // Check for cases where input position is already incremented by 1 for the last 1921 // alternative (this is particularly useful where the minimum size of the body 1922 // disjunction is 0, e.g. /a*|b/). 1923 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { 1924 // index is already incremented by 1, so just store it now! 1925 setMatchStart(index); 1926 needsToUpdateMatchStart = false; 1927 } 1928 1929 // Check whether there is sufficient input to loop. Increment the input position by 1930 // one, and check. Also add in the minimum disjunction size before checking - there 1931 // is no point in looping if we're just going to fail all the input checks around 1932 // the next iteration. 
1933 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); 1934 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { 1935 // If the last alternative had the same minimum size as the disjunction, 1936 // just simply increment input pos by 1, no adjustment based on minimum size. 1937 add32(TrustedImm32(1), index); 1938 } else { 1939 // If the minumum for the last alternative was one greater than than that 1940 // for the disjunction, we're already progressed by 1, nothing to do! 1941 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; 1942 if (delta) 1943 sub32(Imm32(delta), index); 1944 } 1945 Jump matchFailed = jumpIfNoAvailableInput(); 1946 1947 if (needsToUpdateMatchStart) { 1948 if (!m_pattern.m_body->m_minimumSize) 1949 setMatchStart(index); 1950 else { 1951 move(index, regT0); 1952 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); 1953 setMatchStart(regT0); 1954 } 1955 } 1956 1957 // Calculate how much more input the first alternative requires than the minimum 1958 // for the body as a whole. If no more is needed then we dont need an additional 1959 // input check here - jump straight back up to the start of the first alternative. 1960 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) 1961 jump(beginOp->m_reentry); 1962 else { 1963 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) 1964 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); 1965 else 1966 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); 1967 checkInput().linkTo(beginOp->m_reentry, this); 1968 jump(firstInputCheckFailed); 1969 } 1970 1971 // We jump to here if we iterate to the point that there is insufficient input to 1972 // run any matches, and need to return a failure state from JIT code. 
1973 matchFailed.link(this); 1974 1975 removeCallFrame(); 1976 move(TrustedImmPtr((void*)WTF::notFound), returnRegister); 1977 move(TrustedImm32(0), returnRegister2); 1978 generateReturn(); 1979 break; 1980 } 1981 case OpBodyAlternativeEnd: { 1982 // We should never backtrack back into a body disjunction. 1983 ASSERT(m_backtrackingState.isEmpty()); 1984 1985 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; 1986 m_checked += priorAlternative->m_minimumSize; 1987 break; 1988 } 1989 1990 // OpSimpleNestedAlternativeBegin/Next/End 1991 // OpNestedAlternativeBegin/Next/End 1992 // 1993 // Generate code for when we backtrack back out of an alternative into 1994 // a Begin or Next node, or when the entry input count check fails. If 1995 // there are more alternatives we need to jump to the next alternative, 1996 // if not we backtrack back out of the current set of parentheses. 1997 // 1998 // In the case of non-simple nested assertions we need to also link the 1999 // 'return address' appropriately to backtrack back out into the correct 2000 // alternative. 2001 case OpSimpleNestedAlternativeBegin: 2002 case OpSimpleNestedAlternativeNext: 2003 case OpNestedAlternativeBegin: 2004 case OpNestedAlternativeNext: { 2005 YarrOp& nextOp = m_ops[op.m_nextOp]; 2006 bool isBegin = op.m_previousOp == notFound; 2007 bool isLastAlternative = nextOp.m_nextOp == notFound; 2008 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); 2009 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); 2010 2011 // Treat an input check failure the same as a failed match. 2012 m_backtrackingState.append(op.m_jumps); 2013 2014 // Set the backtracks to jump to the appropriate place. 
We may need 2015 // to link the backtracks in one of three different way depending on 2016 // the type of alternative we are dealing with: 2017 // - A single alternative, with no simplings. 2018 // - The last alternative of a set of two or more. 2019 // - An alternative other than the last of a set of two or more. 2020 // 2021 // In the case of a single alternative on its own, we don't need to 2022 // jump anywhere - if the alternative fails to match we can just 2023 // continue to backtrack out of the parentheses without jumping. 2024 // 2025 // In the case of the last alternative in a set of more than one, we 2026 // need to jump to return back out to the beginning. We'll do so by 2027 // adding a jump to the End node's m_jumps list, and linking this 2028 // when we come to generate the Begin node. For alternatives other 2029 // than the last, we need to jump to the next alternative. 2030 // 2031 // If the alternative had adjusted the input position we must link 2032 // backtracking to here, correct, and then jump on. If not we can 2033 // link the backtracks directly to their destination. 2034 if (op.m_checkAdjust) { 2035 // Handle the cases where we need to link the backtracks here. 2036 m_backtrackingState.link(this); 2037 sub32(Imm32(op.m_checkAdjust), index); 2038 if (!isLastAlternative) { 2039 // An alternative that is not the last should jump to its successor. 2040 jump(nextOp.m_reentry); 2041 } else if (!isBegin) { 2042 // The last of more than one alternatives must jump back to the beginning. 2043 nextOp.m_jumps.append(jump()); 2044 } else { 2045 // A single alternative on its own can fall through. 2046 m_backtrackingState.fallthrough(); 2047 } 2048 } else { 2049 // Handle the cases where we can link the backtracks directly to their destinations. 2050 if (!isLastAlternative) { 2051 // An alternative that is not the last should jump to its successor. 
2052 m_backtrackingState.linkTo(nextOp.m_reentry, this); 2053 } else if (!isBegin) { 2054 // The last of more than one alternatives must jump back to the beginning. 2055 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); 2056 } 2057 // In the case of a single alternative on its own do nothing - it can fall through. 2058 } 2059 2060 // If there is a backtrack jump from a zero length match link it here. 2061 if (op.m_zeroLengthMatch.isSet()) 2062 m_backtrackingState.append(op.m_zeroLengthMatch); 2063 2064 // At this point we've handled the backtracking back into this node. 2065 // Now link any backtracks that need to jump to here. 2066 2067 // For non-simple alternatives, link the alternative's 'return address' 2068 // so that we backtrack back out into the previous alternative. 2069 if (op.m_op == OpNestedAlternativeNext) 2070 m_backtrackingState.append(op.m_returnAddress); 2071 2072 // If there is more than one alternative, then the last alternative will 2073 // have planted a jump to be linked to the end. This jump was added to the 2074 // End node's m_jumps list. If we are back at the beginning, link it here. 2075 if (isBegin) { 2076 YarrOp* endOp = &m_ops[op.m_nextOp]; 2077 while (endOp->m_nextOp != notFound) { 2078 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); 2079 endOp = &m_ops[endOp->m_nextOp]; 2080 } 2081 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); 2082 m_backtrackingState.append(endOp->m_jumps); 2083 } 2084 2085 if (!isBegin) { 2086 YarrOp& lastOp = m_ops[op.m_previousOp]; 2087 m_checked += lastOp.m_checkAdjust; 2088 } 2089 m_checked -= op.m_checkAdjust; 2090 break; 2091 } 2092 case OpSimpleNestedAlternativeEnd: 2093 case OpNestedAlternativeEnd: { 2094 PatternTerm* term = op.m_term; 2095 2096 // If there is a backtrack jump from a zero length match link it here. 
2097 if (op.m_zeroLengthMatch.isSet()) 2098 m_backtrackingState.append(op.m_zeroLengthMatch); 2099 2100 // If we backtrack into the end of a simple subpattern do nothing; 2101 // just continue through into the last alternative. If we backtrack 2102 // into the end of a non-simple set of alterntives we need to jump 2103 // to the backtracking return address set up during generation. 2104 if (op.m_op == OpNestedAlternativeEnd) { 2105 m_backtrackingState.link(this); 2106 2107 // Plant a jump to the return address. 2108 unsigned parenthesesFrameLocation = term->frameLocation; 2109 unsigned alternativeFrameLocation = parenthesesFrameLocation; 2110 if (term->quantityType != QuantifierFixedCount) 2111 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 2112 loadFromFrameAndJump(alternativeFrameLocation); 2113 2114 // Link the DataLabelPtr associated with the end of the last 2115 // alternative to this point. 2116 m_backtrackingState.append(op.m_returnAddress); 2117 } 2118 2119 YarrOp& lastOp = m_ops[op.m_previousOp]; 2120 m_checked += lastOp.m_checkAdjust; 2121 break; 2122 } 2123 2124 // OpParenthesesSubpatternOnceBegin/End 2125 // 2126 // When we are backtracking back out of a capturing subpattern we need 2127 // to clear the start index in the matches output array, to record that 2128 // this subpattern has not been captured. 2129 // 2130 // When backtracking back out of a Greedy quantified subpattern we need 2131 // to catch this, and try running the remainder of the alternative after 2132 // the subpattern again, skipping the parentheses. 2133 // 2134 // Upon backtracking back into a quantified set of parentheses we need to 2135 // check whether we were currently skipping the subpattern. If not, we 2136 // can backtrack into them, if we were we need to either backtrack back 2137 // out of the start of the parentheses, or jump back to the forwards 2138 // matching start, depending of whether the match is Greedy or NonGreedy. 
2139 case OpParenthesesSubpatternOnceBegin: { 2140 PatternTerm* term = op.m_term; 2141 ASSERT(term->quantityCount == 1); 2142 2143 // We only need to backtrack to thispoint if capturing or greedy. 2144 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { 2145 m_backtrackingState.link(this); 2146 2147 // If capturing, clear the capture (we only need to reset start). 2148 if (term->capture() && compileMode == IncludeSubpatterns) 2149 clearSubpatternStart(term->parentheses.subpatternId); 2150 2151 // If Greedy, jump to the end. 2152 if (term->quantityType == QuantifierGreedy) { 2153 // Clear the flag in the stackframe indicating we ran through the subpattern. 2154 unsigned parenthesesFrameLocation = term->frameLocation; 2155 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 2156 // Jump to after the parentheses, skipping the subpattern. 2157 jump(m_ops[op.m_nextOp].m_reentry); 2158 // A backtrack from after the parentheses, when skipping the subpattern, 2159 // will jump back to here. 2160 op.m_jumps.link(this); 2161 } 2162 2163 m_backtrackingState.fallthrough(); 2164 } 2165 break; 2166 } 2167 case OpParenthesesSubpatternOnceEnd: { 2168 PatternTerm* term = op.m_term; 2169 2170 if (term->quantityType != QuantifierFixedCount) { 2171 m_backtrackingState.link(this); 2172 2173 // Check whether we should backtrack back into the parentheses, or if we 2174 // are currently in a state where we had skipped over the subpattern 2175 // (in which case the flag value on the stack will be -1). 2176 unsigned parenthesesFrameLocation = term->frameLocation; 2177 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); 2178 2179 if (term->quantityType == QuantifierGreedy) { 2180 // For Greedy parentheses, we skip after having already tried going 2181 // through the subpattern, so if we get here we're done. 
2182 YarrOp& beginOp = m_ops[op.m_previousOp]; 2183 beginOp.m_jumps.append(hadSkipped); 2184 } else { 2185 // For NonGreedy parentheses, we try skipping the subpattern first, 2186 // so if we get here we need to try running through the subpattern 2187 // next. Jump back to the start of the parentheses in the forwards 2188 // matching path. 2189 ASSERT(term->quantityType == QuantifierNonGreedy); 2190 YarrOp& beginOp = m_ops[op.m_previousOp]; 2191 hadSkipped.linkTo(beginOp.m_reentry, this); 2192 } 2193 2194 m_backtrackingState.fallthrough(); 2195 } 2196 2197 m_backtrackingState.append(op.m_jumps); 2198 break; 2199 } 2200 2201 // OpParenthesesSubpatternTerminalBegin/End 2202 // 2203 // Terminal subpatterns will always match - there is nothing after them to 2204 // force a backtrack, and they have a minimum count of 0, and as such will 2205 // always produce an acceptable result. 2206 case OpParenthesesSubpatternTerminalBegin: { 2207 // We will backtrack to this point once the subpattern cannot match any 2208 // more. Since no match is accepted as a successful match (we are Greedy 2209 // quantified with a minimum of zero) jump back to the forwards matching 2210 // path at the end. 2211 YarrOp& endOp = m_ops[op.m_nextOp]; 2212 m_backtrackingState.linkTo(endOp.m_reentry, this); 2213 break; 2214 } 2215 case OpParenthesesSubpatternTerminalEnd: 2216 // We should never be backtracking to here (hence the 'terminal' in the name). 2217 ASSERT(m_backtrackingState.isEmpty()); 2218 m_backtrackingState.append(op.m_jumps); 2219 break; 2220 2221 // OpParentheticalAssertionBegin/End 2222 case OpParentheticalAssertionBegin: { 2223 PatternTerm* term = op.m_term; 2224 YarrOp& endOp = m_ops[op.m_nextOp]; 2225 2226 // We need to handle the backtracks upon backtracking back out 2227 // of a parenthetical assertion if either we need to correct 2228 // the input index, or the assertion was inverted. 
2229 if (op.m_checkAdjust || term->invert()) { 2230 m_backtrackingState.link(this); 2231 2232 if (op.m_checkAdjust) 2233 add32(Imm32(op.m_checkAdjust), index); 2234 2235 // In an inverted assertion failure to match the subpattern 2236 // is treated as a successful match - jump to the end of the 2237 // subpattern. We already have adjusted the input position 2238 // back to that before the assertion, which is correct. 2239 if (term->invert()) 2240 jump(endOp.m_reentry); 2241 2242 m_backtrackingState.fallthrough(); 2243 } 2244 2245 // The End node's jump list will contain any backtracks into 2246 // the end of the assertion. Also, if inverted, we will have 2247 // added the failure caused by a successful match to this. 2248 m_backtrackingState.append(endOp.m_jumps); 2249 2250 m_checked += op.m_checkAdjust; 2251 break; 2252 } 2253 case OpParentheticalAssertionEnd: { 2254 // FIXME: We should really be clearing any nested subpattern 2255 // matches on bailing out from after the pattern. Firefox has 2256 // this bug too (presumably because they use YARR!) 2257 2258 // Never backtrack into an assertion; later failures bail to before the begin. 2259 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); 2260 2261 YarrOp& lastOp = m_ops[op.m_previousOp]; 2262 m_checked -= lastOp.m_checkAdjust; 2263 break; 2264 } 2265 2266 case OpMatchFailed: 2267 break; 2268 } 2269 2270 } while (opIndex); 2271 } 2272 2273 // Compilation methods: 2274 // ==================== 2275 2276 // opCompileParenthesesSubpattern 2277 // Emits ops for a subpattern (set of parentheses). These consist 2278 // of a set of alternatives wrapped in an outer set of nodes for 2279 // the parentheses. 2280 // Supported types of parentheses are 'Once' (quantityCount == 1) 2281 // and 'Terminal' (non-capturing parentheses quantified as greedy 2282 // and infinite). 
    // Alternatives will use the 'Simple' set of ops if either the
    // subpattern is terminal (in which case we will never need to
    // backtrack), or if the subpattern only contains one alternative.
    // Unsupported parentheses set m_shouldFallBack so the caller can
    // defer to the interpreter instead.
    void opCompileParenthesesSubpattern(PatternTerm* term)
    {
        YarrOpCode parenthesesBeginOpCode;
        YarrOpCode parenthesesEndOpCode;
        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;

        // We can currently only compile quantity 1 subpatterns that are
        // not copies. We generate a copy in the case of a range quantifier,
        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
        // comes where the subpattern is capturing, in which case we would
        // need to restore the capture from the first subpattern upon a
        // failure in the second.
        if (term->quantityCount == 1 && !term->parentheses.isCopy) {
            // Select the 'Once' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;

            // If there is more than one alternative we cannot use the 'simple' nodes.
            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
                alternativeBeginOpCode = OpNestedAlternativeBegin;
                alternativeNextOpCode = OpNestedAlternativeNext;
                alternativeEndOpCode = OpNestedAlternativeEnd;
            }
        } else if (term->parentheses.isTerminal) {
            // Select the 'Terminal' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
        } else {
            // This subpattern is not supported by the JIT.
            m_shouldFallBack = true;
            return;
        }

        size_t parenBegin = m_ops.size();
        m_ops.append(parenthesesBeginOpCode);

        m_ops.append(alternativeBeginOpCode);
        m_ops.last().m_previousOp = notFound;
        m_ops.last().m_term = term;
        Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives;
        for (unsigned i = 0; i < alternatives.size(); ++i) {
            size_t lastOpIndex = m_ops.size() - 1;

            PatternAlternative* nestedAlternative = alternatives[i].get();
            // NOTE: this may append further ops (and so may reallocate m_ops'
            // storage) - that is why indices, not references, are held across it,
            // and references are only taken after the Next node is appended below.
            opCompileAlternative(nestedAlternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(alternativeNextOpCode));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            // Doubly link this alternative's Begin/Next node to its successor.
            lastOp.m_alternative = nestedAlternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;
            thisOp.m_term = term;
        }
        // The trailing Next node is surplus; repurpose it as the End node.
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == alternativeNextOpCode);
        lastOp.m_op = alternativeEndOpCode;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = notFound;

        size_t parenEnd = m_ops.size();
        m_ops.append(parenthesesEndOpCode);

        // Link the outer parentheses Begin/End pair to each other.
        m_ops[parenBegin].m_term = term;
        m_ops[parenBegin].m_previousOp = notFound;
        m_ops[parenBegin].m_nextOp = parenEnd;
        m_ops[parenEnd].m_term = term;
        m_ops[parenEnd].m_previousOp = parenBegin;
        m_ops[parenEnd].m_nextOp = notFound;
    }

    // opCompileParentheticalAssertion
    // Emits ops for a parenthetical assertion. These consist of an
    // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
    // the alternatives, with these wrapped by an outer pair of
    // OpParentheticalAssertionBegin/End nodes.
    // We can always use the OpSimpleNestedAlternative nodes in the
    // case of parenthetical assertions since these only ever match
    // once, and will never backtrack back into the assertion.
2371 void opCompileParentheticalAssertion(PatternTerm* term) 2372 { 2373 size_t parenBegin = m_ops.size(); 2374 m_ops.append(OpParentheticalAssertionBegin); 2375 2376 m_ops.append(OpSimpleNestedAlternativeBegin); 2377 m_ops.last().m_previousOp = notFound; 2378 m_ops.last().m_term = term; 2379 Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives; 2380 for (unsigned i = 0; i < alternatives.size(); ++i) { 2381 size_t lastOpIndex = m_ops.size() - 1; 2382 2383 PatternAlternative* nestedAlternative = alternatives[i].get(); 2384 opCompileAlternative(nestedAlternative); 2385 2386 size_t thisOpIndex = m_ops.size(); 2387 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); 2388 2389 YarrOp& lastOp = m_ops[lastOpIndex]; 2390 YarrOp& thisOp = m_ops[thisOpIndex]; 2391 2392 lastOp.m_alternative = nestedAlternative; 2393 lastOp.m_nextOp = thisOpIndex; 2394 thisOp.m_previousOp = lastOpIndex; 2395 thisOp.m_term = term; 2396 } 2397 YarrOp& lastOp = m_ops.last(); 2398 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); 2399 lastOp.m_op = OpSimpleNestedAlternativeEnd; 2400 lastOp.m_alternative = 0; 2401 lastOp.m_nextOp = notFound; 2402 2403 size_t parenEnd = m_ops.size(); 2404 m_ops.append(OpParentheticalAssertionEnd); 2405 2406 m_ops[parenBegin].m_term = term; 2407 m_ops[parenBegin].m_previousOp = notFound; 2408 m_ops[parenBegin].m_nextOp = parenEnd; 2409 m_ops[parenEnd].m_term = term; 2410 m_ops[parenEnd].m_previousOp = parenBegin; 2411 m_ops[parenEnd].m_nextOp = notFound; 2412 } 2413 2414 // opCompileAlternative 2415 // Called to emit nodes for all terms in an alternative. 
    void opCompileAlternative(PatternAlternative* alternative)
    {
        optimizeAlternative(alternative);

        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
            PatternTerm* term = &alternative->m_terms[i];

            switch (term->type) {
            case PatternTerm::TypeParenthesesSubpattern:
                opCompileParenthesesSubpattern(term);
                break;

            case PatternTerm::TypeParentheticalAssertion:
                opCompileParentheticalAssertion(term);
                break;

            default:
                // All other term types are represented by a single OpTerm node.
                m_ops.append(term);
            }
        }
    }

    // opCompileBody
    // This method compiles the body disjunction of the regular expression.
    // The body consists of two sets of alternatives - zero or more 'once
    // through' (BOL anchored) alternatives, followed by zero or more
    // repeated alternatives.
    // For each of these two sets of alternatives, if not empty they will be
    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
    // 'begin' node referencing the first alternative, and 'next' nodes
    // referencing any further alternatives). The begin/next/end nodes are
    // linked together in a doubly linked list. In the case of repeating
    // alternatives, the end node is also linked back to the beginning.
    // If no repeating alternatives exist, then an OpMatchFailed node exists
    // to return the failing result.
    void opCompileBody(PatternDisjunction* disjunction)
    {
        Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives;
        size_t currentAlternativeIndex = 0;

        // Emit the 'once through' alternatives.
        if (alternatives.size() && alternatives[0]->onceThrough()) {
            m_ops.append(YarrOp(OpBodyAlternativeBegin));
            m_ops.last().m_previousOp = notFound;

            do {
                size_t lastOpIndex = m_ops.size() - 1;
                PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
                opCompileAlternative(alternative);

                size_t thisOpIndex = m_ops.size();
                m_ops.append(YarrOp(OpBodyAlternativeNext));

                // References taken only after both nodes exist (appending may reallocate m_ops).
                YarrOp& lastOp = m_ops[lastOpIndex];
                YarrOp& thisOp = m_ops[thisOpIndex];

                lastOp.m_alternative = alternative;
                lastOp.m_nextOp = thisOpIndex;
                thisOp.m_previousOp = lastOpIndex;

                ++currentAlternativeIndex;
            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());

            // The trailing Next node is surplus; repurpose it as the End node.
            YarrOp& lastOp = m_ops.last();

            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
            lastOp.m_op = OpBodyAlternativeEnd;
            lastOp.m_alternative = 0;
            lastOp.m_nextOp = notFound;
        }

        if (currentAlternativeIndex == alternatives.size()) {
            m_ops.append(YarrOp(OpMatchFailed));
            return;
        }

        // Emit the repeated alternatives.
        // repeatLoop records the Begin node's index; the End node links back to it below
        // to form the repeating loop.
        size_t repeatLoop = m_ops.size();
        m_ops.append(YarrOp(OpBodyAlternativeBegin));
        m_ops.last().m_previousOp = notFound;
        do {
            size_t lastOpIndex = m_ops.size() - 1;
            PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
            ASSERT(!alternative->onceThrough());
            opCompileAlternative(alternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpBodyAlternativeNext));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = alternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;

            ++currentAlternativeIndex;
        } while (currentAlternativeIndex < alternatives.size());
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
        lastOp.m_op = OpBodyAlternativeEnd;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = repeatLoop;
    }

    // generateEnter
    // Emits the function prologue: saves the callee-saved registers this
    // generator uses and loads/normalizes the input, index, length and
    // output arguments as required by each platform's calling convention.
    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        push(X86Registers::ebx);
        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#if OS(WINDOWS)
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
#endif
#elif CPU(X86)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
        push(X86Registers::ebx);
        push(X86Registers::edi);
        push(X86Registers::esi);
        // load output into edi (2 = saved ebp + return address).
#if COMPILER(MSVC)
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
#else
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
#endif
#elif CPU(ARM)
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
#if CPU(ARM_TRADITIONAL)
        push(ARMRegisters::r8); // scratch register
#endif
        if (compileMode == IncludeSubpatterns)
            move(ARMRegisters::r3, output);
#elif CPU(SH4)
        push(SH4Registers::r11);
        push(SH4Registers::r13);
#elif CPU(MIPS)
        // Do nothing.
#endif
    }

    // generateReturn
    // Emits the function epilogue: restores the registers saved by
    // generateEnter() (in reverse order) and returns. On Win64 the two
    // return values are stored through the caller-provided result slot.
    void generateReturn()
    {
#if CPU(X86_64)
#if OS(WINDOWS)
        // Store the return value in the allocated space pointed by rcx.
        store64(returnRegister, Address(X86Registers::ecx));
        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
        move(X86Registers::ecx, returnRegister);
#endif
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(X86)
        pop(X86Registers::esi);
        pop(X86Registers::edi);
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM)
#if CPU(ARM_TRADITIONAL)
        pop(ARMRegisters::r8); // scratch register
#endif
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(SH4)
        pop(SH4Registers::r13);
        pop(SH4Registers::r11);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }

public:
    YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
        : m_pattern(pattern)
        , m_charSize(charSize)
        , m_charScale(m_charSize == Char8 ? TimesOne : TimesTwo)
        , m_shouldFallBack(false)
        , m_checked(0)
    {
    }

    // compile
    // Drives the whole JIT compilation: emits the prologue and an initial
    // input availability check, resets the subpattern capture slots, compiles
    // the pattern to the internal YarrOp representation, generates the
    // forwards matching and backtracking code, and finally links the result
    // into the YarrCodeBlock - or flags a fallback to the interpreter for
    // constructs the JIT cannot handle.
    void compile(VM* vm, YarrCodeBlock& jitObject)
    {
        generateEnter();

        // If the input is already exhausted, return 'no match' immediately.
        Jump hasInput = checkInput();
        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
        hasInput.link(this);

        // Initialize all capture slots (start indices) to -1, i.e. 'not captured'.
        if (compileMode == IncludeSubpatterns) {
            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
        }

        if (!m_pattern.m_body->m_hasFixedSize)
            setMatchStart(index);

        initCallFrame();

        // Compile the pattern to the internal 'YarrOp' representation.
        opCompileBody(m_pattern.m_body);

        // If we encountered anything we can't handle in the JIT code
        // (e.g. backreferences) then return early.
        if (m_shouldFallBack) {
            jitObject.setFallBack(true);
            return;
        }

        generate();
        backtrack();

        // Link & finalize the code.
        LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID);
        m_backtrackingState.linkDataLabels(linkBuffer);

        if (compileMode == MatchOnly) {
            if (m_charSize == Char8)
                jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
            else
                jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
        } else {
            if (m_charSize == Char8)
                jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
            else
                jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
        }
        jitObject.setFallBack(m_shouldFallBack);
    }

private:
    // The pattern being compiled.
    YarrPattern& m_pattern;

    // Whether the subject string is 8-bit or 16-bit characters.
    YarrCharSize m_charSize;

    // Address scale for character loads, derived from m_charSize.
    Scale m_charScale;

    // Used to detect regular expression constructs that are not currently
    // supported in the JIT; fall back to the interpreter when this is detected.
    bool m_shouldFallBack;

    // The regular expression expressed as a linear sequence of operations.
    Vector<YarrOp, 128> m_ops;

    // This records the current input offset being applied due to the current
    // set of alternatives we are nested within. E.g. when matching the
    // character 'b' within the regular expression /abc/, we will know that
    // the minimum size for the alternative is 3, checked upon entry to the
    // alternative, and that 'b' is at offset 1 from the start, and as such
    // when matching 'b' we need to apply an offset of -2 to the load.
    //
    // FIXME: This should go away. Rather than tracking this value throughout
    // code generation, we should gather this information up front & store it
    // on the YarrOp structure.
    int m_checked;

    // This class records state whilst generating the backtracking path of code.
2689 BacktrackingState m_backtrackingState; 2690}; 2691 2692void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode) 2693{ 2694 if (mode == MatchOnly) 2695 YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject); 2696 else 2697 YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject); 2698} 2699 2700}} 2701 2702#endif 2703