1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; 27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 28import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 29import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 30import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 31import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 32import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 33import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 34import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 35 36class ByteCodeMachine extends StackMachine { 37 private int bestLen; // return value 38 private int s = 0; // current char 39 40 private int range; // right range 41 private int sprev; 42 private int sstart; 43 private int sbegin; 44 45 private final int[] code; // byte code 46 private int ip; // instruction pointer 47 48 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) { 49 super(regex, chars, p, end); 50 this.code = regex.code; 51 } 52 53 private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) { 54 int s1 = s1p; 55 int s2 = ps2.value; 56 final int end1 = s1 + mbLen; 57 58 while (s1 < end1) { 59 final char c1 = EncodingHelper.toLowerCase(chars[s1++]); 60 final char c2 = EncodingHelper.toLowerCase(chars[s2++]); 61 62 if (c1 != c2) { 63 return false; 64 } 65 } 66 ps2.value = s2; 67 return true; 68 } 69 70 private void debugMatchBegin() { 71 Config.log.println("match_at: " + 72 "str: " + str + 73 ", end: " + end + 74 ", start: " + this.sstart + 75 ", sprev: " + this.sprev); 76 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); 77 } 78 79 private void debugMatchLoop() { 80 if (Config.DEBUG_MATCH) { 81 Config.log.printf("%4d", (s - str)).print("> \""); 82 int q, i; 83 for (i=0, q=s; i<7 && q<end && s>=0; i++) { 84 if (q < end) { 85 Config.log.print(new String(new char[]{chars[q++]})); 86 } 87 } 88 final String string = q < end ? "...\"" : "\""; 89 q += string.length(); 90 Config.log.print(string); 91 for (i=0; i<20-(q-s);i++) { 92 Config.log.print(" "); 93 } 94 final StringBuilder sb = new StringBuilder(); 95 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); 96 Config.log.println(sb.toString()); 97 } 98 } 99 100 @Override 101 protected final int matchAt(final int r, final int ss, final int sp) { 102 this.range = r; 103 this.sstart = ss; 104 this.sprev = sp; 105 106 stk = 0; 107 ip = 0; 108 109 if (Config.DEBUG_MATCH) { 110 debugMatchBegin(); 111 } 112 113 init(); 114 115 bestLen = -1; 116 s = ss; 117 118 final int[] c = this.code; 119 while (true) { 120 if (Config.DEBUG_MATCH) { 121 debugMatchLoop(); 122 } 123 124 sbegin = s; 125 switch (c[ip++]) { 126 case OPCode.END: if (opEnd()) { 127 return finish(); 128 } break; 129 case OPCode.EXACT1: opExact1(); break; 130 case OPCode.EXACT2: opExact2(); continue; 131 case OPCode.EXACT3: opExact3(); continue; 132 case OPCode.EXACT4: opExact4(); continue; 133 case OPCode.EXACT5: opExact5(); continue; 134 case OPCode.EXACTN: opExactN(); continue; 135 136 case OPCode.EXACT1_IC: opExact1IC(); break; 137 case OPCode.EXACTN_IC: opExactNIC(); continue; 138 139 case OPCode.CCLASS: opCClass(); break; 140 case OPCode.CCLASS_MB: opCClassMB(); break; 141 case OPCode.CCLASS_MIX: opCClassMIX(); break; 142 case OPCode.CCLASS_NOT: opCClassNot(); break; 143 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; 144 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; 145 case OPCode.CCLASS_NODE: opCClassNode(); break; 146 147 case OPCode.ANYCHAR: opAnyChar(); break; 148 case OPCode.ANYCHAR_ML: opAnyCharML(); break; 149 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; 150 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; 151 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; 152 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; 153 154 case OPCode.WORD: opWord(); break; 155 case OPCode.NOT_WORD: opNotWord(); break; 156 case OPCode.WORD_BOUND: opWordBound(); continue; 157 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; 158 case OPCode.WORD_BEGIN: opWordBegin(); continue; 159 case OPCode.WORD_END: opWordEnd(); continue; 160 161 case OPCode.BEGIN_BUF: opBeginBuf(); continue; 162 case OPCode.END_BUF: opEndBuf(); continue; 163 case OPCode.BEGIN_LINE: opBeginLine(); continue; 164 case OPCode.END_LINE: opEndLine(); continue; 165 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; 166 case OPCode.BEGIN_POSITION: opBeginPosition(); continue; 167 168 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; 169 case OPCode.MEMORY_START: opMemoryStart(); continue; 170 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; 171 case OPCode.MEMORY_END: opMemoryEnd(); continue; 172 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; 173 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; 174 175 case OPCode.BACKREF1: opBackRef1(); continue; 176 case OPCode.BACKREF2: opBackRef2(); continue; 177 case OPCode.BACKREFN: opBackRefN(); continue; 178 case OPCode.BACKREFN_IC: opBackRefNIC(); continue; 179 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; 180 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; 181 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; 182 183 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; 184 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; 185 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; 186 187 case OPCode.JUMP: opJump(); continue; 188 case OPCode.PUSH: opPush(); continue; 189 190 case OPCode.POP: opPop(); continue; 191 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; 192 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; 193 194 case OPCode.REPEAT: opRepeat(); continue; 195 case OPCode.REPEAT_NG: opRepeatNG(); continue; 196 case OPCode.REPEAT_INC: opRepeatInc(); continue; 197 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; 198 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; 199 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; 200 201 case OPCode.PUSH_POS: opPushPos(); continue; 202 case OPCode.POP_POS: opPopPos(); continue; 203 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; 204 case OPCode.FAIL_POS: opFailPos(); continue; 205 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; 206 case OPCode.POP_STOP_BT: opPopStopBT(); continue; 207 208 case OPCode.LOOK_BEHIND: opLookBehind(); continue; 209 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; 210 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; 211 212 case OPCode.FINISH: 213 return finish(); 214 215 case OPCode.FAIL: opFail(); continue; 216 217 default: 218 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); 219 220 } // main switch 221 } // main while 222 } 223 224 private boolean opEnd() { 225 final int n = s - sstart; 226 227 if (n > bestLen) { 228 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 229 if (isFindLongest(regex.options)) { 230 if (n > msaBestLen) { 231 msaBestLen = n; 232 msaBestS = sstart; 233 } else { 234 // goto end_best_len; 235 return endBestLength(); 236 } 237 } 238 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 239 240 bestLen = n; 241 final Region region = msaRegion; 242 if (region != null) { 243 // USE_POSIX_REGION_OPTION ... else ... 244 region.beg[0] = msaBegin = sstart - str; 245 region.end[0] = msaEnd = s - str; 246 for (int i = 1; i <= regex.numMem; i++) { 247 // opt! 248 if (repeatStk[memEndStk + i] != INVALID_INDEX) { 249 region.beg[i] = bsAt(regex.btMemStart, i) ? 250 stack[repeatStk[memStartStk + i]].getMemPStr() - str : 251 repeatStk[memStartStk + i] - str; 252 253 254 region.end[i] = bsAt(regex.btMemEnd, i) ? 255 stack[repeatStk[memEndStk + i]].getMemPStr() : 256 repeatStk[memEndStk + i] - str; 257 258 } else { 259 region.beg[i] = region.end[i] = Region.REGION_NOTPOS; 260 } 261 262 } 263 264 } else { 265 msaBegin = sstart - str; 266 msaEnd = s - str; 267 } 268 } else { 269 final Region region = msaRegion; 270 if (Config.USE_POSIX_API_REGION_OPTION) { 271 if (!isPosixRegion(regex.options)) { 272 if (region != null) { 273 region.clear(); 274 } else { 275 msaBegin = msaEnd = 0; 276 } 277 } 278 } else { 279 if (region != null) { 280 region.clear(); 281 } else { 282 msaBegin = msaEnd = 0; 283 } 284 } // USE_POSIX_REGION_OPTION 285 } 286 // end_best_len: 287 /* default behavior: return first-matching result. */ 288 return endBestLength(); 289 } 290 291 private boolean endBestLength() { 292 if (isFindCondition(regex.options)) { 293 if (isFindNotEmpty(regex.options) && s == sstart) { 294 bestLen = -1; 295 {opFail(); return false;} /* for retry */ 296 } 297 if (isFindLongest(regex.options) && s < range) { 298 {opFail(); return false;} /* for retry */ 299 } 300 } 301 // goto finish; 302 return true; 303 } 304 305 private void opExact1() { 306 if (s >= range || code[ip] != chars[s++]) {opFail(); return;} 307 //if (s > range) {opFail(); return;} 308 ip++; 309 sprev = sbegin; // break; 310 } 311 312 private void opExact2() { 313 if (s + 2 > range) {opFail(); return;} 314 if (code[ip] != chars[s]) {opFail(); return;} 315 ip++; s++; 316 if (code[ip] != chars[s]) {opFail(); return;} 317 sprev = s; 318 ip++; s++; 319 } 320 321 private void opExact3() { 322 if (s + 3 > range) {opFail(); return;} 323 if (code[ip] != chars[s]) {opFail(); return;} 324 ip++; s++; 325 if (code[ip] != chars[s]) {opFail(); return;} 326 ip++; s++; 327 if (code[ip] != chars[s]) {opFail(); return;} 328 sprev = s; 329 ip++; s++; 330 } 331 332 private void opExact4() { 333 if (s + 4 > range) {opFail(); return;} 334 if (code[ip] != chars[s]) {opFail(); return;} 335 ip++; s++; 336 if (code[ip] != chars[s]) {opFail(); return;} 337 ip++; s++; 338 if (code[ip] != chars[s]) {opFail(); return;} 339 ip++; s++; 340 if (code[ip] != chars[s]) {opFail(); return;} 341 sprev = s; 342 ip++; s++; 343 } 344 345 private void opExact5() { 346 if (s + 5 > range) {opFail(); return;} 347 if (code[ip] != chars[s]) {opFail(); return;} 348 ip++; s++; 349 if (code[ip] != chars[s]) {opFail(); return;} 350 ip++; s++; 351 if (code[ip] != chars[s]) {opFail(); return;} 352 ip++; s++; 353 if (code[ip] != chars[s]) {opFail(); return;} 354 ip++; s++; 355 if (code[ip] != chars[s]) {opFail(); return;} 356 sprev = s; 357 ip++; s++; 358 } 359 360 private void opExactN() { 361 int tlen = code[ip++]; 362 if (s + tlen > range) {opFail(); return;} 363 364 if (Config.USE_STRING_TEMPLATES) { 365 final char[] bs = regex.templates[code[ip++]]; 366 int ps = code[ip++]; 367 368 while (tlen-- > 0) { 369 if (bs[ps++] != chars[s++]) {opFail(); return;} 370 } 371 372 } else { 373 while (tlen-- > 0) { 374 if (code[ip++] != chars[s++]) {opFail(); return;} 375 } 376 } 377 sprev = s - 1; 378 } 379 380 private void opExact1IC() { 381 if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 382 ip++; 383 sprev = sbegin; // break; 384 } 385 386 private void opExactNIC() { 387 int tlen = code[ip++]; 388 if (s + tlen > range) {opFail(); return;} 389 390 if (Config.USE_STRING_TEMPLATES) { 391 final char[] bs = regex.templates[code[ip++]]; 392 int ps = code[ip++]; 393 394 while (tlen-- > 0) { 395 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 396 } 397 } else { 398 399 while (tlen-- > 0) { 400 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 401 } 402 } 403 sprev = s - 1; 404 } 405 406 private boolean isInBitSet() { 407 final int c = chars[s]; 408 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); 409 } 410 411 private void opCClass() { 412 if (s >= range || !isInBitSet()) {opFail(); return;} 413 ip += BitSet.BITSET_SIZE; 414 s++; 415 sprev = sbegin; // break; 416 } 417 418 private boolean isInClassMB() { 419 final int tlen = code[ip++]; 420 if (s >= range) { 421 return false; 422 } 423 final int ss = s; 424 s++; 425 final int c = chars[ss]; 426 if (!EncodingHelper.isInCodeRange(code, ip, c)) { 427 return false; 428 } 429 ip += tlen; 430 return true; 431 } 432 433 private void opCClassMB() { 434 // beyond string check 435 if (s >= range || chars[s] <= 0xff) {opFail(); return;} 436 if (!isInClassMB()) {opFail(); return;} // not!!! 437 sprev = sbegin; // break; 438 } 439 440 private void opCClassMIX() { 441 if (s >= range) {opFail(); return;} 442 if (chars[s] > 0xff) { 443 ip += BitSet.BITSET_SIZE; 444 if (!isInClassMB()) {opFail(); return;} 445 } else { 446 if (!isInBitSet()) {opFail(); return;} 447 ip += BitSet.BITSET_SIZE; 448 final int tlen = code[ip++]; // by code range length 449 ip += tlen; 450 s++; 451 } 452 sprev = sbegin; // break; 453 } 454 455 private void opCClassNot() { 456 if (s >= range || isInBitSet()) {opFail(); return;} 457 ip += BitSet.BITSET_SIZE; 458 s++; 459 sprev = sbegin; // break; 460 } 461 462 private boolean isNotInClassMB() { 463 final int tlen = code[ip++]; 464 465 if (!(s + 1 <= range)) { 466 if (s >= range) { 467 return false; 468 } 469 s = end; 470 ip += tlen; 471 return true; 472 } 473 474 final int ss = s; 475 s++; 476 final int c = chars[ss]; 477 478 if (EncodingHelper.isInCodeRange(code, ip, c)) { 479 return false; 480 } 481 ip += tlen; 482 return true; 483 } 484 485 private void opCClassMBNot() { 486 if (s >= range) {opFail(); return;} 487 if (chars[s] <= 0xff) { 488 s++; 489 final int tlen = code[ip++]; 490 ip += tlen; 491 sprev = sbegin; // break; 492 return; 493 } 494 if (!isNotInClassMB()) {opFail(); return;} 495 sprev = sbegin; // break; 496 } 497 498 private void opCClassMIXNot() { 499 if (s >= range) {opFail(); return;} 500 if (chars[s] > 0xff) { 501 ip += BitSet.BITSET_SIZE; 502 if (!isNotInClassMB()) {opFail(); return;} 503 } else { 504 if (isInBitSet()) {opFail(); return;} 505 ip += BitSet.BITSET_SIZE; 506 final int tlen = code[ip++]; 507 ip += tlen; 508 s++; 509 } 510 sprev = sbegin; // break; 511 } 512 513 private void opCClassNode() { 514 if (s >= range) {opFail(); return;} 515 final CClassNode cc = (CClassNode)regex.operands[code[ip++]]; 516 final int ss = s; 517 s++; 518 final int c = chars[ss]; 519 if (!cc.isCodeInCCLength(c)) {opFail(); return;} 520 sprev = sbegin; // break; 521 } 522 523 private void opAnyChar() { 524 if (s >= range) {opFail(); return;} 525 if (isNewLine(chars[s])) {opFail(); return;} 526 s++; 527 sprev = sbegin; // break; 528 } 529 530 private void opAnyCharML() { 531 if (s >= range) {opFail(); return;} 532 s++; 533 sprev = sbegin; // break; 534 } 535 536 private void opAnyCharStar() { 537 final char[] ch = this.chars; 538 while (s < range) { 539 pushAlt(ip, s, sprev); 540 if (isNewLine(ch, s, end)) {opFail(); return;} 541 sprev = s; 542 s++; 543 } 544 sprev = sbegin; // break; 545 } 546 547 private void opAnyCharMLStar() { 548 while (s < range) { 549 pushAlt(ip, s, sprev); 550 sprev = s; 551 s++; 552 } 553 sprev = sbegin; // break; 554 } 555 556 private void opAnyCharStarPeekNext() { 557 final char c = (char)code[ip]; 558 final char[] ch = this.chars; 559 560 while (s < range) { 561 final char b = ch[s]; 562 if (c == b) { 563 pushAlt(ip + 1, s, sprev); 564 } 565 if (isNewLine(b)) {opFail(); return;} 566 sprev = s; 567 s++; 568 } 569 ip++; 570 sprev = sbegin; // break; 571 } 572 573 private void opAnyCharMLStarPeekNext() { 574 final char c = (char)code[ip]; 575 final char[] ch = this.chars; 576 577 while (s < range) { 578 if (c == ch[s]) { 579 pushAlt(ip + 1, s, sprev); 580 } 581 sprev = s; 582 s++; 583 } 584 ip++; 585 sprev = sbegin; // break; 586 } 587 588 private void opWord() { 589 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 590 s++; 591 sprev = sbegin; // break; 592 } 593 594 private void opNotWord() { 595 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} 596 s++; 597 sprev = sbegin; // break; 598 } 599 600 private void opWordBound() { 601 if (s == str) { 602 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 603 } else if (s == end) { 604 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 605 } else { 606 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 607 } 608 } 609 610 private void opNotWordBound() { 611 if (s == str) { 612 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} 613 } else if (s == end) { 614 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 615 } else { 616 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 617 } 618 } 619 620 private void opWordBegin() { 621 if (s < range && EncodingHelper.isWord(chars[s])) { 622 if (s == str || !EncodingHelper.isWord(chars[sprev])) { 623 return; 624 } 625 } 626 opFail(); 627 } 628 629 private void opWordEnd() { 630 if (s != str && EncodingHelper.isWord(chars[sprev])) { 631 if (s == end || !EncodingHelper.isWord(chars[s])) { 632 return; 633 } 634 } 635 opFail(); 636 } 637 638 private void opBeginBuf() { 639 if (s != str) { 640 opFail(); 641 } 642 } 643 644 private void opEndBuf() { 645 if (s != end) { 646 opFail(); 647 } 648 } 649 650 private void opBeginLine() { 651 if (s == str) { 652 if (isNotBol(msaOptions)) { 653 opFail(); 654 } 655 return; 656 } else if (isNewLine(chars, sprev, end) && s != end) { 657 return; 658 } 659 opFail(); 660 } 661 662 private void opEndLine() { 663 if (s == end) { 664 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 665 if (str == end || !isNewLine(chars, sprev, end)) { 666 if (isNotEol(msaOptions)) { 667 opFail(); 668 } 669 } 670 return; 671 } 672 if (isNotEol(msaOptions)) { 673 opFail(); 674 } 675 return; 676 } else if (isNewLine(chars, s, end)) { 677 return; 678 } 679 opFail(); 680 } 681 682 private void opSemiEndBuf() { 683 if (s == end) { 684 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 685 if (str == end || !isNewLine(chars, sprev, end)) { 686 if (isNotEol(msaOptions)) { 687 opFail(); 688 } 689 } 690 return; 691 } 692 if (isNotEol(msaOptions)) { 693 opFail(); 694 } 695 return; 696 } else if (isNewLine(chars, s, end) && s + 1 == end) { 697 return; 698 } 699 opFail(); 700 } 701 702 private void opBeginPosition() { 703 if (s != msaStart) { 704 opFail(); 705 } 706 } 707 708 private void opMemoryStartPush() { 709 final int mem = code[ip++]; 710 pushMemStart(mem, s); 711 } 712 713 private void opMemoryStart() { 714 final int mem = code[ip++]; 715 repeatStk[memStartStk + mem] = s; 716 } 717 718 private void opMemoryEndPush() { 719 final int mem = code[ip++]; 720 pushMemEnd(mem, s); 721 } 722 723 private void opMemoryEnd() { 724 final int mem = code[ip++]; 725 repeatStk[memEndStk + mem] = s; 726 } 727 728 private void opMemoryEndPushRec() { 729 final int mem = code[ip++]; 730 final int stkp = getMemStart(mem); /* should be before push mem-end. */ 731 pushMemEnd(mem, s); 732 repeatStk[memStartStk + mem] = stkp; 733 } 734 735 private void opMemoryEndRec() { 736 final int mem = code[ip++]; 737 repeatStk[memEndStk + mem] = s; 738 final int stkp = getMemStart(mem); 739 740 if (BitStatus.bsAt(regex.btMemStart, mem)) { 741 repeatStk[memStartStk + mem] = stkp; 742 } else { 743 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); 744 } 745 746 pushMemEndMark(mem); 747 } 748 749 private boolean backrefInvalid(final int mem) { 750 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; 751 } 752 753 private int backrefStart(final int mem) { 754 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; 755 } 756 757 private int backrefEnd(final int mem) { 758 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; 759 } 760 761 private void backref(final int mem) { 762 /* if you want to remove following line, 763 you should check in parse and compile time. (numMem) */ 764 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 765 766 int pstart = backrefStart(mem); 767 final int pend = backrefEnd(mem); 768 769 int n = pend - pstart; 770 if (s + n > range) {opFail(); return;} 771 sprev = s; 772 773 // STRING_CMP 774 while(n-- > 0) { 775 if (chars[pstart++] != chars[s++]) {opFail(); return;} 776 } 777 778 // beyond string check 779 if (sprev < range) { 780 while (sprev + 1 < s) { 781 sprev++; 782 } 783 } 784 } 785 786 private void opBackRef1() { 787 backref(1); 788 } 789 790 private void opBackRef2() { 791 backref(2); 792 } 793 794 private void opBackRefN() { 795 backref(code[ip++]); 796 } 797 798 private void opBackRefNIC() { 799 final int mem = code[ip++]; 800 /* if you want to remove following line, 801 you should check in parse and compile time. (numMem) */ 802 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 803 804 final int pstart = backrefStart(mem); 805 final int pend = backrefEnd(mem); 806 807 final int n = pend - pstart; 808 if (s + n > range) {opFail(); return;} 809 sprev = s; 810 811 value = s; 812 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} 813 s = value; 814 815 // if (sprev < chars.length) 816 while (sprev + 1 < s) { 817 sprev++; 818 } 819 } 820 821 private void opBackRefMulti() { 822 final int tlen = code[ip++]; 823 824 int i; 825 loop:for (i=0; i<tlen; i++) { 826 final int mem = code[ip++]; 827 if (backrefInvalid(mem)) { 828 continue; 829 } 830 831 int pstart = backrefStart(mem); 832 final int pend = backrefEnd(mem); 833 834 int n = pend - pstart; 835 if (s + n > range) {opFail(); return;} 836 837 sprev = s; 838 int swork = s; 839 840 while (n-- > 0) { 841 if (chars[pstart++] != chars[swork++]) { 842 continue loop; 843 } 844 } 845 846 s = swork; 847 848 // beyond string check 849 if (sprev < range) { 850 while (sprev + 1 < s) { 851 sprev++; 852 } 853 } 854 855 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 856 break; /* success */ 857 } 858 if (i == tlen) {opFail(); return;} 859 } 860 861 private void opBackRefMultiIC() { 862 final int tlen = code[ip++]; 863 864 int i; 865 loop:for (i=0; i<tlen; i++) { 866 final int mem = code[ip++]; 867 if (backrefInvalid(mem)) { 868 continue; 869 } 870 871 final int pstart = backrefStart(mem); 872 final int pend = backrefEnd(mem); 873 874 final int n = pend - pstart; 875 if (s + n > range) {opFail(); return;} 876 877 sprev = s; 878 879 value = s; 880 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) 881 { 882 continue loop; // STRING_CMP_VALUE_IC 883 } 884 s = value; 885 886 // if (sprev < chars.length) 887 while (sprev + 1 < s) { 888 sprev++; 889 } 890 891 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 892 break; /* success */ 893 } 894 if (i == tlen) {opFail(); return;} 895 } 896 897 private boolean memIsInMemp(final int mem, final int num, final int mempp) { 898 for (int i=0, memp = mempp; i<num; i++) { 899 final int m = code[memp++]; 900 if (mem == m) { 901 return true; 902 } 903 } 904 return false; 905 } 906 907 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit 908 private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, 909 final int nest, final int memNum, final int memp) { 910 int pend = -1; 911 int level = 0; 912 int k = stk - 1; 913 914 while (k >= 0) { 915 final StackEntry e = stack[k]; 916 917 if (e.type == CALL_FRAME) { 918 level--; 919 } else if (e.type == RETURN) { 920 level++; 921 } else if (level == nest) { 922 if (e.type == MEM_START) { 923 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 924 final int pstart = e.getMemPStr(); 925 if (pend != -1) { 926 if (pend - pstart > end - s) { 927 return false; /* or goto next_mem; */ 928 } 929 int p = pstart; 930 931 value = s; 932 if (ignoreCase) { 933 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { 934 return false; /* or goto next_mem; */ 935 } 936 } else { 937 while (p < pend) { 938 if (chars[p++] != chars[value++]) { 939 return false; /* or goto next_mem; */ 940 } 941 } 942 } 943 s = value; 944 945 return true; 946 } 947 } 948 } else if (e.type == MEM_END) { 949 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 950 pend = e.getMemPStr(); 951 } 952 } 953 } 954 k--; 955 } 956 return false; 957 } 958 959 private void opBackRefAtLevel() { 960 final int ic = code[ip++]; 961 final int level = code[ip++]; 962 final int tlen = code[ip++]; 963 964 sprev = s; 965 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit 966 while (sprev + 1 < s) { 967 sprev++; 968 } 969 ip += tlen; // * SIZE_MEMNUM 970 } else { 971 {opFail(); return;} 972 } 973 } 974 975 private void opNullCheckStart() { 976 final int mem = code[ip++]; 977 pushNullCheckStart(mem, s); 978 } 979 980 private void nullCheckFound() { 981 // null_check_found: 982 /* empty loop founded, skip next instruction */ 983 switch(code[ip++]) { 984 case OPCode.JUMP: 985 case OPCode.PUSH: 986 ip++; // p += SIZE_RELADDR; 987 break; 988 case OPCode.REPEAT_INC: 989 case OPCode.REPEAT_INC_NG: 990 case OPCode.REPEAT_INC_SG: 991 case OPCode.REPEAT_INC_NG_SG: 992 ip++; // p += SIZE_MEMNUM; 993 break; 994 default: 995 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); 996 } // switch 997 } 998 999 private void opNullCheckEnd() { 1000 final int mem = code[ip++]; 1001 final int isNull = nullCheck(mem, s); /* mem: null check id */ 1002 1003 if (isNull != 0) { 1004 if (Config.DEBUG_MATCH) { 1005 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); 1006 } 1007 1008 nullCheckFound(); 1009 } 1010 } 1011 1012 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1013 private void opNullCheckEndMemST() { 1014 final int mem = code[ip++]; /* mem: null check id */ 1015 final int isNull = nullCheckMemSt(mem, s); 1016 1017 if (isNull != 0) { 1018 if (Config.DEBUG_MATCH) { 1019 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); 1020 } 1021 1022 if (isNull == -1) {opFail(); return;} 1023 nullCheckFound(); 1024 } 1025 } 1026 1027 private void opJump() { 1028 ip += code[ip] + 1; 1029 } 1030 1031 private void opPush() { 1032 final int addr = code[ip++]; 1033 pushAlt(ip + addr, s, sprev); 1034 } 1035 1036 private void opPop() { 1037 popOne(); 1038 } 1039 1040 private void opPushOrJumpExact1() { 1041 final int addr = code[ip++]; 1042 // beyond string check 1043 if (s < range && code[ip] == chars[s]) { 1044 ip++; 1045 pushAlt(ip + addr, s, sprev); 1046 return; 1047 } 1048 ip += addr + 1; 1049 } 1050 1051 private void opPushIfPeekNext() { 1052 final int addr = code[ip++]; 1053 // beyond string check 1054 if (s < range && code[ip] == chars[s]) { 1055 ip++; 1056 pushAlt(ip + addr, s, sprev); 1057 return; 1058 } 1059 ip++; 1060 } 1061 1062 private void opRepeat() { 1063 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1064 final int addr= code[ip++]; 1065 1066 // ensure1(); 1067 repeatStk[mem] = stk; 1068 pushRepeat(mem, ip); 1069 1070 if (regex.repeatRangeLo[mem] == 0) { // lower 1071 pushAlt(ip + addr, s, sprev); 1072 } 1073 } 1074 1075 private void opRepeatNG() { 1076 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1077 final int addr= code[ip++]; 1078 1079 // ensure1(); 1080 repeatStk[mem] = stk; 1081 pushRepeat(mem, ip); 1082 1083 if (regex.repeatRangeLo[mem] == 0) { 1084 pushAlt(ip, s, sprev); 1085 ip += addr; 1086 } 1087 } 1088 1089 private void repeatInc(final int mem, final int si) { 1090 final StackEntry e = stack[si]; 1091 1092 e.increaseRepeatCount(); 1093 1094 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { 1095 /* end of repeat. Nothing to do. */ 1096 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1097 pushAlt(ip, s, sprev); 1098 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ 1099 } else { 1100 ip = e.getRepeatPCode(); 1101 } 1102 pushRepeatInc(si); 1103 } 1104 1105 private void opRepeatInc() { 1106 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1107 final int si = repeatStk[mem]; 1108 repeatInc(mem, si); 1109 } 1110 1111 private void opRepeatIncSG() { 1112 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1113 final int si = getRepeat(mem); 1114 repeatInc(mem, si); 1115 } 1116 1117 private void repeatIncNG(final int mem, final int si) { 1118 final StackEntry e = stack[si]; 1119 1120 e.increaseRepeatCount(); 1121 1122 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { 1123 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1124 final int pcode = e.getRepeatPCode(); 1125 pushRepeatInc(si); 1126 pushAlt(pcode, s, sprev); 1127 } else { 1128 ip = e.getRepeatPCode(); 1129 pushRepeatInc(si); 1130 } 1131 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { 1132 pushRepeatInc(si); 1133 } 1134 } 1135 1136 private void opRepeatIncNG() { 1137 final int mem = code[ip++]; 1138 final int si = repeatStk[mem]; 1139 repeatIncNG(mem, si); 1140 } 1141 1142 private void opRepeatIncNGSG() { 1143 final int mem = code[ip++]; 1144 final int si = getRepeat(mem); 1145 repeatIncNG(mem, si); 1146 } 1147 1148 private void opPushPos() { 1149 pushPos(s, sprev); 1150 } 1151 1152 private void opPopPos() { 1153 final StackEntry e = stack[posEnd()]; 1154 s = e.getStatePStr(); 1155 sprev= e.getStatePStrPrev(); 1156 } 1157 1158 private void opPushPosNot() { 1159 final int addr = code[ip++]; 1160 pushPosNot(ip + addr, s, sprev); 1161 } 1162 1163 private void opFailPos() { 1164 popTilPosNot(); 1165 opFail(); 1166 } 1167 1168 private void opPushStopBT() { 1169 pushStopBT(); 1170 } 1171 1172 private void opPopStopBT() { 1173 stopBtEnd(); 1174 } 1175 1176 private void opLookBehind() { 1177 final int tlen = code[ip++]; 1178 s = EncodingHelper.stepBack(str, s, tlen); 1179 if (s == -1) {opFail(); return;} 1180 sprev = EncodingHelper.prevCharHead(str, s); 1181 } 1182 1183 private void opPushLookBehindNot() { 1184 final int addr = code[ip++]; 1185 final int tlen = code[ip++]; 1186 final int q = EncodingHelper.stepBack(str, s, tlen); 1187 if (q == -1) { 1188 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 1189 If you want to change to fail, replace following line. */ 1190 ip += addr; 1191 // return FAIL; 1192 } else { 1193 pushLookBehindNot(ip + addr, s, sprev); 1194 s = q; 1195 sprev = EncodingHelper.prevCharHead(str, s); 1196 } 1197 } 1198 1199 private void opFailLookBehindNot() { 1200 popTilLookBehindNot(); 1201 opFail(); 1202 } 1203 1204 private void opFail() { 1205 if (stack == null) { 1206 ip = regex.codeLength - 1; 1207 return; 1208 } 1209 1210 1211 final StackEntry e = pop(); 1212 ip = e.getStatePCode(); 1213 s = e.getStatePStr(); 1214 sprev = e.getStatePStrPrev(); 1215 } 1216 1217 private int finish() { 1218 return bestLen; 1219 } 1220} 1221