Analyser.java revision 1981:03d3d3c6bc5a
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll; 23import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 24import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt; 25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; 28import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode; 29import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; 30import java.util.HashSet; 31import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 32import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 33import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 34import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 35import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 36import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 37import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 38import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 39import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 40import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 41import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 42import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; 43import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; 44import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; 45import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 46import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 47import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 48 49final class Analyser extends Parser { 50 51 protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) { 52 super(env, chars, p, end); 53 } 54 55 @SuppressWarnings("unused") 56 protected final void compile() { 57 if (Config.DEBUG) { 58 Config.log.println(new String(chars, getBegin(), getEnd())); 59 } 60 61 reset(); 62 63 regex.numMem = 0; 64 regex.numRepeat = 0; 65 regex.numNullCheck = 0; 66 //regex.repeatRangeAlloc = 0; 67 regex.repeatRangeLo = null; 68 regex.repeatRangeHi = null; 69 70 parse(); 71 72 if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { 73 Config.log.println("<RAW TREE>"); 74 Config.log.println(root + "\n"); 75 } 76 77 root = setupTree(root, 0); 78 if (Config.DEBUG_PARSE_TREE) { 79 if (Config.DEBUG_PARSE_TREE_RAW) { 80 Config.log.println("<TREE>"); 81 } 82 root.verifyTree(new HashSet<Node>(), env.reg.warnings); 83 Config.log.println(root + "\n"); 84 } 85 86 regex.captureHistory = env.captureHistory; 87 regex.btMemStart = env.btMemStart; 88 regex.btMemEnd = env.btMemEnd; 89 90 if (isFindCondition(regex.options)) { 91 regex.btMemEnd = bsAll(); 92 } else { 93 regex.btMemEnd = env.btMemEnd; 94 regex.btMemEnd |= regex.captureHistory; 95 } 96 97 regex.clearOptimizeInfo(); 98 99 if (!Config.DONT_OPTIMIZE) { 100 setOptimizedInfoFromTree(root); 101 } 102 103 env.memNodes = null; 104 105 new ArrayCompiler(this).compile(); 106 107 if (regex.numRepeat != 0 || regex.btMemEnd != 0) { 108 regex.stackPopLevel = StackPopLevel.ALL; 109 } else { 110 if (regex.btMemStart != 0) { 111 regex.stackPopLevel = StackPopLevel.MEM_START; 112 } else { 113 regex.stackPopLevel = StackPopLevel.FREE; 114 } 115 } 116 117 if (Config.DEBUG_COMPILE) { 118 Config.log.println("stack used: " + regex.stackNeeded); 119 if (Config.USE_STRING_TEMPLATES) { 120 Config.log.print("templates: " + regex.templateNum + "\n"); 121 } 122 Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); 123 124 } // DEBUG_COMPILE 125 } 126 127 private void swap(final Node a, final Node b) { 128 a.swap(b); 129 130 if (root == b) { 131 root = a; 132 } else if (root == a) { 133 root = b; 134 } 135 } 136 137 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 138 private int quantifiersMemoryInfo(final Node node) { 139 int info = 0; 140 141 switch(node.getType()) { 142 case NodeType.LIST: 143 case NodeType.ALT: 144 ConsAltNode can = (ConsAltNode)node; 145 do { 146 final int v = quantifiersMemoryInfo(can.car); 147 if (v > info) { 148 info = v; 149 } 150 } while ((can = can.cdr) != null); 151 break; 152 153 case NodeType.QTFR: 154 final QuantifierNode qn = (QuantifierNode)node; 155 if (qn.upper != 0) { 156 info = quantifiersMemoryInfo(qn.target); 157 } 158 break; 159 160 case NodeType.ENCLOSE: 161 final EncloseNode en = (EncloseNode)node; 162 switch (en.type) { 163 case EncloseType.MEMORY: 164 return TargetInfo.IS_EMPTY_MEM; 165 166 case EncloseType.OPTION: 167 case EncloseNode.STOP_BACKTRACK: 168 info = quantifiersMemoryInfo(en.target); 169 break; 170 171 default: 172 break; 173 } // inner switch 174 break; 175 176 case NodeType.BREF: 177 case NodeType.STR: 178 case NodeType.CTYPE: 179 case NodeType.CCLASS: 180 case NodeType.CANY: 181 case NodeType.ANCHOR: 182 default: 183 break; 184 } // switch 185 186 return info; 187 } 188 189 private int getMinMatchLength(final Node node) { 190 int min = 0; 191 192 switch (node.getType()) { 193 case NodeType.BREF: 194 final BackRefNode br = (BackRefNode)node; 195 if (br.isRecursion()) { 196 break; 197 } 198 199 if (br.backRef > env.numMem) { 200 throw new ValueException(ERR_INVALID_BACKREF); 201 } 202 min = getMinMatchLength(env.memNodes[br.backRef]); 203 204 break; 205 206 case NodeType.LIST: 207 ConsAltNode can = (ConsAltNode)node; 208 do { 209 min += getMinMatchLength(can.car); 210 } while ((can = can.cdr) != null); 211 break; 212 213 case NodeType.ALT: 214 ConsAltNode y = (ConsAltNode)node; 215 do { 216 final Node x = y.car; 217 final int tmin = getMinMatchLength(x); 218 if (y == node) { 219 min = tmin; 220 } else if (min > tmin) { 221 min = tmin; 222 } 223 } while ((y = y.cdr) != null); 224 break; 225 226 case NodeType.STR: 227 min = ((StringNode)node).length(); 228 break; 229 230 case NodeType.CTYPE: 231 min = 1; 232 break; 233 234 case NodeType.CCLASS: 235 case NodeType.CANY: 236 min = 1; 237 break; 238 239 case NodeType.QTFR: 240 final QuantifierNode qn = (QuantifierNode)node; 241 if (qn.lower > 0) { 242 min = getMinMatchLength(qn.target); 243 min = MinMaxLen.distanceMultiply(min, qn.lower); 244 } 245 break; 246 247 case NodeType.ENCLOSE: 248 final EncloseNode en = (EncloseNode)node; 249 switch (en.type) { 250 case EncloseType.MEMORY: 251 if (en.isMinFixed()) { 252 min = en.minLength; 253 } else { 254 min = getMinMatchLength(en.target); 255 en.minLength = min; 256 en.setMinFixed(); 257 } 258 break; 259 260 case EncloseType.OPTION: 261 case EncloseType.STOP_BACKTRACK: 262 min = getMinMatchLength(en.target); 263 break; 264 265 default: 266 break; 267 } // inner switch 268 break; 269 270 case NodeType.ANCHOR: 271 default: 272 break; 273 } // switch 274 275 return min; 276 } 277 278 private int getMaxMatchLength(final Node node) { 279 int max = 0; 280 281 switch (node.getType()) { 282 case NodeType.LIST: 283 ConsAltNode ln = (ConsAltNode)node; 284 do { 285 final int tmax = getMaxMatchLength(ln.car); 286 max = MinMaxLen.distanceAdd(max, tmax); 287 } while ((ln = ln.cdr) != null); 288 break; 289 290 case NodeType.ALT: 291 ConsAltNode an = (ConsAltNode)node; 292 do { 293 final int tmax = getMaxMatchLength(an.car); 294 if (max < tmax) { 295 max = tmax; 296 } 297 } while ((an = an.cdr) != null); 298 break; 299 300 case NodeType.STR: 301 max = ((StringNode)node).length(); 302 break; 303 304 case NodeType.CTYPE: 305 max = 1; 306 break; 307 308 case NodeType.CCLASS: 309 case NodeType.CANY: 310 max = 1; 311 break; 312 313 case NodeType.BREF: 314 final BackRefNode br = (BackRefNode)node; 315 if (br.isRecursion()) { 316 max = MinMaxLen.INFINITE_DISTANCE; 317 break; 318 } 319 320 if (br.backRef > env.numMem) { 321 throw new ValueException(ERR_INVALID_BACKREF); 322 } 323 final int tmax = getMaxMatchLength(env.memNodes[br.backRef]); 324 if (max < tmax) { 325 max = tmax; 326 } 327 break; 328 329 case NodeType.QTFR: 330 final QuantifierNode qn = (QuantifierNode)node; 331 if (qn.upper != 0) { 332 max = getMaxMatchLength(qn.target); 333 if (max != 0) { 334 if (!isRepeatInfinite(qn.upper)) { 335 max = MinMaxLen.distanceMultiply(max, qn.upper); 336 } else { 337 max = MinMaxLen.INFINITE_DISTANCE; 338 } 339 } 340 } 341 break; 342 343 case NodeType.ENCLOSE: 344 final EncloseNode en = (EncloseNode)node; 345 switch (en.type) { 346 case EncloseType.MEMORY: 347 if (en.isMaxFixed()) { 348 max = en.maxLength; 349 } else { 350 max = getMaxMatchLength(en.target); 351 en.maxLength = max; 352 en.setMaxFixed(); 353 } 354 break; 355 356 case EncloseType.OPTION: 357 case EncloseType.STOP_BACKTRACK: 358 max = getMaxMatchLength(en.target); 359 break; 360 361 default: 362 break; 363 } // inner switch 364 break; 365 366 case NodeType.ANCHOR: 367 default: 368 break; 369 } // switch 370 371 return max; 372 } 373 374 private static final int GET_CHAR_LEN_VARLEN = -1; 375 private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; 376 protected final int getCharLengthTree(final Node node) { 377 return getCharLengthTree(node, 0); 378 } 379 380 private int getCharLengthTree(final Node node, final int levelp) { 381 final int level = levelp + 1; 382 383 int len = 0; 384 returnCode = 0; 385 386 switch(node.getType()) { 387 case NodeType.LIST: 388 ConsAltNode ln = (ConsAltNode)node; 389 do { 390 final int tlen = getCharLengthTree(ln.car, level); 391 if (returnCode == 0) { 392 len = MinMaxLen.distanceAdd(len, tlen); 393 } 394 } while (returnCode == 0 && (ln = ln.cdr) != null); 395 break; 396 397 case NodeType.ALT: 398 ConsAltNode an = (ConsAltNode)node; 399 boolean varLen = false; 400 401 int tlen = getCharLengthTree(an.car, level); 402 while (returnCode == 0 && (an = an.cdr) != null) { 403 final int tlen2 = getCharLengthTree(an.car, level); 404 if (returnCode == 0) { 405 if (tlen != tlen2) { 406 varLen = true; 407 } 408 } 409 } 410 411 if (returnCode == 0) { 412 if (varLen) { 413 if (level == 1) { 414 returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; 415 } else { 416 returnCode = GET_CHAR_LEN_VARLEN; 417 } 418 } else { 419 len = tlen; 420 } 421 } 422 break; 423 424 case NodeType.STR: 425 final StringNode sn = (StringNode)node; 426 len = sn.length(); 427 break; 428 429 case NodeType.QTFR: 430 final QuantifierNode qn = (QuantifierNode)node; 431 if (qn.lower == qn.upper) { 432 tlen = getCharLengthTree(qn.target, level); 433 if (returnCode == 0) { 434 len = MinMaxLen.distanceMultiply(tlen, qn.lower); 435 } 436 } else { 437 returnCode = GET_CHAR_LEN_VARLEN; 438 } 439 break; 440 441 case NodeType.CTYPE: 442 case NodeType.CCLASS: 443 case NodeType.CANY: 444 len = 1; 445 break; 446 447 case NodeType.ENCLOSE: 448 final EncloseNode en = (EncloseNode)node; 449 switch(en.type) { 450 case EncloseType.MEMORY: 451 if (en.isCLenFixed()) { 452 len = en.charLength; 453 } else { 454 len = getCharLengthTree(en.target, level); 455 if (returnCode == 0) { 456 en.charLength = len; 457 en.setCLenFixed(); 458 } 459 } 460 break; 461 462 case EncloseType.OPTION: 463 case EncloseType.STOP_BACKTRACK: 464 len = getCharLengthTree(en.target, level); 465 break; 466 467 default: 468 break; 469 } // inner switch 470 break; 471 472 case NodeType.ANCHOR: 473 break; 474 475 default: 476 returnCode = GET_CHAR_LEN_VARLEN; 477 } // switch 478 return len; 479 } 480 481 /* x is not included y ==> 1 : 0 */ 482 private static boolean isNotIncluded(final Node xn, final Node yn) { 483 Node x = xn; 484 Node y = yn; 485 Node tmp; 486 487 // !retry:! 488 retry: while(true) { 489 490 final int yType = y.getType(); 491 492 switch(x.getType()) { 493 case NodeType.CTYPE: 494 switch(yType) { 495 496 case NodeType.CCLASS: 497 // !swap:! 498 tmp = x; 499 x = y; 500 y = tmp; 501 // !goto retry;! 502 continue retry; 503 504 case NodeType.STR: 505 // !goto swap;! 506 tmp = x; 507 x = y; 508 y = tmp; 509 continue retry; 510 511 default: 512 break; 513 } // inner switch 514 break; 515 516 case NodeType.CCLASS: 517 final CClassNode xc = (CClassNode)x; 518 519 switch(yType) { 520 521 case NodeType.CCLASS: 522 final CClassNode yc = (CClassNode)y; 523 524 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 525 boolean v = xc.bs.at(i); 526 if ((v && !xc.isNot()) || (!v && xc.isNot())) { 527 v = yc.bs.at(i); 528 if ((v && !yc.isNot()) || (!v && yc.isNot())) { 529 return false; 530 } 531 } 532 } 533 if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) { 534 return true; 535 } 536 return false; 537 // break; not reached 538 539 case NodeType.STR: 540 // !goto swap;! 541 tmp = x; 542 x = y; 543 y = tmp; 544 continue retry; 545 546 default: 547 break; 548 549 } // inner switch 550 break; // case NodeType.CCLASS 551 552 case NodeType.STR: 553 final StringNode xs = (StringNode)x; 554 if (xs.length() == 0) { 555 break; 556 } 557 558 switch (yType) { 559 560 case NodeType.CCLASS: 561 final CClassNode cc = (CClassNode)y; 562 final int code = xs.chars[xs.p]; 563 return !cc.isCodeInCC(code); 564 565 case NodeType.STR: 566 final StringNode ys = (StringNode)y; 567 int len = xs.length(); 568 if (len > ys.length()) { 569 len = ys.length(); 570 } 571 if (xs.isAmbig() || ys.isAmbig()) { 572 /* tiny version */ 573 return false; 574 } 575 for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) { 576 if (ys.chars[pt] != xs.chars[q]) { 577 return true; 578 } 579 } 580 break; 581 582 default: 583 break; 584 } // inner switch 585 586 break; // case NodeType.STR 587 default: 588 break; 589 590 } // switch 591 592 break; 593 } // retry: while 594 return false; 595 } 596 597 private Node getHeadValueNode(final Node node, final boolean exact) { 598 Node n = null; 599 600 switch(node.getType()) { 601 case NodeType.BREF: 602 case NodeType.ALT: 603 case NodeType.CANY: 604 break; 605 606 case NodeType.CTYPE: 607 case NodeType.CCLASS: 608 if (!exact) { 609 n = node; 610 } 611 break; 612 613 case NodeType.LIST: 614 n = getHeadValueNode(((ConsAltNode)node).car, exact); 615 break; 616 617 case NodeType.STR: 618 final StringNode sn = (StringNode)node; 619 if (sn.end <= sn.p) 620 { 621 break; // ??? 622 } 623 624 if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ 625 // nothing 626 } else { 627 n = node; 628 } 629 break; 630 631 case NodeType.QTFR: 632 final QuantifierNode qn = (QuantifierNode)node; 633 if (qn.lower > 0) { 634 if (qn.headExact != null) { 635 n = qn.headExact; 636 } else { 637 n = getHeadValueNode(qn.target, exact); 638 } 639 } 640 break; 641 642 case NodeType.ENCLOSE: 643 final EncloseNode en = (EncloseNode)node; 644 645 switch (en.type) { 646 case EncloseType.OPTION: 647 final int options = regex.options; 648 regex.options = en.option; 649 n = getHeadValueNode(en.target, exact); 650 regex.options = options; 651 break; 652 653 case EncloseType.MEMORY: 654 case EncloseType.STOP_BACKTRACK: 655 n = getHeadValueNode(en.target, exact); 656 break; 657 658 default: 659 break; 660 } // inner switch 661 break; 662 663 case NodeType.ANCHOR: 664 final AnchorNode an = (AnchorNode)node; 665 if (an.type == AnchorType.PREC_READ) { 666 n = getHeadValueNode(an.target, exact); 667 } 668 break; 669 670 default: 671 break; 672 } // switch 673 674 return n; 675 } 676 677 // true: invalid 678 private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) { 679 if ((node.getType2Bit() & typeMask) == 0) { 680 return true; 681 } 682 683 boolean invalid = false; 684 685 switch(node.getType()) { 686 case NodeType.LIST: 687 case NodeType.ALT: 688 ConsAltNode can = (ConsAltNode)node; 689 do { 690 invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); 691 } while (!invalid && (can = can.cdr) != null); 692 break; 693 694 case NodeType.QTFR: 695 invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); 696 break; 697 698 case NodeType.ENCLOSE: 699 final EncloseNode en = (EncloseNode)node; 700 if ((en.type & encloseMask) == 0) { 701 return true; 702 } 703 invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); 704 break; 705 706 case NodeType.ANCHOR: 707 final AnchorNode an = (AnchorNode)node; 708 if ((an.type & anchorMask) == 0) { 709 return true; 710 } 711 712 if (an.target != null) { 713 invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); 714 } 715 break; 716 717 default: 718 break; 719 720 } // switch 721 722 return invalid; 723 } 724 725 /* divide different length alternatives in look-behind. 726 (?<=A|B) ==> (?<=A)|(?<=B) 727 (?<!A|B) ==> (?<!A)(?<!B) 728 */ 729 private Node divideLookBehindAlternatives(final Node nodep) { 730 Node node = nodep; 731 final AnchorNode an = (AnchorNode)node; 732 final int anchorType = an.type; 733 Node head = an.target; 734 Node np = ((ConsAltNode)head).car; 735 736 swap(node, head); 737 738 final Node tmp = node; 739 node = head; 740 head = tmp; 741 742 ((ConsAltNode)node).setCar(head); 743 ((AnchorNode)head).setTarget(np); 744 np = node; 745 746 while ((np = ((ConsAltNode)np).cdr) != null) { 747 final AnchorNode insert = new AnchorNode(anchorType); 748 insert.setTarget(((ConsAltNode)np).car); 749 ((ConsAltNode)np).setCar(insert); 750 } 751 752 if (anchorType == AnchorType.LOOK_BEHIND_NOT) { 753 np = node; 754 do { 755 ((ConsAltNode)np).toListNode(); /* alt -> list */ 756 } while ((np = ((ConsAltNode)np).cdr) != null); 757 } 758 759 return node; 760 } 761 762 private Node setupLookBehind(final Node node) { 763 final AnchorNode an = (AnchorNode)node; 764 final int len = getCharLengthTree(an.target); 765 switch (returnCode) { 766 case 0: 767 an.charLength = len; 768 break; 769 case GET_CHAR_LEN_VARLEN: 770 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 771 case GET_CHAR_LEN_TOP_ALT_VARLEN: 772 if (syntax.differentLengthAltLookBehind()) { 773 return divideLookBehindAlternatives(node); 774 } 775 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 776 default: 777 break; 778 } 779 return node; 780 } 781 782 private void nextSetup(final Node nodep, final Node nextNode) { 783 Node node = nodep; 784 785 // retry: 786 retry: while(true) { 787 788 final int type = node.getType(); 789 if (type == NodeType.QTFR) { 790 final QuantifierNode qn = (QuantifierNode)node; 791 if (qn.greedy && isRepeatInfinite(qn.upper)) { 792 if (Config.USE_QTFR_PEEK_NEXT) { 793 final StringNode n = (StringNode)getHeadValueNode(nextNode, true); 794 /* '\0': for UTF-16BE etc... */ 795 if (n != null && n.chars[n.p] != 0) { // ????????? 796 qn.nextHeadExact = n; 797 } 798 } // USE_QTFR_PEEK_NEXT 799 /* automatic posseivation a*b ==> (?>a*)b */ 800 if (qn.lower <= 1) { 801 if (qn.target.isSimple()) { 802 final Node x = getHeadValueNode(qn.target, false); 803 if (x != null) { 804 final Node y = getHeadValueNode(nextNode, false); 805 if (y != null && isNotIncluded(x, y)) { 806 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose 807 en.setStopBtSimpleRepeat(); 808 //en.setTarget(qn.target); // optimize it ?? 809 swap(node, en); 810 811 en.setTarget(node); 812 } 813 } 814 } 815 } 816 } 817 } else if (type == NodeType.ENCLOSE) { 818 final EncloseNode en = (EncloseNode)node; 819 if (en.isMemory()) { 820 node = en.target; 821 // !goto retry;! 822 continue retry; 823 } 824 } 825 826 break; 827 } // while 828 } 829 830 private void updateStringNodeCaseFoldMultiByte(final StringNode sn) { 831 final char[] ch = sn.chars; 832 final int end = sn.end; 833 value = sn.p; 834 int sp = 0; 835 char buf; 836 837 while (value < end) { 838 final int ovalue = value; 839 buf = EncodingHelper.toLowerCase(ch[value++]); 840 841 if (ch[ovalue] != buf) { 842 843 char[] sbuf = new char[sn.length() << 1]; 844 System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p); 845 value = ovalue; 846 while (value < end) { 847 buf = EncodingHelper.toLowerCase(ch[value++]); 848 if (sp >= sbuf.length) { 849 final char[]tmp = new char[sbuf.length << 1]; 850 System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); 851 sbuf = tmp; 852 } 853 sbuf[sp++] = buf; 854 } 855 sn.set(sbuf, 0, sp); 856 return; 857 } 858 sp++; 859 } 860 } 861 862 private void updateStringNodeCaseFold(final Node node) { 863 final StringNode sn = (StringNode)node; 864 updateStringNodeCaseFoldMultiByte(sn); 865 } 866 867 private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) { 868 final StringNode node = new StringNode(ch, pp, end); 869 870 updateStringNodeCaseFold(node); 871 node.setAmbig(); 872 node.setDontGetOptInfo(); 873 return node; 874 } 875 876 private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items, 877 final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) { 878 879 ConsAltNode altNode; 880 node.p = altNode = newAltNode(null, null); 881 882 StringNode snode = new StringNode(chars, p, p + slen); 883 altNode.setCar(snode); 884 885 for (int i=0; i<itemNum; i++) { 886 snode = new StringNode(); 887 888 snode.catCode(items[i]); 889 890 final ConsAltNode an = newAltNode(null, null); 891 an.setCar(snode); 892 altNode.setCdr(an); 893 altNode = an; 894 } 895 return false; 896 } 897 898 private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; 899 private Node expandCaseFoldString(final Node node) { 900 final StringNode sn = (StringNode)node; 901 902 if (sn.isAmbig() || sn.length() <= 0) { 903 return node; 904 } 905 906 final char[] chars1 = sn.chars; 907 int pt = sn.p; 908 final int end = sn.end; 909 int altNum = 1; 910 911 ConsAltNode topRoot = null, r = null; 912 @SuppressWarnings("unused") 913 final ObjPtr<Node> prevNode = new ObjPtr<Node>(); 914 StringNode stringNode = null; 915 916 while (pt < end) { 917 final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]); 918 919 if (items.length == 0) { 920 if (stringNode == null) { 921 if (r == null && prevNode.p != null) { 922 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 923 } 924 925 prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); 926 927 if (r != null) { 928 ConsAltNode.listAdd(r, stringNode); 929 } 930 931 } 932 933 stringNode.cat(chars1, pt, pt + 1); 934 } else { 935 altNum *= (items.length + 1); 936 if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) { 937 break; 938 } 939 940 if (r == null && prevNode.p != null) { 941 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 942 } 943 944 expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode); 945 if (r != null) { 946 ConsAltNode.listAdd(r, prevNode.p); 947 } 948 stringNode = null; 949 } 950 pt++; 951 } 952 953 if (pt < end) { 954 final Node srem = expandCaseFoldMakeRemString(chars1, pt, end); 955 956 if (prevNode.p != null && r == null) { 957 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 958 } 959 960 if (r == null) { 961 prevNode.p = srem; 962 } else { 963 ConsAltNode.listAdd(r, srem); 964 } 965 } 966 /* ending */ 967 final Node xnode = topRoot != null ? topRoot : prevNode.p; 968 969 swap(node, xnode); 970 return xnode; 971 } 972 973 private static final int IN_ALT = (1<<0); 974 private static final int IN_NOT = (1<<1); 975 private static final int IN_REPEAT = (1<<2); 976 private static final int IN_VAR_REPEAT = (1<<3); 977 private static final int EXPAND_STRING_MAX_LENGTH = 100; 978 979 /* setup_tree does the following work. 980 1. check empty loop. (set qn->target_empty_info) 981 2. expand ignore-case in char class. 982 3. set memory status bit flags. (reg->mem_stats) 983 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 984 5. find invalid patterns in look-behind. 985 6. expand repeated string. 986 */ 987 protected final Node setupTree(final Node nodep, final int statep) { 988 Node node = nodep; 989 int state = statep; 990 991 restart: while (true) { 992 switch (node.getType()) { 993 case NodeType.LIST: 994 ConsAltNode lin = (ConsAltNode)node; 995 Node prev = null; 996 do { 997 setupTree(lin.car, state); 998 if (prev != null) { 999 nextSetup(prev, lin.car); 1000 } 1001 prev = lin.car; 1002 } while ((lin = lin.cdr) != null); 1003 break; 1004 1005 case NodeType.ALT: 1006 ConsAltNode aln = (ConsAltNode)node; 1007 do { 1008 setupTree(aln.car, (state | IN_ALT)); 1009 } while ((aln = aln.cdr) != null); 1010 break; 1011 1012 case NodeType.CCLASS: 1013 break; 1014 1015 case NodeType.STR: 1016 if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { 1017 node = expandCaseFoldString(node); 1018 } 1019 break; 1020 1021 case NodeType.CTYPE: 1022 case NodeType.CANY: 1023 break; 1024 1025 case NodeType.BREF: 1026 final BackRefNode br = (BackRefNode)node; 1027 if (br.backRef > env.numMem) { 1028 throw new ValueException(ERR_INVALID_BACKREF); 1029 } 1030 env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef); 1031 env.btMemStart = bsOnAt(env.btMemStart, br.backRef); 1032 ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed(); 1033 break; 1034 1035 case NodeType.QTFR: 1036 final QuantifierNode qn = (QuantifierNode)node; 1037 Node target = qn.target; 1038 1039 if ((state & IN_REPEAT) != 0) { 1040 qn.setInRepeat(); 1041 } 1042 1043 if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { 1044 final int d = getMinMatchLength(target); 1045 if (d == 0) { 1046 qn.targetEmptyInfo = TargetInfo.IS_EMPTY; 1047 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 1048 final int info = quantifiersMemoryInfo(target); 1049 if (info > 0) { 1050 qn.targetEmptyInfo = info; 1051 } 1052 } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1053 // strange stuff here (turned off) 1054 } 1055 } 1056 1057 state |= IN_REPEAT; 1058 if (qn.lower != qn.upper) { 1059 state |= IN_VAR_REPEAT; 1060 } 1061 1062 target = setupTree(target, state); 1063 1064 /* expand string */ 1065 if (target.getType() == NodeType.STR) { 1066 if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && 1067 qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1068 final StringNode sn = (StringNode)target; 1069 final int len = sn.length(); 1070 1071 if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1072 final StringNode str = qn.convertToString(sn.flag); 1073 final int n = qn.lower; 1074 for (int i = 0; i < n; i++) { 1075 str.cat(sn.chars, sn.p, sn.end); 1076 } 1077 break; /* break case NT_QTFR: */ 1078 } 1079 1080 } 1081 } 1082 if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { 1083 if (qn.greedy && qn.targetEmptyInfo != 0) { 1084 if (target.getType() == NodeType.QTFR) { 1085 final QuantifierNode tqn = (QuantifierNode)target; 1086 if (tqn.headExact != null) { 1087 qn.headExact = tqn.headExact; 1088 tqn.headExact = null; 1089 } 1090 } else { 1091 qn.headExact = getHeadValueNode(qn.target, true); 1092 } 1093 } 1094 } // USE_OP_PUSH_OR_JUMP_EXACT 1095 break; 1096 1097 case NodeType.ENCLOSE: 1098 final EncloseNode en = (EncloseNode)node; 1099 switch (en.type) { 1100 case EncloseType.OPTION: 1101 final int options = regex.options; 1102 regex.options = en.option; 1103 setupTree(en.target, state); 1104 regex.options = options; 1105 break; 1106 1107 case EncloseType.MEMORY: 1108 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 1109 env.btMemStart = bsOnAt(env.btMemStart, en.regNum); 1110 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 1111 1112 } 1113 setupTree(en.target, state); 1114 break; 1115 1116 case EncloseType.STOP_BACKTRACK: 1117 setupTree(en.target, state); 1118 if (en.target.getType() == NodeType.QTFR) { 1119 final QuantifierNode tqn = (QuantifierNode)en.target; 1120 if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { 1121 /* (?>a*), a*+ etc... */ 1122 if (tqn.target.isSimple()) { 1123 en.setStopBtSimpleRepeat(); 1124 } 1125 } 1126 } 1127 break; 1128 1129 default: 1130 break; 1131 1132 } // inner switch 1133 break; 1134 1135 case NodeType.ANCHOR: 1136 final AnchorNode an = (AnchorNode)node; 1137 switch (an.type) { 1138 case AnchorType.PREC_READ: 1139 setupTree(an.target, state); 1140 break; 1141 1142 case AnchorType.PREC_READ_NOT: 1143 setupTree(an.target, (state | IN_NOT)); 1144 break; 1145 1146 case AnchorType.LOOK_BEHIND: 1147 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1148 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1149 } 1150 node = setupLookBehind(node); 1151 if (node.getType() != NodeType.ANCHOR) { 1152 continue restart; 1153 } 1154 setupTree(((AnchorNode)node).target, state); 1155 break; 1156 1157 case AnchorType.LOOK_BEHIND_NOT: 1158 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1159 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1160 } 1161 node = setupLookBehind(node); 1162 if (node.getType() != NodeType.ANCHOR) { 1163 continue restart; 1164 } 1165 setupTree(((AnchorNode)node).target, (state | IN_NOT)); 1166 break; 1167 1168 default: 1169 break; 1170 1171 } // inner switch 1172 break; 1173 default: 1174 break; 1175 } // switch 1176 return node; 1177 } // restart: while 1178 } 1179 1180 private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; 1181 private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd 1182 opt.clear(); 1183 opt.setBoundNode(oenv.mmd); 1184 1185 switch (node.getType()) { 1186 case NodeType.LIST: { 1187 final OptEnvironment nenv = new OptEnvironment(); 1188 final NodeOptInfo nopt = new NodeOptInfo(); 1189 nenv.copy(oenv); 1190 ConsAltNode lin = (ConsAltNode)node; 1191 do { 1192 optimizeNodeLeft(lin.car, nopt, nenv); 1193 nenv.mmd.add(nopt.length); 1194 opt.concatLeftNode(nopt); 1195 } while ((lin = lin.cdr) != null); 1196 break; 1197 } 1198 1199 case NodeType.ALT: { 1200 final NodeOptInfo nopt = new NodeOptInfo(); 1201 ConsAltNode aln = (ConsAltNode)node; 1202 do { 1203 optimizeNodeLeft(aln.car, nopt, oenv); 1204 if (aln == node) { 1205 opt.copy(nopt); 1206 } else { 1207 opt.altMerge(nopt, oenv); 1208 } 1209 } while ((aln = aln.cdr) != null); 1210 break; 1211 } 1212 1213 case NodeType.STR: { 1214 final StringNode sn = (StringNode)node; 1215 1216 final int slen = sn.length(); 1217 1218 if (!sn.isAmbig()) { 1219 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1220 1221 if (slen > 0) { 1222 opt.map.addChar(sn.chars[sn.p]); 1223 } 1224 1225 opt.length.set(slen, slen); 1226 } else { 1227 int max; 1228 if (sn.isDontGetOptInfo()) { 1229 max = sn.length(); 1230 } else { 1231 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1232 opt.exb.ignoreCase = true; 1233 1234 if (slen > 0) { 1235 opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); 1236 } 1237 1238 max = slen; 1239 } 1240 opt.length.set(slen, max); 1241 } 1242 1243 if (opt.exb.length == slen) { 1244 opt.exb.reachEnd = true; 1245 } 1246 break; 1247 } 1248 1249 case NodeType.CCLASS: { 1250 final CClassNode cc = (CClassNode)node; 1251 /* no need to check ignore case. (setted in setup_tree()) */ 1252 if (cc.mbuf != null || cc.isNot()) { 1253 opt.length.set(1, 1); 1254 } else { 1255 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1256 final boolean z = cc.bs.at(i); 1257 if ((z && !cc.isNot()) || (!z && cc.isNot())) { 1258 opt.map.addChar(i); 1259 } 1260 } 1261 opt.length.set(1, 1); 1262 } 1263 break; 1264 } 1265 1266 case NodeType.CANY: { 1267 opt.length.set(1, 1); 1268 break; 1269 } 1270 1271 case NodeType.ANCHOR: { 1272 final AnchorNode an = (AnchorNode)node; 1273 switch (an.type) { 1274 case AnchorType.BEGIN_BUF: 1275 case AnchorType.BEGIN_POSITION: 1276 case AnchorType.BEGIN_LINE: 1277 case AnchorType.END_BUF: 1278 case AnchorType.SEMI_END_BUF: 1279 case AnchorType.END_LINE: 1280 opt.anchor.add(an.type); 1281 break; 1282 1283 case AnchorType.PREC_READ: 1284 final NodeOptInfo nopt = new NodeOptInfo(); 1285 optimizeNodeLeft(an.target, nopt, oenv); 1286 if (nopt.exb.length > 0) { 1287 opt.expr.copy(nopt.exb); 1288 } else if (nopt.exm.length > 0) { 1289 opt.expr.copy(nopt.exm); 1290 } 1291 opt.expr.reachEnd = false; 1292 if (nopt.map.value > 0) { 1293 opt.map.copy(nopt.map); 1294 } 1295 break; 1296 1297 case AnchorType.PREC_READ_NOT: 1298 case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ 1299 case AnchorType.LOOK_BEHIND_NOT: 1300 break; 1301 1302 default: 1303 break; 1304 1305 } // inner switch 1306 break; 1307 } 1308 1309 case NodeType.BREF: { 1310 final BackRefNode br = (BackRefNode)node; 1311 1312 if (br.isRecursion()) { 1313 opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); 1314 break; 1315 } 1316 1317 final Node[]nodes = oenv.scanEnv.memNodes; 1318 1319 final int min = getMinMatchLength(nodes[br.backRef]); 1320 final int max = getMaxMatchLength(nodes[br.backRef]); 1321 1322 opt.length.set(min, max); 1323 break; 1324 } 1325 1326 1327 case NodeType.QTFR: { 1328 final NodeOptInfo nopt = new NodeOptInfo(); 1329 final QuantifierNode qn = (QuantifierNode)node; 1330 optimizeNodeLeft(qn.target, nopt, oenv); 1331 if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { 1332 if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { 1333 if (isMultiline(oenv.options)) { 1334 opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); 1335 } else { 1336 opt.anchor.add(AnchorType.ANYCHAR_STAR); 1337 } 1338 } 1339 } else { 1340 if (qn.lower > 0) { 1341 opt.copy(nopt); 1342 if (nopt.exb.length > 0) { 1343 if (nopt.exb.reachEnd) { 1344 int i; 1345 for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { 1346 opt.exb.concat(nopt.exb); 1347 } 1348 if (i < qn.lower) { 1349 opt.exb.reachEnd = false; 1350 } 1351 } 1352 } 1353 if (qn.lower != qn.upper) { 1354 opt.exb.reachEnd = false; 1355 opt.exm.reachEnd = false; 1356 } 1357 if (qn.lower > 1) { 1358 opt.exm.reachEnd = false; 1359 } 1360 1361 } 1362 } 1363 final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); 1364 int max; 1365 if (isRepeatInfinite(qn.upper)) { 1366 max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; 1367 } else { 1368 max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); 1369 } 1370 opt.length.set(min, max); 1371 break; 1372 } 1373 1374 case NodeType.ENCLOSE: { 1375 final EncloseNode en = (EncloseNode)node; 1376 switch (en.type) { 1377 case EncloseType.OPTION: 1378 final int save = oenv.options; 1379 oenv.options = en.option; 1380 optimizeNodeLeft(en.target, opt, oenv); 1381 oenv.options = save; 1382 break; 1383 1384 case EncloseType.MEMORY: 1385 if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { 1386 int min = 0; 1387 int max = MinMaxLen.INFINITE_DISTANCE; 1388 if (en.isMinFixed()) { 1389 min = en.minLength; 1390 } 1391 if (en.isMaxFixed()) { 1392 max = en.maxLength; 1393 } 1394 opt.length.set(min, max); 1395 } else { // USE_SUBEXP_CALL 1396 optimizeNodeLeft(en.target, opt, oenv); 1397 if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { 1398 if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { 1399 opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); 1400 } 1401 } 1402 } 1403 break; 1404 1405 case EncloseType.STOP_BACKTRACK: 1406 optimizeNodeLeft(en.target, opt, oenv); 1407 break; 1408 1409 default: 1410 break; 1411 } // inner switch 1412 break; 1413 } 1414 1415 default: 1416 throw new InternalException(ERR_PARSER_BUG); 1417 } // switch 1418 } 1419 1420 @SuppressWarnings("unused") 1421 protected final void setOptimizedInfoFromTree(final Node node) { 1422 final NodeOptInfo opt = new NodeOptInfo(); 1423 final OptEnvironment oenv = new OptEnvironment(); 1424 1425 oenv.options = regex.options; 1426 oenv.caseFoldFlag = regex.caseFoldFlag; 1427 oenv.scanEnv = env; 1428 oenv.mmd.clear(); // ?? 1429 1430 optimizeNodeLeft(node, opt, oenv); 1431 1432 regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | 1433 AnchorType.BEGIN_POSITION | 1434 AnchorType.ANYCHAR_STAR | 1435 AnchorType.ANYCHAR_STAR_ML); 1436 1437 regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | 1438 AnchorType.SEMI_END_BUF); 1439 1440 if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { 1441 regex.anchorDmin = opt.length.min; 1442 regex.anchorDmax = opt.length.max; 1443 } 1444 1445 if (opt.exb.length > 0 || opt.exm.length > 0) { 1446 opt.exb.select(opt.exm); 1447 if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { 1448 // !goto set_map;! 1449 regex.setOptimizeMapInfo(opt.map); 1450 regex.setSubAnchor(opt.map.anchor); 1451 } else { 1452 regex.setExactInfo(opt.exb); 1453 regex.setSubAnchor(opt.exb.anchor); 1454 } 1455 } else if (opt.map.value > 0) { 1456 // !set_map:! 1457 regex.setOptimizeMapInfo(opt.map); 1458 regex.setSubAnchor(opt.map.anchor); 1459 } else { 1460 regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; 1461 if (opt.length.max == 0) { 1462 regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; 1463 } 1464 } 1465 1466 if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { 1467 Config.log.println(regex.optimizeInfoToString()); 1468 } 1469 } 1470} 1471