1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff; 23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup; 24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 25import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 26import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode; 27import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 28import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 29import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg; 30import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 31import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 32import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 33import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 34import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 35import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 36import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; 37import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; 38import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 39import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 40import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; 41import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; 42import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 43import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 44import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 45 46class Parser extends Lexer { 47 48 protected final Regex regex; 49 protected Node root; 50 51 protected int returnCode; // return code used by parser methods (they itself return parsed nodes) 52 // this approach will not affect recursive calls 53 54 protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) { 55 super(env, chars, p, end); 56 regex = env.reg; 57 } 58 59 // onig_parse_make_tree 60 protected final Node parse() { 61 root = parseRegexp(); 62 regex.numMem = env.numMem; 63 return root; 64 } 65 66 private boolean codeExistCheck(final int code, final boolean ignoreEscaped) { 67 mark(); 68 69 boolean inEsc = false; 70 while (left()) { 71 if (ignoreEscaped && inEsc) { 72 inEsc = false; 73 } else { 74 fetch(); 75 if (c == code) { 76 restore(); 77 return true; 78 } 79 if (c == syntax.metaCharTable.esc) { 80 inEsc = true; 81 } 82 } 83 } 84 85 restore(); 86 return false; 87 } 88 89 private CClassNode parseCharClass() { 90 fetchTokenInCC(); 91 92 final boolean neg; 93 if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { 94 neg = true; 95 fetchTokenInCC(); 96 } else { 97 neg = false; 98 } 99 100 if (token.type == TokenType.CC_CLOSE) { 101 if (!codeExistCheck(']', true)) { 102 throw new SyntaxException(ERR_EMPTY_CHAR_CLASS); 103 } 104 env.ccEscWarn("]"); 105 token.type = TokenType.CHAR; /* allow []...] */ 106 } 107 108 CClassNode cc = new CClassNode(); 109 CClassNode prevCC = null; 110 CClassNode workCC = null; 111 112 final CCStateArg arg = new CCStateArg(); 113 114 boolean andStart = false; 115 arg.state = CCSTATE.START; 116 117 while (token.type != TokenType.CC_CLOSE) { 118 boolean fetched = false; 119 120 switch (token.type) { 121 122 case CHAR: 123 if (token.getC() > 0xff) { 124 arg.inType = CCVALTYPE.CODE_POINT; 125 } else { 126 arg.inType = CCVALTYPE.SB; // sb_char: 127 } 128 arg.v = token.getC(); 129 arg.vIsRaw = false; 130 parseCharClassValEntry2(cc, arg); // goto val_entry2 131 break; 132 133 case RAW_BYTE: 134 arg.v = token.getC(); 135 arg.inType = CCVALTYPE.SB; // raw_single: 136 arg.vIsRaw = true; 137 parseCharClassValEntry2(cc, arg); // goto val_entry2 138 break; 139 140 case CODE_POINT: 141 arg.v = token.getCode(); 142 arg.vIsRaw = true; 143 parseCharClassValEntry(cc, arg); // val_entry:, val_entry2 144 break; 145 146 case CHAR_TYPE: 147 cc.addCType(token.getPropCType(), token.getPropNot(), env, this); 148 cc.nextStateClass(arg, env); // next_class: 149 break; 150 151 case CC_RANGE: 152 if (arg.state == CCSTATE.VALUE) { 153 fetchTokenInCC(); 154 fetched = true; 155 if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */ 156 parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry; 157 break; 158 } else if (token.type == TokenType.CC_AND) { 159 env.ccEscWarn("-"); 160 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 161 break; 162 } 163 arg.state = CCSTATE.RANGE; 164 } else if (arg.state == CCSTATE.START) { 165 arg.v = token.getC(); /* [-xa] is allowed */ 166 arg.vIsRaw = false; 167 fetchTokenInCC(); 168 fetched = true; 169 if (token.type == TokenType.CC_RANGE || andStart) { 170 env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */ 171 } 172 parseCharClassValEntry(cc, arg); // goto val_entry 173 break; 174 } else if (arg.state == CCSTATE.RANGE) { 175 env.ccEscWarn("-"); 176 parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */ 177 break; 178 } else { /* CCS_COMPLETE */ 179 fetchTokenInCC(); 180 fetched = true; 181 if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */ 182 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 183 break; 184 } else if (token.type == TokenType.CC_AND) { 185 env.ccEscWarn("-"); 186 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 187 break; 188 } 189 190 if (syntax.allowDoubleRangeOpInCC()) { 191 env.ccEscWarn("-"); 192 arg.inType = CCVALTYPE.SB; 193 arg.v = '-'; 194 arg.vIsRaw = false; 195 parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */ 196 break; 197 } 198 throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); 199 } 200 break; 201 202 case CC_CC_OPEN: /* [ */ 203 final CClassNode acc = parseCharClass(); 204 cc.or(acc); 205 break; 206 207 case CC_AND: /* && */ 208 if (arg.state == CCSTATE.VALUE) { 209 arg.v = 0; // ??? safe v ? 210 arg.vIsRaw = false; 211 cc.nextStateValue(arg, env); 212 } 213 /* initialize local variables */ 214 andStart = true; 215 arg.state = CCSTATE.START; 216 if (prevCC != null) { 217 prevCC.and(cc); 218 } else { 219 prevCC = cc; 220 if (workCC == null) { 221 workCC = new CClassNode(); 222 } 223 cc = workCC; 224 } 225 cc.clear(); 226 break; 227 228 case EOT: 229 throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); 230 231 default: 232 throw new InternalException(ERR_PARSER_BUG); 233 } // switch 234 235 if (!fetched) { 236 fetchTokenInCC(); 237 } 238 239 } // while 240 241 if (arg.state == CCSTATE.VALUE) { 242 arg.v = 0; // ??? safe v ? 243 arg.vIsRaw = false; 244 cc.nextStateValue(arg, env); 245 } 246 247 if (prevCC != null) { 248 prevCC.and(cc); 249 cc = prevCC; 250 } 251 252 if (neg) { 253 cc.setNot(); 254 } else { 255 cc.clearNot(); 256 } 257 258 if (cc.isNot() && syntax.notNewlineInNegativeCC()) { 259 if (!cc.isEmpty()) { 260 final int NEW_LINE = 0x0a; 261 if (EncodingHelper.isNewLine(NEW_LINE)) { 262 cc.bs.set(NEW_LINE); 263 } 264 } 265 } 266 267 return cc; 268 } 269 270 private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) { 271 arg.inType = CCVALTYPE.SB; 272 arg.v = token.getC(); 273 arg.vIsRaw = false; 274 parseCharClassValEntry2(cc, arg); // goto val_entry2 275 } 276 277 private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) { 278 arg.v = '-'; 279 arg.vIsRaw = false; 280 parseCharClassValEntry(cc, arg); // goto val_entry 281 } 282 283 private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) { 284 arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; 285 parseCharClassValEntry2(cc, arg); // val_entry2: 286 } 287 288 private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) { 289 cc.nextStateValue(arg, env); 290 } 291 292 private Node parseEnclose(final TokenType term) { 293 Node node = null; 294 295 if (!left()) { 296 throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 297 } 298 299 int option = env.option; 300 301 if (peekIs('?') && syntax.op2QMarkGroupEffect()) { 302 inc(); 303 if (!left()) { 304 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); 305 } 306 307 fetch(); 308 switch(c) { 309 case ':': /* (?:...) grouping only */ 310 fetchToken(); // group: 311 node = parseSubExp(term); 312 returnCode = 1; /* group */ 313 return node; 314 case '=': 315 node = new AnchorNode(AnchorType.PREC_READ); 316 break; 317 case '!': /* preceding read */ 318 node = new AnchorNode(AnchorType.PREC_READ_NOT); 319 break; 320 case '>': /* (?>...) stop backtrack */ 321 node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 322 break; 323 case '\'': 324 break; 325 case '<': /* look behind (?<=...), (?<!...) */ 326 fetch(); 327 if (c == '=') { 328 node = new AnchorNode(AnchorType.LOOK_BEHIND); 329 } else if (c == '!') { 330 node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT); 331 } else { 332 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 333 } 334 break; 335 case '@': 336 if (syntax.op2AtMarkCaptureHistory()) { 337 final EncloseNode en = new EncloseNode(); // node_new_enclose_memory 338 final int num = env.addMemEntry(); 339 if (num >= BitStatus.BIT_STATUS_BITS_NUM) { 340 throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); 341 } 342 en.regNum = num; 343 node = en; 344 } else { 345 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 346 } 347 break; 348 349 // case 'p': #ifdef USE_POSIXLINE_OPTION 350 case '-': 351 case 'i': 352 case 'm': 353 case 's': 354 case 'x': 355 boolean neg = false; 356 while (true) { 357 switch(c) { 358 case ':': 359 case ')': 360 break; 361 case '-': 362 neg = true; 363 break; 364 case 'x': 365 option = bsOnOff(option, Option.EXTEND, neg); 366 break; 367 case 'i': 368 option = bsOnOff(option, Option.IGNORECASE, neg); 369 break; 370 case 's': 371 if (syntax.op2OptionPerl()) { 372 option = bsOnOff(option, Option.MULTILINE, neg); 373 } else { 374 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 375 } 376 break; 377 case 'm': 378 if (syntax.op2OptionPerl()) { 379 option = bsOnOff(option, Option.SINGLELINE, !neg); 380 } else if (syntax.op2OptionRuby()) { 381 option = bsOnOff(option, Option.MULTILINE, neg); 382 } else { 383 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 384 } 385 break; 386 // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined 387 // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); 388 // break; 389 390 default: 391 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 392 } // switch 393 394 if (c == ')') { 395 final EncloseNode en = new EncloseNode(option, 0); // node_new_option 396 node = en; 397 returnCode = 2; /* option only */ 398 return node; 399 } else if (c == ':') { 400 final int prev = env.option; 401 env.option = option; 402 fetchToken(); 403 final Node target = parseSubExp(term); 404 env.option = prev; 405 final EncloseNode en = new EncloseNode(option, 0); // node_new_option 406 en.setTarget(target); 407 node = en; 408 returnCode = 0; 409 return node; 410 } 411 if (!left()) { 412 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); 413 } 414 fetch(); 415 } // while 416 417 default: 418 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 419 } // switch 420 421 } else { 422 if (isDontCaptureGroup(env.option)) { 423 fetchToken(); // goto group 424 node = parseSubExp(term); 425 returnCode = 1; /* group */ 426 return node; 427 } 428 final EncloseNode en = new EncloseNode(); // node_new_enclose_memory 429 final int num = env.addMemEntry(); 430 en.regNum = num; 431 node = en; 432 } 433 434 fetchToken(); 435 final Node target = parseSubExp(term); 436 437 if (node.getType() == NodeType.ANCHOR) { 438 final AnchorNode an = (AnchorNode) node; 439 an.setTarget(target); 440 } else { 441 final EncloseNode en = (EncloseNode)node; 442 en.setTarget(target); 443 if (en.type == EncloseType.MEMORY) { 444 /* Don't move this to previous of parse_subexp() */ 445 env.setMemNode(en.regNum, node); 446 } 447 } 448 returnCode = 0; 449 return node; // ?? 450 } 451 452 private Node parseExp(final TokenType term) { 453 if (token.type == term) 454 { 455 return StringNode.EMPTY; // goto end_of_token 456 } 457 458 Node node = null; 459 boolean group = false; 460 461 switch(token.type) { 462 case ALT: 463 case EOT: 464 return StringNode.EMPTY; // end_of_token:, node_new_empty 465 466 case SUBEXP_OPEN: 467 node = parseEnclose(TokenType.SUBEXP_CLOSE); 468 if (returnCode == 1) { 469 group = true; 470 } else if (returnCode == 2) { /* option only */ 471 final int prev = env.option; 472 final EncloseNode en = (EncloseNode)node; 473 env.option = en.option; 474 fetchToken(); 475 final Node target = parseSubExp(term); 476 env.option = prev; 477 en.setTarget(target); 478 return node; 479 } 480 break; 481 case SUBEXP_CLOSE: 482 if (!syntax.allowUnmatchedCloseSubexp()) { 483 throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); 484 } 485 if (token.escaped) { 486 return parseExpTkRawByte(group); // goto tk_raw_byte 487 } 488 return parseExpTkByte(group); // goto tk_byte 489 case STRING: 490 return parseExpTkByte(group); // tk_byte: 491 492 case RAW_BYTE: 493 return parseExpTkRawByte(group); // tk_raw_byte: 494 case CODE_POINT: 495 final char[] buf = new char[] {(char)token.getCode()}; 496 // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else 497 node = new StringNode(buf, 0, 1); 498 break; 499 500 case CHAR_TYPE: 501 switch(token.getPropCType()) { 502 case CharacterType.D: 503 case CharacterType.S: 504 case CharacterType.W: 505 if (Config.NON_UNICODE_SDW) { 506 final CClassNode cc = new CClassNode(); 507 cc.addCType(token.getPropCType(), false, env, this); 508 if (token.getPropNot()) { 509 cc.setNot(); 510 } 511 node = cc; 512 } 513 break; 514 515 case CharacterType.SPACE: 516 case CharacterType.DIGIT: 517 case CharacterType.XDIGIT: 518 // #ifdef USE_SHARED_CCLASS_TABLE ... #endif 519 final CClassNode ccn = new CClassNode(); 520 ccn.addCType(token.getPropCType(), false, env, this); 521 if (token.getPropNot()) { 522 ccn.setNot(); 523 } 524 node = ccn; 525 break; 526 527 default: 528 throw new InternalException(ERR_PARSER_BUG); 529 530 } // inner switch 531 break; 532 533 case CC_CC_OPEN: 534 final CClassNode cc = parseCharClass(); 535 node = cc; 536 if (isIgnoreCase(env.option)) { 537 final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc); 538 EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg); 539 540 if (arg.altRoot != null) { 541 node = ConsAltNode.newAltNode(node, arg.altRoot); 542 } 543 } 544 break; 545 546 case ANYCHAR: 547 node = new AnyCharNode(); 548 break; 549 550 case ANYCHAR_ANYTIME: 551 node = new AnyCharNode(); 552 final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); 553 qn.setTarget(node); 554 node = qn; 555 break; 556 557 case BACKREF: 558 final int backRef = token.getBackrefRef(); 559 node = new BackRefNode(backRef, env); 560 break; 561 562 case ANCHOR: 563 node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma 564 break; 565 566 case OP_REPEAT: 567 case INTERVAL: 568 if (syntax.contextIndepRepeatOps()) { 569 if (syntax.contextInvalidRepeatOps()) { 570 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); 571 } 572 node = StringNode.EMPTY; // node_new_empty 573 } else { 574 return parseExpTkByte(group); // goto tk_byte 575 } 576 break; 577 578 default: 579 throw new InternalException(ERR_PARSER_BUG); 580 } //switch 581 582 //targetp = node; 583 584 fetchToken(); // re_entry: 585 586 return parseExpRepeat(node, group); // repeat: 587 } 588 589 private Node parseExpTkByte(final boolean group) { 590 final StringNode node = new StringNode(chars, token.backP, p); // tk_byte: 591 while (true) { 592 fetchToken(); 593 if (token.type != TokenType.STRING) { 594 break; 595 } 596 597 if (token.backP == node.end) { 598 node.end = p; // non escaped character, remain shared, just increase shared range 599 } else { 600 node.cat(chars, token.backP, p); // non continuous string stream, need to COW 601 } 602 } 603 // targetp = node; 604 return parseExpRepeat(node, group); // string_end:, goto repeat 605 } 606 607 private Node parseExpTkRawByte(final boolean group) { 608 // tk_raw_byte: 609 610 // important: we don't use 0xff mask here neither in the compiler 611 // (in the template string) so we won't have to mask target 612 // strings when comparing against them in the matcher 613 final StringNode node = new StringNode((char)token.getC()); 614 node.setRaw(); 615 616 fetchToken(); 617 node.clearRaw(); 618 // !goto string_end;! 619 return parseExpRepeat(node, group); 620 } 621 622 private Node parseExpRepeat(final Node targetp, final boolean group) { 623 Node target = targetp; 624 while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: 625 if (target.isInvalidQuantifier()) { 626 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); 627 } 628 629 final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), 630 token.getRepeatUpper(), 631 token.type == TokenType.INTERVAL); 632 633 qtfr.greedy = token.getRepeatGreedy(); 634 final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd()); 635 Node qn = qtfr; 636 637 if (token.getRepeatPossessive()) { 638 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 639 en.setTarget(qn); 640 qn = en; 641 } 642 643 if (ret == 0) { 644 target = qn; 645 } else if (ret == 2) { /* split case: /abc+/ */ 646 target = ConsAltNode.newListNode(target, null); 647 final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null)); 648 649 fetchToken(); 650 return parseExpRepeatForCar(target, tmp, group); 651 } 652 fetchToken(); // goto re_entry 653 } 654 return target; 655 } 656 657 private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) { 658 while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: 659 if (target.car.isInvalidQuantifier()) { 660 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); 661 } 662 663 final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), 664 token.getRepeatUpper(), 665 token.type == TokenType.INTERVAL); 666 667 qtfr.greedy = token.getRepeatGreedy(); 668 final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd()); 669 Node qn = qtfr; 670 671 if (token.getRepeatPossessive()) { 672 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 673 en.setTarget(qn); 674 qn = en; 675 } 676 677 if (ret == 0) { 678 target.setCar(qn); 679 } else if (ret == 2) { /* split case: /abc+/ */ 680 assert false; 681 } 682 fetchToken(); // goto re_entry 683 } 684 return top; 685 } 686 687 private Node parseBranch(final TokenType term) { 688 Node node = parseExp(term); 689 690 if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) { 691 return node; 692 } 693 final ConsAltNode top = ConsAltNode.newListNode(node, null); 694 ConsAltNode t = top; 695 696 while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) { 697 node = parseExp(term); 698 if (node.getType() == NodeType.LIST) { 699 t.setCdr((ConsAltNode)node); 700 while (((ConsAltNode)node).cdr != null ) { 701 node = ((ConsAltNode)node).cdr; 702 } 703 704 t = ((ConsAltNode)node); 705 } else { 706 t.setCdr(ConsAltNode.newListNode(node, null)); 707 t = t.cdr; 708 } 709 } 710 return top; 711 } 712 713 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ 714 private Node parseSubExp(final TokenType term) { 715 Node node = parseBranch(term); 716 717 if (token.type == term) { 718 return node; 719 } else if (token.type == TokenType.ALT) { 720 final ConsAltNode top = ConsAltNode.newAltNode(node, null); 721 ConsAltNode t = top; 722 while (token.type == TokenType.ALT) { 723 fetchToken(); 724 node = parseBranch(term); 725 726 t.setCdr(ConsAltNode.newAltNode(node, null)); 727 t = t.cdr; 728 } 729 730 if (token.type != term) { 731 parseSubExpError(term); 732 } 733 return top; 734 } else { 735 parseSubExpError(term); 736 return null; //not reached 737 } 738 } 739 740 private static void parseSubExpError(final TokenType term) { 741 if (term == TokenType.SUBEXP_CLOSE) { 742 throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 743 } 744 throw new InternalException(ERR_PARSER_BUG); 745 } 746 747 private Node parseRegexp() { 748 fetchToken(); 749 return parseSubExp(TokenType.EOT); 750 } 751} 752