// Lexer.java revision 1088:7e62d98d4625
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 
19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline; 23import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; 24import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 25import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 26import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar; 27import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; 28import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; 29import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 30import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 31import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 32 33class Lexer extends ScannerSupport { 34 protected final ScanEnvironment env; 35 protected final Syntax syntax; // fast access to syntax 36 protected final Token token = new Token(); // current token 37 38 protected Lexer(final ScanEnvironment env, final char[] chars, final int p, final int end) { 39 super(chars, p, end); 40 this.env = env; 41 this.syntax = env.syntax; 42 } 43 44 /** 45 * @return 0: normal {n,m}, 2: fixed {n} 46 * !introduce returnCode here 47 */ 48 private int fetchRangeQuantifier() { 49 mark(); 50 final boolean synAllow = syntax.allowInvalidInterval(); 51 52 if (!left()) { 53 if (synAllow) { 54 return 1; /* "....{" : OK! 
*/ 55 } 56 throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); 57 } 58 59 if (!synAllow) { 60 c = peek(); 61 if (c == ')' || c == '(' || c == '|') { 62 throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); 63 } 64 } 65 66 int low = scanUnsignedNumber(); 67 if (low < 0) { 68 throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 69 } 70 if (low > Config.MAX_REPEAT_NUM) { 71 throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 72 } 73 74 boolean nonLow = false; 75 if (p == _p) { /* can't read low */ 76 if (syntax.allowIntervalLowAbbrev()) { 77 low = 0; 78 nonLow = true; 79 } else { 80 return invalidRangeQuantifier(synAllow); 81 } 82 } 83 84 if (!left()) { 85 return invalidRangeQuantifier(synAllow); 86 } 87 88 fetch(); 89 int up; 90 int ret = 0; 91 if (c == ',') { 92 final int prev = p; // ??? last 93 up = scanUnsignedNumber(); 94 if (up < 0) { 95 throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 96 } 97 if (up > Config.MAX_REPEAT_NUM) { 98 throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); 99 } 100 101 if (p == prev) { 102 if (nonLow) { 103 return invalidRangeQuantifier(synAllow); 104 } 105 up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */ 106 } 107 } else { 108 if (nonLow) { 109 return invalidRangeQuantifier(synAllow); 110 } 111 unfetch(); 112 up = low; /* {n} : exact n times */ 113 ret = 2; /* fixed */ 114 } 115 116 if (!left()) { 117 return invalidRangeQuantifier(synAllow); 118 } 119 fetch(); 120 121 if (syntax.opEscBraceInterval()) { 122 if (c != syntax.metaCharTable.esc) { 123 return invalidRangeQuantifier(synAllow); 124 } 125 fetch(); 126 } 127 128 if (c != '}') { 129 return invalidRangeQuantifier(synAllow); 130 } 131 132 if (!isRepeatInfinite(up) && low > up) { 133 throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); 134 } 135 136 token.type = TokenType.INTERVAL; 137 token.setRepeatLower(low); 138 token.setRepeatUpper(up); 139 140 return ret; /* 
0: normal {n,m}, 2: fixed {n} */ 141 } 142 143 private int invalidRangeQuantifier(final boolean synAllow) { 144 if (synAllow) { 145 restore(); 146 return 1; 147 } 148 throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN); 149 } 150 151 @SuppressWarnings("fallthrough") 152 /* \M-, \C-, \c, or \... */ 153 private int fetchEscapedValue() { 154 if (!left()) { 155 throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); 156 } 157 fetch(); 158 159 switch(c) { 160 161 case 'M': 162 if (syntax.op2EscCapitalMBarMeta()) { 163 if (!left()) { 164 throw new SyntaxException(ERR_END_PATTERN_AT_META); 165 } 166 fetch(); 167 if (c != '-') { 168 throw new SyntaxException(ERR_META_CODE_SYNTAX); 169 } 170 if (!left()) { 171 throw new SyntaxException(ERR_END_PATTERN_AT_META); 172 } 173 fetch(); 174 if (c == syntax.metaCharTable.esc) { 175 c = fetchEscapedValue(); 176 } 177 c = ((c & 0xff) | 0x80); 178 } else { 179 fetchEscapedValueBackSlash(); 180 } 181 break; 182 183 case 'C': 184 if (syntax.op2EscCapitalCBarControl()) { 185 if (!left()) { 186 throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL); 187 } 188 fetch(); 189 if (c != '-') { 190 throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX); 191 } 192 fetchEscapedValueControl(); 193 } else { 194 fetchEscapedValueBackSlash(); 195 } 196 break; 197 198 case 'c': 199 if (syntax.opEscCControl()) { 200 fetchEscapedValueControl(); 201 } 202 /* fall through */ 203 204 default: 205 fetchEscapedValueBackSlash(); 206 } // switch 207 208 return c; // ??? 
209 } 210 211 private void fetchEscapedValueBackSlash() { 212 c = env.convertBackslashValue(c); 213 } 214 215 private void fetchEscapedValueControl() { 216 if (!left()) { 217 throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL); 218 } 219 fetch(); 220 if (c == '?') { 221 c = 0177; 222 } else { 223 if (c == syntax.metaCharTable.esc) { 224 c = fetchEscapedValue(); 225 } 226 c &= 0x9f; 227 } 228 } 229 230 private void fetchTokenInCCFor_charType(final boolean flag, final int type) { 231 token.type = TokenType.CHAR_TYPE; 232 token.setPropCType(type); 233 token.setPropNot(flag); 234 } 235 236 private void fetchTokenInCCFor_x() { 237 if (!left()) { 238 return; 239 } 240 final int last = p; 241 242 if (peekIs('{') && syntax.opEscXBraceHex8()) { 243 inc(); 244 final int num = scanUnsignedHexadecimalNumber(8); 245 if (num < 0) { 246 throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); 247 } 248 if (left()) { 249 final int c2 = peek(); 250 if (EncodingHelper.isXDigit(c2)) { 251 throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 252 } 253 } 254 255 if (p > last + 1 && left() && peekIs('}')) { 256 inc(); 257 token.type = TokenType.CODE_POINT; 258 token.setCode(num); 259 } else { 260 /* can't read nothing or invalid format */ 261 p = last; 262 } 263 } else if (syntax.opEscXHex2()) { 264 int num = scanUnsignedHexadecimalNumber(2); 265 if (num < 0) { 266 throw new ValueException(ERR_TOO_BIG_NUMBER); 267 } 268 if (p == last) { /* can't read nothing. */ 269 num = 0; /* but, it's not error */ 270 } 271 token.type = TokenType.RAW_BYTE; 272 token.setC(num); 273 } 274 } 275 276 private void fetchTokenInCCFor_u() { 277 if (!left()) { 278 return; 279 } 280 final int last = p; 281 282 if (syntax.op2EscUHex4()) { 283 int num = scanUnsignedHexadecimalNumber(4); 284 if (num < 0) { 285 throw new ValueException(ERR_TOO_BIG_NUMBER); 286 } 287 if (p == last) { /* can't read nothing. 
*/ 288 num = 0; /* but, it's not error */ 289 } 290 token.type = TokenType.CODE_POINT; 291 token.setCode(num); 292 } 293 } 294 295 private void fetchTokenInCCFor_digit() { 296 if (syntax.opEscOctal3()) { 297 unfetch(); 298 final int last = p; 299 int num = scanUnsignedOctalNumber(3); 300 if (num < 0) { 301 throw new ValueException(ERR_TOO_BIG_NUMBER); 302 } 303 if (p == last) { /* can't read nothing. */ 304 num = 0; /* but, it's not error */ 305 } 306 token.type = TokenType.RAW_BYTE; 307 token.setC(num); 308 } 309 } 310 311 private void fetchTokenInCCFor_and() { 312 if (syntax.op2CClassSetOp() && left() && peekIs('&')) { 313 inc(); 314 token.type = TokenType.CC_AND; 315 } 316 } 317 318 protected final TokenType fetchTokenInCC() { 319 if (!left()) { 320 token.type = TokenType.EOT; 321 return token.type; 322 } 323 324 fetch(); 325 token.type = TokenType.CHAR; 326 token.setC(c); 327 token.escaped = false; 328 329 if (c == ']') { 330 token.type = TokenType.CC_CLOSE; 331 } else if (c == '-') { 332 token.type = TokenType.CC_RANGE; 333 } else if (c == syntax.metaCharTable.esc) { 334 if (!syntax.backSlashEscapeInCC()) { 335 return token.type; 336 } 337 if (!left()) { 338 throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); 339 } 340 fetch(); 341 token.escaped = true; 342 token.setC(c); 343 344 switch (c) { 345 case 'w': 346 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); 347 break; 348 case 'W': 349 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); 350 break; 351 case 'd': 352 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); 353 break; 354 case 'D': 355 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); 356 break; 357 case 's': 358 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? 
CharacterType.S : CharacterType.SPACE); 359 break; 360 case 'S': 361 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); 362 break; 363 case 'h': 364 if (syntax.op2EscHXDigit()) { 365 fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); 366 } 367 break; 368 case 'H': 369 if (syntax.op2EscHXDigit()) { 370 fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); 371 } 372 break; 373 case 'x': 374 fetchTokenInCCFor_x(); 375 break; 376 case 'u': 377 fetchTokenInCCFor_u(); 378 break; 379 case '0': 380 case '1': 381 case '2': 382 case '3': 383 case '4': 384 case '5': 385 case '6': 386 case '7': 387 fetchTokenInCCFor_digit(); 388 break; 389 390 default: 391 unfetch(); 392 final int num = fetchEscapedValue(); 393 if (token.getC() != num) { 394 token.setCode(num); 395 token.type = TokenType.CODE_POINT; 396 } 397 break; 398 } // switch 399 400 } else if (c == '&') { 401 fetchTokenInCCFor_and(); 402 } 403 return token.type; 404 } 405 406 private void fetchTokenFor_repeat(final int lower, final int upper) { 407 token.type = TokenType.OP_REPEAT; 408 token.setRepeatLower(lower); 409 token.setRepeatUpper(upper); 410 greedyCheck(); 411 } 412 413 private void fetchTokenFor_openBrace() { 414 switch (fetchRangeQuantifier()) { 415 case 0: 416 greedyCheck(); 417 break; 418 case 2: 419 if (syntax.fixedIntervalIsGreedyOnly()) { 420 possessiveCheck(); 421 } else { 422 greedyCheck(); 423 } 424 break; 425 default: /* 1 : normal char */ 426 } // inner switch 427 } 428 429 private void fetchTokenFor_anchor(final int subType) { 430 token.type = TokenType.ANCHOR; 431 token.setAnchor(subType); 432 } 433 434 private void fetchTokenFor_xBrace() { 435 if (!left()) { 436 return; 437 } 438 439 final int last = p; 440 if (peekIs('{') && syntax.opEscXBraceHex8()) { 441 inc(); 442 final int num = scanUnsignedHexadecimalNumber(8); 443 if (num < 0) { 444 throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); 445 } 446 if (left()) { 447 if 
(EncodingHelper.isXDigit(peek())) { 448 throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 449 } 450 } 451 452 if (p > last + 1 && left() && peekIs('}')) { 453 inc(); 454 token.type = TokenType.CODE_POINT; 455 token.setCode(num); 456 } else { 457 /* can't read nothing or invalid format */ 458 p = last; 459 } 460 } else if (syntax.opEscXHex2()) { 461 int num = scanUnsignedHexadecimalNumber(2); 462 if (num < 0) { 463 throw new ValueException(ERR_TOO_BIG_NUMBER); 464 } 465 if (p == last) { /* can't read nothing. */ 466 num = 0; /* but, it's not error */ 467 } 468 token.type = TokenType.RAW_BYTE; 469 token.setC(num); 470 } 471 } 472 473 private void fetchTokenFor_uHex() { 474 if (!left()) { 475 return; 476 } 477 final int last = p; 478 479 if (syntax.op2EscUHex4()) { 480 int num = scanUnsignedHexadecimalNumber(4); 481 if (num < 0) { 482 throw new ValueException(ERR_TOO_BIG_NUMBER); 483 } 484 if (p == last) { /* can't read nothing. */ 485 num = 0; /* but, it's not error */ 486 } 487 token.type = TokenType.CODE_POINT; 488 token.setCode(num); 489 } 490 } 491 492 private void fetchTokenFor_digit() { 493 unfetch(); 494 final int last = p; 495 final int num = scanUnsignedNumber(); 496 if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref 497 } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */ 498 if (syntax.strictCheckBackref()) { 499 if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) { 500 throw new ValueException(ERR_INVALID_BACKREF); 501 } 502 } 503 token.type = TokenType.BACKREF; 504 token.setBackrefRef(num); 505 return; 506 } 507 508 if (c == '8' || c == '9') { /* normal char */ // skip_backref: 509 p = last; 510 inc(); 511 return; 512 } 513 p = last; 514 515 fetchTokenFor_zero(); /* fall through */ 516 } 517 518 private void fetchTokenFor_zero() { 519 if (syntax.opEscOctal3()) { 520 final int last = p; 521 int num = scanUnsignedOctalNumber(c == '0' ? 
2 : 3); 522 if (num < 0) { 523 throw new ValueException(ERR_TOO_BIG_NUMBER); 524 } 525 if (p == last) { /* can't read nothing. */ 526 num = 0; /* but, it's not error */ 527 } 528 token.type = TokenType.RAW_BYTE; 529 token.setC(num); 530 } else if (c != '0') { 531 inc(); 532 } 533 } 534 535 private void fetchTokenFor_metaChars() { 536 if (c == syntax.metaCharTable.anyChar) { 537 token.type = TokenType.ANYCHAR; 538 } else if (c == syntax.metaCharTable.anyTime) { 539 fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); 540 } else if (c == syntax.metaCharTable.zeroOrOneTime) { 541 fetchTokenFor_repeat(0, 1); 542 } else if (c == syntax.metaCharTable.oneOrMoreTime) { 543 fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); 544 } else if (c == syntax.metaCharTable.anyCharAnyTime) { 545 token.type = TokenType.ANYCHAR_ANYTIME; 546 // goto out 547 } 548 } 549 550 protected final TokenType fetchToken() { 551 // mark(); // out 552 start: 553 while(true) { 554 if (!left()) { 555 token.type = TokenType.EOT; 556 return token.type; 557 } 558 559 token.type = TokenType.STRING; 560 token.backP = p; 561 562 fetch(); 563 564 if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) 565 if (!left()) { 566 throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); 567 } 568 569 token.backP = p; 570 fetch(); 571 572 token.setC(c); 573 token.escaped = true; 574 switch(c) { 575 576 case '*': 577 if (syntax.opEscAsteriskZeroInf()) { 578 fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); 579 } 580 break; 581 case '+': 582 if (syntax.opEscPlusOneInf()) { 583 fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); 584 } 585 break; 586 case '?': 587 if (syntax.opEscQMarkZeroOne()) { 588 fetchTokenFor_repeat(0, 1); 589 } 590 break; 591 case '{': 592 if (syntax.opEscBraceInterval()) { 593 fetchTokenFor_openBrace(); 594 } 595 break; 596 case '|': 597 if (syntax.opEscVBarAlt()) { 598 token.type = TokenType.ALT; 599 } 600 break; 601 case '(': 602 
if (syntax.opEscLParenSubexp()) { 603 token.type = TokenType.SUBEXP_OPEN; 604 } 605 break; 606 case ')': 607 if (syntax.opEscLParenSubexp()) { 608 token.type = TokenType.SUBEXP_CLOSE; 609 } 610 break; 611 case 'w': 612 if (syntax.opEscWWord()) { 613 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); 614 } 615 break; 616 case 'W': 617 if (syntax.opEscWWord()) { 618 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); 619 } 620 break; 621 case 'b': 622 if (syntax.opEscBWordBound()) { 623 fetchTokenFor_anchor(AnchorType.WORD_BOUND); 624 } 625 break; 626 case 'B': 627 if (syntax.opEscBWordBound()) { 628 fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); 629 } 630 break; 631 case '<': 632 if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { 633 fetchTokenFor_anchor(AnchorType.WORD_BEGIN); 634 } 635 break; 636 case '>': 637 if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { 638 fetchTokenFor_anchor(AnchorType.WORD_END); 639 } 640 break; 641 case 's': 642 if (syntax.opEscSWhiteSpace()) { 643 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); 644 } 645 break; 646 case 'S': 647 if (syntax.opEscSWhiteSpace()) { 648 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); 649 } 650 break; 651 case 'd': 652 if (syntax.opEscDDigit()) { 653 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); 654 } 655 break; 656 case 'D': 657 if (syntax.opEscDDigit()) { 658 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? 
CharacterType.D : CharacterType.DIGIT); 659 } 660 break; 661 case 'h': 662 if (syntax.op2EscHXDigit()) { 663 fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); 664 } 665 break; 666 case 'H': 667 if (syntax.op2EscHXDigit()) { 668 fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); 669 } 670 break; 671 case 'A': 672 if (syntax.opEscAZBufAnchor()) { 673 fetchTokenFor_anchor(AnchorType.BEGIN_BUF); 674 } 675 break; 676 case 'Z': 677 if (syntax.opEscAZBufAnchor()) { 678 fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); 679 } 680 break; 681 case 'z': 682 if (syntax.opEscAZBufAnchor()) { 683 fetchTokenFor_anchor(AnchorType.END_BUF); 684 } 685 break; 686 case 'G': 687 if (syntax.opEscCapitalGBeginAnchor()) { 688 fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); 689 } 690 break; 691 case '`': 692 if (syntax.op2EscGnuBufAnchor()) { 693 fetchTokenFor_anchor(AnchorType.BEGIN_BUF); 694 } 695 break; 696 case '\'': 697 if (syntax.op2EscGnuBufAnchor()) { 698 fetchTokenFor_anchor(AnchorType.END_BUF); 699 } 700 break; 701 case 'x': 702 fetchTokenFor_xBrace(); 703 break; 704 case 'u': 705 fetchTokenFor_uHex(); 706 break; 707 case '1': 708 case '2': 709 case '3': 710 case '4': 711 case '5': 712 case '6': 713 case '7': 714 case '8': 715 case '9': 716 fetchTokenFor_digit(); 717 break; 718 case '0': 719 fetchTokenFor_zero(); 720 break; 721 722 default: 723 unfetch(); 724 final int num = fetchEscapedValue(); 725 726 /* set_raw: */ 727 if (token.getC() != num) { 728 token.type = TokenType.CODE_POINT; 729 token.setCode(num); 730 } else { /* string */ 731 p = token.backP + 1; 732 } 733 break; 734 735 } // switch (c) 736 737 } else { 738 token.setC(c); 739 token.escaped = false; 740 741 if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { 742 fetchTokenFor_metaChars(); 743 break; 744 } 745 746 { 747 switch(c) { 748 case '.': 749 if (syntax.opDotAnyChar()) { 750 token.type = TokenType.ANYCHAR; 751 } 752 break; 753 case '*': 
754 if (syntax.opAsteriskZeroInf()) { 755 fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); 756 } 757 break; 758 case '+': 759 if (syntax.opPlusOneInf()) { 760 fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); 761 } 762 break; 763 case '?': 764 if (syntax.opQMarkZeroOne()) { 765 fetchTokenFor_repeat(0, 1); 766 } 767 break; 768 case '{': 769 if (syntax.opBraceInterval()) { 770 fetchTokenFor_openBrace(); 771 } 772 break; 773 case '|': 774 if (syntax.opVBarAlt()) { 775 token.type = TokenType.ALT; 776 } 777 break; 778 779 case '(': 780 if (peekIs('?') && syntax.op2QMarkGroupEffect()) { 781 inc(); 782 if (peekIs('#')) { 783 fetch(); 784 while (true) { 785 if (!left()) { 786 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); 787 } 788 fetch(); 789 if (c == syntax.metaCharTable.esc) { 790 if (left()) { 791 fetch(); 792 } 793 } else { 794 if (c == ')') { 795 break; 796 } 797 } 798 } 799 continue start; // goto start 800 } 801 unfetch(); 802 } 803 804 if (syntax.opLParenSubexp()) { 805 token.type = TokenType.SUBEXP_OPEN; 806 } 807 break; 808 case ')': 809 if (syntax.opLParenSubexp()) { 810 token.type = TokenType.SUBEXP_CLOSE; 811 } 812 break; 813 case '^': 814 if (syntax.opLineAnchor()) { 815 fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); 816 } 817 break; 818 case '$': 819 if (syntax.opLineAnchor()) { 820 fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE); 821 } 822 break; 823 case '[': 824 if (syntax.opBracketCC()) { 825 token.type = TokenType.CC_CC_OPEN; 826 } 827 break; 828 case ']': 829 //if (*src > env->pattern) /* /].../ is allowed. 
*/ 830 //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); 831 break; 832 case '#': 833 if (Option.isExtend(env.option)) { 834 while (left()) { 835 fetch(); 836 if (EncodingHelper.isNewLine(c)) { 837 break; 838 } 839 } 840 continue start; // goto start 841 } 842 break; 843 844 case ' ': 845 case '\t': 846 case '\n': 847 case '\r': 848 case '\f': 849 if (Option.isExtend(env.option)) 850 { 851 continue start; // goto start 852 } 853 break; 854 855 default: // string 856 break; 857 858 } // switch 859 } 860 } 861 862 break; 863 } // while 864 return token.type; 865 } 866 867 private void greedyCheck() { 868 if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) { 869 870 fetch(); 871 872 token.setRepeatGreedy(false); 873 token.setRepeatPossessive(false); 874 } else { 875 possessiveCheck(); 876 } 877 } 878 879 private void possessiveCheck() { 880 if (left() && peekIs('+') && 881 (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL || 882 syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) { 883 884 fetch(); 885 886 token.setRepeatGreedy(true); 887 token.setRepeatPossessive(true); 888 } else { 889 token.setRepeatGreedy(true); 890 token.setRepeatPossessive(false); 891 } 892 } 893 894 protected final void syntaxWarn(final String message, final char ch) { 895 syntaxWarn(message.replace("<%n>", Character.toString(ch))); 896 } 897 898 protected final void syntaxWarn(final String message) { 899 if (Config.USE_WARN) { 900 env.reg.warnings.warn(message + ": /" + new String(chars, getBegin(), getEnd()) + "/"); 901 } 902 } 903} 904