Lexer.java revision 1393:d61744c0d1d2
1/* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26package jdk.nashorn.internal.parser; 27 28import static jdk.nashorn.internal.parser.TokenType.ADD; 29import static jdk.nashorn.internal.parser.TokenType.COMMENT; 30import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 31import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 32import static jdk.nashorn.internal.parser.TokenType.EOF; 33import static jdk.nashorn.internal.parser.TokenType.EOL; 34import static jdk.nashorn.internal.parser.TokenType.ERROR; 35import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 36import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 37import static jdk.nashorn.internal.parser.TokenType.FLOATING; 38import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 39import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 40import static jdk.nashorn.internal.parser.TokenType.LBRACE; 41import static jdk.nashorn.internal.parser.TokenType.LPAREN; 42import static jdk.nashorn.internal.parser.TokenType.OCTAL; 43import static jdk.nashorn.internal.parser.TokenType.RBRACE; 44import static jdk.nashorn.internal.parser.TokenType.REGEX; 45import static jdk.nashorn.internal.parser.TokenType.RPAREN; 46import static jdk.nashorn.internal.parser.TokenType.STRING; 47import static jdk.nashorn.internal.parser.TokenType.XML; 48 49import java.io.Serializable; 50import jdk.nashorn.internal.runtime.ECMAErrors; 51import jdk.nashorn.internal.runtime.ErrorManager; 52import jdk.nashorn.internal.runtime.JSErrorType; 53import jdk.nashorn.internal.runtime.JSType; 54import jdk.nashorn.internal.runtime.ParserException; 55import jdk.nashorn.internal.runtime.Source; 56import jdk.nashorn.internal.runtime.options.Options; 57 58/** 59 * Responsible for converting source content into a stream of tokens. 
60 * 61 */ 62@SuppressWarnings("fallthrough") 63public class Lexer extends Scanner { 64 private static final long MIN_INT_L = Integer.MIN_VALUE; 65 private static final long MAX_INT_L = Integer.MAX_VALUE; 66 67 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 68 69 /** Content source. */ 70 private final Source source; 71 72 /** Buffered stream for tokens. */ 73 private final TokenStream stream; 74 75 /** True if here and edit strings are supported. */ 76 private final boolean scripting; 77 78 /** True if a nested scan. (scan to completion, no EOF.) */ 79 private final boolean nested; 80 81 /** Pending new line number and position. */ 82 int pendingLine; 83 84 /** Position of last EOL + 1. */ 85 private int linePosition; 86 87 /** Type of last token added. */ 88 private TokenType last; 89 90 private final boolean pauseOnFunctionBody; 91 private boolean pauseOnNextLeftBrace; 92 93 private static final String SPACETAB = " \t"; // ASCII space and tab 94 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 95 96 private static final String JAVASCRIPT_WHITESPACE_EOL = 97 LFCR + 98 "\u2028" + // line separator 99 "\u2029" // paragraph separator 100 ; 101 private static final String JAVASCRIPT_WHITESPACE = 102 SPACETAB + 103 JAVASCRIPT_WHITESPACE_EOL + 104 "\u000b" + // tabulation line 105 "\u000c" + // ff (ctrl-l) 106 "\u00a0" + // Latin-1 space 107 "\u1680" + // Ogham space mark 108 "\u180e" + // separator, Mongolian vowel 109 "\u2000" + // en quad 110 "\u2001" + // em quad 111 "\u2002" + // en space 112 "\u2003" + // em space 113 "\u2004" + // three-per-em space 114 "\u2005" + // four-per-em space 115 "\u2006" + // six-per-em space 116 "\u2007" + // figure space 117 "\u2008" + // punctuation space 118 "\u2009" + // thin space 119 "\u200a" + // hair space 120 "\u202f" + // narrow no-break space 121 "\u205f" + // medium mathematical space 122 "\u3000" + // ideographic space 123 "\ufeff" 
// byte order mark 124 ; 125 126 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 127 "\\u000a" + // line feed 128 "\\u000d" + // carriage return (ctrl-m) 129 "\\u2028" + // line separator 130 "\\u2029" + // paragraph separator 131 "\\u0009" + // tab 132 "\\u0020" + // ASCII space 133 "\\u000b" + // tabulation line 134 "\\u000c" + // ff (ctrl-l) 135 "\\u00a0" + // Latin-1 space 136 "\\u1680" + // Ogham space mark 137 "\\u180e" + // separator, Mongolian vowel 138 "\\u2000" + // en quad 139 "\\u2001" + // em quad 140 "\\u2002" + // en space 141 "\\u2003" + // em space 142 "\\u2004" + // three-per-em space 143 "\\u2005" + // four-per-em space 144 "\\u2006" + // six-per-em space 145 "\\u2007" + // figure space 146 "\\u2008" + // punctuation space 147 "\\u2009" + // thin space 148 "\\u200a" + // hair space 149 "\\u202f" + // narrow no-break space 150 "\\u205f" + // medium mathematical space 151 "\\u3000" + // ideographic space 152 "\\ufeff" // byte order mark 153 ; 154 155 static String unicodeEscape(final char ch) { 156 final StringBuilder sb = new StringBuilder(); 157 158 sb.append("\\u"); 159 160 final String hex = Integer.toHexString(ch); 161 for (int i = hex.length(); i < 4; i++) { 162 sb.append('0'); 163 } 164 sb.append(hex); 165 166 return sb.toString(); 167 } 168 169 /** 170 * Constructor 171 * 172 * @param source the source 173 * @param stream the token stream to lex 174 */ 175 public Lexer(final Source source, final TokenStream stream) { 176 this(source, stream, false); 177 } 178 179 /** 180 * Constructor 181 * 182 * @param source the source 183 * @param stream the token stream to lex 184 * @param scripting are we in scripting mode 185 */ 186 public Lexer(final Source source, final TokenStream stream, final boolean scripting) { 187 this(source, 0, source.getLength(), stream, scripting, false); 188 } 189 190 /** 191 * Constructor 192 * 193 * @param source the source 194 * @param start start position in source from which to start lexing 195 * @param 
len length of source segment to lex 196 * @param stream token stream to lex 197 * @param scripting are we in scripting mode 198 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 199 * function body. This is used with the feature where the parser is skipping nested function bodies to 200 * avoid reading ahead unnecessarily when we skip the function bodies. 201 */ 202 203 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) { 204 super(source.getContent(), 1, start, len); 205 this.source = source; 206 this.stream = stream; 207 this.scripting = scripting; 208 this.nested = false; 209 this.pendingLine = 1; 210 this.last = EOL; 211 212 this.pauseOnFunctionBody = pauseOnFunctionBody; 213 } 214 215 private Lexer(final Lexer lexer, final State state) { 216 super(lexer, state); 217 218 source = lexer.source; 219 stream = lexer.stream; 220 scripting = lexer.scripting; 221 nested = true; 222 223 pendingLine = state.pendingLine; 224 linePosition = state.linePosition; 225 last = EOL; 226 pauseOnFunctionBody = false; 227 } 228 229 static class State extends Scanner.State { 230 /** Pending new line number and position. */ 231 public final int pendingLine; 232 233 /** Position of last EOL + 1. */ 234 public final int linePosition; 235 236 /** Type of last token added. */ 237 public final TokenType last; 238 239 /* 240 * Constructor. 241 */ 242 243 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 244 super(position, limit, line); 245 246 this.pendingLine = pendingLine; 247 this.linePosition = linePosition; 248 this.last = last; 249 } 250 } 251 252 /** 253 * Save the state of the scan. 254 * 255 * @return Captured state. 
256 */ 257 @Override 258 State saveState() { 259 return new State(position, limit, line, pendingLine, linePosition, last); 260 } 261 262 /** 263 * Restore the state of the scan. 264 * 265 * @param state 266 * Captured state. 267 */ 268 void restoreState(final State state) { 269 super.restoreState(state); 270 271 pendingLine = state.pendingLine; 272 linePosition = state.linePosition; 273 last = state.last; 274 } 275 276 /** 277 * Add a new token to the stream. 278 * 279 * @param type 280 * Token type. 281 * @param start 282 * Start position. 283 * @param end 284 * End position. 285 */ 286 protected void add(final TokenType type, final int start, final int end) { 287 // Record last token. 288 last = type; 289 290 // Only emit the last EOL in a cluster. 291 if (type == EOL) { 292 pendingLine = end; 293 linePosition = start; 294 } else { 295 // Write any pending EOL to stream. 296 if (pendingLine != -1) { 297 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 298 pendingLine = -1; 299 } 300 301 // Write token to stream. 302 stream.put(Token.toDesc(type, start, end - start)); 303 } 304 } 305 306 /** 307 * Add a new token to the stream. 308 * 309 * @param type 310 * Token type. 311 * @param start 312 * Start position. 313 */ 314 protected void add(final TokenType type, final int start) { 315 add(type, start, position); 316 } 317 318 /** 319 * Return the String of valid whitespace characters for regular 320 * expressions in JavaScript 321 * @return regexp whitespace string 322 */ 323 public static String getWhitespaceRegExp() { 324 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 325 } 326 327 /** 328 * Skip end of line. 329 * 330 * @param addEOL true if EOL token should be recorded. 
331 */ 332 private void skipEOL(final boolean addEOL) { 333 334 if (ch0 == '\r') { // detect \r\n pattern 335 skip(1); 336 if (ch0 == '\n') { 337 skip(1); 338 } 339 } else { // all other space, ch0 is guaranteed to be EOL or \0 340 skip(1); 341 } 342 343 // bump up line count 344 line++; 345 346 if (addEOL) { 347 // Add an EOL token. 348 add(EOL, position, line); 349 } 350 } 351 352 /** 353 * Skip over rest of line including end of line. 354 * 355 * @param addEOL true if EOL token should be recorded. 356 */ 357 private void skipLine(final boolean addEOL) { 358 // Ignore characters. 359 while (!isEOL(ch0) && !atEOF()) { 360 skip(1); 361 } 362 // Skip over end of line. 363 skipEOL(addEOL); 364 } 365 366 /** 367 * Test whether a char is valid JavaScript whitespace 368 * @param ch a char 369 * @return true if valid JavaScript whitespace 370 */ 371 public static boolean isJSWhitespace(final char ch) { 372 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 373 } 374 375 /** 376 * Test whether a char is valid JavaScript end of line 377 * @param ch a char 378 * @return true if valid JavaScript end of line 379 */ 380 public static boolean isJSEOL(final char ch) { 381 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 382 } 383 384 /** 385 * Test if char is a string delimiter, e.g. '\' or '"'. Also scans exec 386 * strings ('`') in scripting mode. 
387 * @param ch a char 388 * @return true if string delimiter 389 */ 390 protected boolean isStringDelimiter(final char ch) { 391 return ch == '\'' || ch == '"' || (scripting && ch == '`'); 392 } 393 394 /** 395 * Test whether a char is valid JavaScript whitespace 396 * @param ch a char 397 * @return true if valid JavaScript whitespace 398 */ 399 protected boolean isWhitespace(final char ch) { 400 return Lexer.isJSWhitespace(ch); 401 } 402 403 /** 404 * Test whether a char is valid JavaScript end of line 405 * @param ch a char 406 * @return true if valid JavaScript end of line 407 */ 408 protected boolean isEOL(final char ch) { 409 return Lexer.isJSEOL(ch); 410 } 411 412 /** 413 * Skip over whitespace and detect end of line, adding EOL tokens if 414 * encountered. 415 * 416 * @param addEOL true if EOL tokens should be recorded. 417 */ 418 private void skipWhitespace(final boolean addEOL) { 419 while (isWhitespace(ch0)) { 420 if (isEOL(ch0)) { 421 skipEOL(addEOL); 422 } else { 423 skip(1); 424 } 425 } 426 } 427 428 /** 429 * Skip over comments. 430 * 431 * @return True if a comment. 432 */ 433 protected boolean skipComments() { 434 // Save the current position. 435 final int start = position; 436 437 if (ch0 == '/') { 438 // Is it a // comment. 439 if (ch1 == '/') { 440 // Skip over //. 441 skip(2); 442 443 boolean directiveComment = false; 444 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 445 directiveComment = true; 446 } 447 448 // Scan for EOL. 449 while (!atEOF() && !isEOL(ch0)) { 450 skip(1); 451 } 452 // Did detect a comment. 453 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 454 return true; 455 } else if (ch1 == '*') { 456 // Skip over /*. 457 skip(2); 458 // Scan for */. 459 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 460 // If end of line handle else skip character. 461 if (isEOL(ch0)) { 462 skipEOL(true); 463 } else { 464 skip(1); 465 } 466 } 467 468 if (atEOF()) { 469 // TODO - Report closing */ missing in parser. 
470 add(ERROR, start); 471 } else { 472 // Skip */. 473 skip(2); 474 } 475 476 // Did detect a comment. 477 add(COMMENT, start); 478 return true; 479 } 480 } else if (ch0 == '#') { 481 assert scripting; 482 // shell style comment 483 // Skip over #. 484 skip(1); 485 // Scan for EOL. 486 while (!atEOF() && !isEOL(ch0)) { 487 skip(1); 488 } 489 // Did detect a comment. 490 add(COMMENT, start); 491 return true; 492 } 493 494 // Not a comment. 495 return false; 496 } 497 498 /** 499 * Convert a regex token to a token object. 500 * 501 * @param start Position in source content. 502 * @param length Length of regex token. 503 * @return Regex token object. 504 */ 505 public RegexToken valueOfPattern(final int start, final int length) { 506 // Save the current position. 507 final int savePosition = position; 508 // Reset to beginning of content. 509 reset(start); 510 // Buffer for recording characters. 511 final StringBuilder sb = new StringBuilder(length); 512 513 // Skip /. 514 skip(1); 515 boolean inBrackets = false; 516 // Scan for closing /, stopping at end of line. 517 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 518 // Skip over escaped character. 519 if (ch0 == '\\') { 520 sb.append(ch0); 521 sb.append(ch1); 522 skip(2); 523 } else { 524 if (ch0 == '[') { 525 inBrackets = true; 526 } else if (ch0 == ']') { 527 inBrackets = false; 528 } 529 530 // Skip literal character. 531 sb.append(ch0); 532 skip(1); 533 } 534 } 535 536 // Get pattern as string. 537 final String regex = sb.toString(); 538 539 // Skip /. 540 skip(1); 541 542 // Options as string. 543 final String options = source.getString(position, scanIdentifier()); 544 545 reset(savePosition); 546 547 // Compile the pattern. 548 return new RegexToken(regex, options); 549 } 550 551 /** 552 * Return true if the given token can be the beginning of a literal. 553 * 554 * @param token a token 555 * @return true if token can start a literal. 
556 */ 557 public boolean canStartLiteral(final TokenType token) { 558 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 559 } 560 561 /** 562 * interface to receive line information for multi-line literals. 563 */ 564 protected interface LineInfoReceiver { 565 /** 566 * Receives line information 567 * @param line last line number 568 * @param linePosition position of last line 569 */ 570 public void lineInfo(int line, int linePosition); 571 } 572 573 /** 574 * Check whether the given token represents the beginning of a literal. If so scan 575 * the literal and return <tt>true</tt>, otherwise return false. 576 * 577 * @param token the token. 578 * @param startTokenType the token type. 579 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 580 * @return True if a literal beginning with startToken was found and scanned. 581 */ 582 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 583 // Check if it can be a literal. 584 if (!canStartLiteral(startTokenType)) { 585 return false; 586 } 587 // We break on ambiguous tokens so if we already moved on it can't be a literal. 588 if (stream.get(stream.last()) != token) { 589 return false; 590 } 591 // Rewind to token start position 592 reset(Token.descPosition(token)); 593 594 if (ch0 == '/') { 595 return scanRegEx(); 596 } else if (ch0 == '<') { 597 if (ch1 == '<') { 598 return scanHereString(lir); 599 } else if (Character.isJavaIdentifierStart(ch1)) { 600 return scanXMLLiteral(); 601 } 602 } 603 604 return false; 605 } 606 607 /** 608 * Scan over regex literal. 609 * 610 * @return True if a regex literal. 611 */ 612 private boolean scanRegEx() { 613 assert ch0 == '/'; 614 // Make sure it's not a comment. 615 if (ch1 != '/' && ch1 != '*') { 616 // Record beginning of literal. 617 final int start = position; 618 // Skip /. 
619 skip(1); 620 boolean inBrackets = false; 621 622 // Scan for closing /, stopping at end of line. 623 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 624 // Skip over escaped character. 625 if (ch0 == '\\') { 626 skip(1); 627 if (isEOL(ch0)) { 628 reset(start); 629 return false; 630 } 631 skip(1); 632 } else { 633 if (ch0 == '[') { 634 inBrackets = true; 635 } else if (ch0 == ']') { 636 inBrackets = false; 637 } 638 639 // Skip literal character. 640 skip(1); 641 } 642 } 643 644 // If regex literal. 645 if (ch0 == '/') { 646 // Skip /. 647 skip(1); 648 649 // Skip over options. 650 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 651 skip(1); 652 } 653 654 // Add regex token. 655 add(REGEX, start); 656 // Regex literal detected. 657 return true; 658 } 659 660 // False start try again. 661 reset(start); 662 } 663 664 // Regex literal not detected. 665 return false; 666 } 667 668 /** 669 * Convert a digit to a integer. Can't use Character.digit since we are 670 * restricted to ASCII by the spec. 671 * 672 * @param ch Character to convert. 673 * @param base Numeric base. 674 * 675 * @return The converted digit or -1 if invalid. 676 */ 677 protected static int convertDigit(final char ch, final int base) { 678 int digit; 679 680 if ('0' <= ch && ch <= '9') { 681 digit = ch - '0'; 682 } else if ('A' <= ch && ch <= 'Z') { 683 digit = ch - 'A' + 10; 684 } else if ('a' <= ch && ch <= 'z') { 685 digit = ch - 'a' + 10; 686 } else { 687 return -1; 688 } 689 690 return digit < base ? digit : -1; 691 } 692 693 694 /** 695 * Get the value of a hexadecimal numeric sequence. 696 * 697 * @param length Number of digits. 698 * @param type Type of token to report against. 699 * @return Value of sequence or < 0 if no digits. 
700 */ 701 private int hexSequence(final int length, final TokenType type) { 702 int value = 0; 703 704 for (int i = 0; i < length; i++) { 705 final int digit = convertDigit(ch0, 16); 706 707 if (digit == -1) { 708 error(Lexer.message("invalid.hex"), type, position, limit); 709 return i == 0 ? -1 : value; 710 } 711 712 value = digit | value << 4; 713 skip(1); 714 } 715 716 return value; 717 } 718 719 /** 720 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 721 * 722 * @return Value of sequence. 723 */ 724 private int octalSequence() { 725 int value = 0; 726 727 for (int i = 0; i < 3; i++) { 728 final int digit = convertDigit(ch0, 8); 729 730 if (digit == -1) { 731 break; 732 } 733 value = digit | value << 3; 734 skip(1); 735 736 if (i == 1 && value >= 32) { 737 break; 738 } 739 } 740 return value; 741 } 742 743 /** 744 * Convert a string to a JavaScript identifier. 745 * 746 * @param start Position in source content. 747 * @param length Length of token. 748 * @return Ident string or null if an error. 749 */ 750 private String valueOfIdent(final int start, final int length) throws RuntimeException { 751 // Save the current position. 752 final int savePosition = position; 753 // End of scan. 754 final int end = start + length; 755 // Reset to beginning of content. 756 reset(start); 757 // Buffer for recording characters. 758 final StringBuilder sb = new StringBuilder(length); 759 760 // Scan until end of line or end of file. 761 while (!atEOF() && position < end && !isEOL(ch0)) { 762 // If escape character. 763 if (ch0 == '\\' && ch1 == 'u') { 764 skip(2); 765 final int ch = hexSequence(4, TokenType.IDENT); 766 if (isWhitespace((char)ch)) { 767 return null; 768 } 769 if (ch < 0) { 770 sb.append('\\'); 771 sb.append('u'); 772 } else { 773 sb.append((char)ch); 774 } 775 } else { 776 // Add regular character. 777 sb.append(ch0); 778 skip(1); 779 } 780 } 781 782 // Restore position. 
783 reset(savePosition); 784 785 return sb.toString(); 786 } 787 788 /** 789 * Scan over and identifier or keyword. Handles identifiers containing 790 * encoded Unicode chars. 791 * 792 * Example: 793 * 794 * var \u0042 = 44; 795 */ 796 private void scanIdentifierOrKeyword() { 797 // Record beginning of identifier. 798 final int start = position; 799 // Scan identifier. 800 final int length = scanIdentifier(); 801 // Check to see if it is a keyword. 802 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 803 if (type == FUNCTION && pauseOnFunctionBody) { 804 pauseOnNextLeftBrace = true; 805 } 806 // Add keyword or identifier token. 807 add(type, start); 808 } 809 810 /** 811 * Convert a string to a JavaScript string object. 812 * 813 * @param start Position in source content. 814 * @param length Length of token. 815 * @return JavaScript string object. 816 */ 817 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException { 818 // Save the current position. 819 final int savePosition = position; 820 // Calculate the end position. 821 final int end = start + length; 822 // Reset to beginning of string. 823 reset(start); 824 825 // Buffer for recording characters. 826 final StringBuilder sb = new StringBuilder(length); 827 828 // Scan until end of string. 829 while (position < end) { 830 // If escape character. 831 if (ch0 == '\\') { 832 skip(1); 833 834 final char next = ch0; 835 final int afterSlash = position; 836 837 skip(1); 838 839 // Special characters. 840 switch (next) { 841 case '0': 842 case '1': 843 case '2': 844 case '3': 845 case '4': 846 case '5': 847 case '6': 848 case '7': { 849 if (strict) { 850 // "\0" itself is allowed in strict mode. Only other 'real' 851 // octal escape sequences are not allowed (eg. "\02", "\31"). 
852 // See section 7.8.4 String literals production EscapeSequence 853 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) { 854 error(Lexer.message("strict.no.octal"), STRING, position, limit); 855 } 856 } 857 reset(afterSlash); 858 // Octal sequence. 859 final int ch = octalSequence(); 860 861 if (ch < 0) { 862 sb.append('\\'); 863 sb.append('x'); 864 } else { 865 sb.append((char)ch); 866 } 867 break; 868 } 869 case 'n': 870 sb.append('\n'); 871 break; 872 case 't': 873 sb.append('\t'); 874 break; 875 case 'b': 876 sb.append('\b'); 877 break; 878 case 'f': 879 sb.append('\f'); 880 break; 881 case 'r': 882 sb.append('\r'); 883 break; 884 case '\'': 885 sb.append('\''); 886 break; 887 case '\"': 888 sb.append('\"'); 889 break; 890 case '\\': 891 sb.append('\\'); 892 break; 893 case '\r': // CR | CRLF 894 if (ch0 == '\n') { 895 skip(1); 896 } 897 // fall through 898 case '\n': // LF 899 case '\u2028': // LS 900 case '\u2029': // PS 901 // continue on the next line, slash-return continues string 902 // literal 903 break; 904 case 'x': { 905 // Hex sequence. 906 final int ch = hexSequence(2, STRING); 907 908 if (ch < 0) { 909 sb.append('\\'); 910 sb.append('x'); 911 } else { 912 sb.append((char)ch); 913 } 914 } 915 break; 916 case 'u': { 917 // Unicode sequence. 918 final int ch = hexSequence(4, STRING); 919 920 if (ch < 0) { 921 sb.append('\\'); 922 sb.append('u'); 923 } else { 924 sb.append((char)ch); 925 } 926 } 927 break; 928 case 'v': 929 sb.append('\u000B'); 930 break; 931 // All other characters. 932 default: 933 sb.append(next); 934 break; 935 } 936 } else { 937 // Add regular character. 938 sb.append(ch0); 939 skip(1); 940 } 941 } 942 943 // Restore position. 944 reset(savePosition); 945 946 return sb.toString(); 947 } 948 949 /** 950 * Scan over a string literal. 951 * @param add true if we nare not just scanning but should actually modify the token stream 952 */ 953 protected void scanString(final boolean add) { 954 // Type of string. 
955 TokenType type = STRING; 956 // Record starting quote. 957 final char quote = ch0; 958 // Skip over quote. 959 skip(1); 960 961 // Record beginning of string content. 962 final State stringState = saveState(); 963 964 // Scan until close quote or end of line. 965 while (!atEOF() && ch0 != quote && !isEOL(ch0)) { 966 // Skip over escaped character. 967 if (ch0 == '\\') { 968 type = ESCSTRING; 969 skip(1); 970 if (! isEscapeCharacter(ch0)) { 971 error(Lexer.message("invalid.escape.char"), STRING, position, limit); 972 } 973 if (isEOL(ch0)) { 974 // Multiline string literal 975 skipEOL(false); 976 continue; 977 } 978 } 979 // Skip literal character. 980 skip(1); 981 } 982 983 // If close quote. 984 if (ch0 == quote) { 985 // Skip close quote. 986 skip(1); 987 } else { 988 error(Lexer.message("missing.close.quote"), STRING, position, limit); 989 } 990 991 // If not just scanning. 992 if (add) { 993 // Record end of string. 994 stringState.setLimit(position - 1); 995 996 if (scripting && !stringState.isEmpty()) { 997 switch (quote) { 998 case '`': 999 // Mark the beginning of an exec string. 1000 add(EXECSTRING, stringState.position, stringState.limit); 1001 // Frame edit string with left brace. 1002 add(LBRACE, stringState.position, stringState.position); 1003 // Process edit string. 1004 editString(type, stringState); 1005 // Frame edit string with right brace. 1006 add(RBRACE, stringState.limit, stringState.limit); 1007 break; 1008 case '"': 1009 // Only edit double quoted strings. 1010 editString(type, stringState); 1011 break; 1012 case '\'': 1013 // Add string token without editing. 1014 add(type, stringState.position, stringState.limit); 1015 break; 1016 default: 1017 break; 1018 } 1019 } else { 1020 /// Add string token without editing. 1021 add(type, stringState.position, stringState.limit); 1022 } 1023 } 1024 } 1025 1026 /** 1027 * Is the given character a valid escape char after "\" ? 
1028 * 1029 * @param ch character to be checked 1030 * @return if the given character is valid after "\" 1031 */ 1032 protected boolean isEscapeCharacter(final char ch) { 1033 return true; 1034 } 1035 1036 /** 1037 * Convert string to number. 1038 * 1039 * @param valueString String to convert. 1040 * @param radix Numeric base. 1041 * @return Converted number. 1042 */ 1043 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException { 1044 try { 1045 final long value = Long.parseLong(valueString, radix); 1046 if(value >= MIN_INT_L && value <= MAX_INT_L) { 1047 return (int)value; 1048 } 1049 return value; 1050 } catch (final NumberFormatException e) { 1051 if (radix == 10) { 1052 return Double.valueOf(valueString); 1053 } 1054 1055 double value = 0.0; 1056 1057 for (int i = 0; i < valueString.length(); i++) { 1058 final char ch = valueString.charAt(i); 1059 // Preverified, should always be a valid digit. 1060 final int digit = convertDigit(ch, radix); 1061 value *= radix; 1062 value += digit; 1063 } 1064 1065 return value; 1066 } 1067 } 1068 1069 /** 1070 * Scan a number. 1071 */ 1072 protected void scanNumber() { 1073 // Record beginning of number. 1074 final int start = position; 1075 // Assume value is a decimal. 1076 TokenType type = DECIMAL; 1077 1078 // First digit of number. 1079 int digit = convertDigit(ch0, 10); 1080 1081 // If number begins with 0x. 1082 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 1083 // Skip over 0xN. 1084 skip(3); 1085 // Skip over remaining digits. 1086 while (convertDigit(ch0, 16) != -1) { 1087 skip(1); 1088 } 1089 1090 type = HEXADECIMAL; 1091 } else { 1092 // Check for possible octal constant. 1093 boolean octal = digit == 0; 1094 // Skip first digit if not leading '.'. 1095 if (digit != -1) { 1096 skip(1); 1097 } 1098 1099 // Skip remaining digits. 1100 while ((digit = convertDigit(ch0, 10)) != -1) { 1101 // Check octal only digits. 
1102 octal = octal && digit < 8; 1103 // Skip digit. 1104 skip(1); 1105 } 1106 1107 if (octal && position - start > 1) { 1108 type = OCTAL; 1109 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 1110 // Must be a double. 1111 if (ch0 == '.') { 1112 // Skip period. 1113 skip(1); 1114 // Skip mantissa. 1115 while (convertDigit(ch0, 10) != -1) { 1116 skip(1); 1117 } 1118 } 1119 1120 // Detect exponent. 1121 if (ch0 == 'E' || ch0 == 'e') { 1122 // Skip E. 1123 skip(1); 1124 // Detect and skip exponent sign. 1125 if (ch0 == '+' || ch0 == '-') { 1126 skip(1); 1127 } 1128 // Skip exponent. 1129 while (convertDigit(ch0, 10) != -1) { 1130 skip(1); 1131 } 1132 } 1133 1134 type = FLOATING; 1135 } 1136 } 1137 1138 if (Character.isJavaIdentifierStart(ch0)) { 1139 error(Lexer.message("missing.space.after.number"), type, position, 1); 1140 } 1141 1142 // Add number token. 1143 add(type, start); 1144 } 1145 1146 /** 1147 * Convert a regex token to a token object. 1148 * 1149 * @param start Position in source content. 1150 * @param length Length of regex token. 1151 * @return Regex token object. 1152 */ 1153 XMLToken valueOfXML(final int start, final int length) { 1154 return new XMLToken(source.getString(start, length)); 1155 } 1156 1157 /** 1158 * Scan over a XML token. 1159 * 1160 * @return TRUE if is an XML literal. 1161 */ 1162 private boolean scanXMLLiteral() { 1163 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1); 1164 if (XML_LITERALS) { 1165 // Record beginning of xml expression. 1166 final int start = position; 1167 1168 int openCount = 0; 1169 1170 do { 1171 if (ch0 == '<') { 1172 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) { 1173 skip(3); 1174 openCount--; 1175 } else if (Character.isJavaIdentifierStart(ch1)) { 1176 skip(2); 1177 openCount++; 1178 } else if (ch1 == '?') { 1179 skip(2); 1180 } else if (ch1 == '!' 
&& ch2 == '-' && ch3 == '-') { 1181 skip(4); 1182 } else { 1183 reset(start); 1184 return false; 1185 } 1186 1187 while (!atEOF() && ch0 != '>') { 1188 if (ch0 == '/' && ch1 == '>') { 1189 openCount--; 1190 skip(1); 1191 break; 1192 } else if (ch0 == '\"' || ch0 == '\'') { 1193 scanString(false); 1194 } else { 1195 skip(1); 1196 } 1197 } 1198 1199 if (ch0 != '>') { 1200 reset(start); 1201 return false; 1202 } 1203 1204 skip(1); 1205 } else if (atEOF()) { 1206 reset(start); 1207 return false; 1208 } else { 1209 skip(1); 1210 } 1211 } while (openCount > 0); 1212 1213 add(XML, start); 1214 return true; 1215 } 1216 1217 return false; 1218 } 1219 1220 /** 1221 * Scan over identifier characters. 1222 * 1223 * @return Length of identifier or zero if none found. 1224 */ 1225 private int scanIdentifier() { 1226 final int start = position; 1227 1228 // Make sure first character is valid start character. 1229 if (ch0 == '\\' && ch1 == 'u') { 1230 skip(2); 1231 final int ch = hexSequence(4, TokenType.IDENT); 1232 1233 if (!Character.isJavaIdentifierStart(ch)) { 1234 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1235 } 1236 } else if (!Character.isJavaIdentifierStart(ch0)) { 1237 // Not an identifier. 1238 return 0; 1239 } 1240 1241 // Make sure remaining characters are valid part characters. 1242 while (!atEOF()) { 1243 if (ch0 == '\\' && ch1 == 'u') { 1244 skip(2); 1245 final int ch = hexSequence(4, TokenType.IDENT); 1246 1247 if (!Character.isJavaIdentifierPart(ch)) { 1248 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1249 } 1250 } else if (Character.isJavaIdentifierPart(ch0)) { 1251 skip(1); 1252 } else { 1253 break; 1254 } 1255 } 1256 1257 // Length of identifier sequence. 1258 return position - start; 1259 } 1260 1261 /** 1262 * Compare two identifiers (in content) for equality. 1263 * 1264 * @param aStart Start of first identifier. 1265 * @param aLength Length of first identifier. 
1266 * @param bStart Start of second identifier. 1267 * @param bLength Length of second identifier. 1268 * @return True if equal. 1269 */ 1270 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1271 if (aLength == bLength) { 1272 for (int i = 0; i < aLength; i++) { 1273 if (content[aStart + i] != content[bStart + i]) { 1274 return false; 1275 } 1276 } 1277 1278 return true; 1279 } 1280 1281 return false; 1282 } 1283 1284 /** 1285 * Detect if a line starts with a marker identifier. 1286 * 1287 * @param identStart Start of identifier. 1288 * @param identLength Length of identifier. 1289 * @return True if detected. 1290 */ 1291 private boolean hasHereMarker(final int identStart, final int identLength) { 1292 // Skip any whitespace. 1293 skipWhitespace(false); 1294 1295 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1296 } 1297 1298 /** 1299 * Lexer to service edit strings. 1300 */ 1301 private static class EditStringLexer extends Lexer { 1302 /** Type of string literals to emit. */ 1303 final TokenType stringType; 1304 1305 /* 1306 * Constructor. 1307 */ 1308 1309 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1310 super(lexer, stringState); 1311 1312 this.stringType = stringType; 1313 } 1314 1315 /** 1316 * Lexify the contents of the string. 1317 */ 1318 @Override 1319 public void lexify() { 1320 // Record start of string position. 1321 int stringStart = position; 1322 // Indicate that the priming first string has not been emitted. 1323 boolean primed = false; 1324 1325 while (true) { 1326 // Detect end of content. 1327 if (atEOF()) { 1328 break; 1329 } 1330 1331 // Honour escapes (should be well formed.) 1332 if (ch0 == '\\' && stringType == ESCSTRING) { 1333 skip(2); 1334 1335 continue; 1336 } 1337 1338 // If start of expression. 
1339 if (ch0 == '$' && ch1 == '{') { 1340 if (!primed || stringStart != position) { 1341 if (primed) { 1342 add(ADD, stringStart, stringStart + 1); 1343 } 1344 1345 add(stringType, stringStart, position); 1346 primed = true; 1347 } 1348 1349 // Skip ${ 1350 skip(2); 1351 1352 // Save expression state. 1353 final State expressionState = saveState(); 1354 1355 // Start with one open brace. 1356 int braceCount = 1; 1357 1358 // Scan for the rest of the string. 1359 while (!atEOF()) { 1360 // If closing brace. 1361 if (ch0 == '}') { 1362 // Break only only if matching brace. 1363 if (--braceCount == 0) { 1364 break; 1365 } 1366 } else if (ch0 == '{') { 1367 // Bump up the brace count. 1368 braceCount++; 1369 } 1370 1371 // Skip to next character. 1372 skip(1); 1373 } 1374 1375 // If braces don't match then report an error. 1376 if (braceCount != 0) { 1377 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1378 } 1379 1380 // Mark end of expression. 1381 expressionState.setLimit(position); 1382 // Skip closing brace. 1383 skip(1); 1384 1385 // Start next string. 1386 stringStart = position; 1387 1388 // Concatenate expression. 1389 add(ADD, expressionState.position, expressionState.position + 1); 1390 add(LPAREN, expressionState.position, expressionState.position + 1); 1391 1392 // Scan expression. 1393 final Lexer lexer = new Lexer(this, expressionState); 1394 lexer.lexify(); 1395 1396 // Close out expression parenthesis. 1397 add(RPAREN, position - 1, position); 1398 1399 continue; 1400 } 1401 1402 // Next character in string. 1403 skip(1); 1404 } 1405 1406 // If there is any unemitted string portion. 1407 if (stringStart != limit) { 1408 // Concatenate remaining string. 1409 if (primed) { 1410 add(ADD, stringStart, 1); 1411 } 1412 1413 add(stringType, stringStart, limit); 1414 } 1415 } 1416 1417 } 1418 1419 /** 1420 * Edit string for nested expressions. 1421 * 1422 * @param stringType Type of string literals to emit. 
1423 * @param stringState State of lexer at start of string. 1424 */ 1425 private void editString(final TokenType stringType, final State stringState) { 1426 // Use special lexer to scan string. 1427 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1428 lexer.lexify(); 1429 1430 // Need to keep lexer informed. 1431 last = stringType; 1432 } 1433 1434 /** 1435 * Scan over a here string. 1436 * 1437 * @return TRUE if is a here string. 1438 */ 1439 private boolean scanHereString(final LineInfoReceiver lir) { 1440 assert ch0 == '<' && ch1 == '<'; 1441 if (scripting) { 1442 // Record beginning of here string. 1443 final State saved = saveState(); 1444 1445 // << or <<< 1446 final boolean excludeLastEOL = ch2 != '<'; 1447 1448 if (excludeLastEOL) { 1449 skip(2); 1450 } else { 1451 skip(3); 1452 } 1453 1454 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1455 final char quoteChar = ch0; 1456 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1457 if (noStringEditing) { 1458 skip(1); 1459 } 1460 final int identStart = position; 1461 final int identLength = scanIdentifier(); 1462 if (noStringEditing) { 1463 if (ch0 != quoteChar) { 1464 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1465 restoreState(saved); 1466 return false; 1467 } 1468 skip(1); 1469 } 1470 1471 // Check for identifier. 1472 if (identLength == 0) { 1473 // Treat as shift. 1474 restoreState(saved); 1475 1476 return false; 1477 } 1478 1479 // Record rest of line. 1480 final State restState = saveState(); 1481 // keep line number updated 1482 int lastLine = line; 1483 1484 skipLine(false); 1485 lastLine++; 1486 int lastLinePosition = position; 1487 restState.setLimit(position); 1488 1489 // Record beginning of string. 1490 final State stringState = saveState(); 1491 int stringEnd = position; 1492 1493 // Hunt down marker. 1494 while (!atEOF()) { 1495 // Skip any whitespace. 
1496 skipWhitespace(false); 1497 1498 if (hasHereMarker(identStart, identLength)) { 1499 break; 1500 } 1501 1502 skipLine(false); 1503 lastLine++; 1504 lastLinePosition = position; 1505 stringEnd = position; 1506 } 1507 1508 // notify last line information 1509 lir.lineInfo(lastLine, lastLinePosition); 1510 1511 // Record end of string. 1512 stringState.setLimit(stringEnd); 1513 1514 // If marker is missing. 1515 if (stringState.isEmpty() || atEOF()) { 1516 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1517 restoreState(saved); 1518 1519 return false; 1520 } 1521 1522 // Remove last end of line if specified. 1523 if (excludeLastEOL) { 1524 // Handles \n. 1525 if (content[stringEnd - 1] == '\n') { 1526 stringEnd--; 1527 } 1528 1529 // Handles \r and \r\n. 1530 if (content[stringEnd - 1] == '\r') { 1531 stringEnd--; 1532 } 1533 1534 // Update end of string. 1535 stringState.setLimit(stringEnd); 1536 } 1537 1538 // Edit string if appropriate. 1539 if (!noStringEditing && !stringState.isEmpty()) { 1540 editString(STRING, stringState); 1541 } else { 1542 // Add here string. 1543 add(STRING, stringState.position, stringState.limit); 1544 } 1545 1546 // Scan rest of original line. 1547 final Lexer restLexer = new Lexer(this, restState); 1548 1549 restLexer.lexify(); 1550 1551 return true; 1552 } 1553 1554 return false; 1555 } 1556 1557 /** 1558 * Breaks source content down into lex units, adding tokens to the token 1559 * stream. The routine scans until the stream buffer is full. Can be called 1560 * repeatedly until EOF is detected. 1561 */ 1562 public void lexify() { 1563 while (!stream.isFull() || nested) { 1564 // Skip over whitespace. 1565 skipWhitespace(true); 1566 1567 // Detect end of file. 1568 if (atEOF()) { 1569 if (!nested) { 1570 // Add an EOF token at the end. 1571 add(EOF, position); 1572 } 1573 1574 break; 1575 } 1576 1577 // Check for comments. 
Note that we don't scan for regexp and other literals here as 1578 // we may not have enough context to distinguish them from similar looking operators. 1579 // Instead we break on ambiguous operators below and let the parser decide. 1580 if (ch0 == '/' && skipComments()) { 1581 continue; 1582 } 1583 1584 if (scripting && ch0 == '#' && skipComments()) { 1585 continue; 1586 } 1587 1588 // TokenType for lookup of delimiter or operator. 1589 TokenType type; 1590 1591 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1592 // '.' followed by digit. 1593 // Scan and add a number. 1594 scanNumber(); 1595 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1596 // Get the number of characters in the token. 1597 final int typeLength = type.getLength(); 1598 // Skip that many characters. 1599 skip(typeLength); 1600 // Add operator token. 1601 add(type, position - typeLength); 1602 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1603 // We break to let the parser decide what it is. 1604 if (canStartLiteral(type)) { 1605 break; 1606 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1607 pauseOnNextLeftBrace = false; 1608 break; 1609 } 1610 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1611 // Scan and add identifier or keyword. 1612 scanIdentifierOrKeyword(); 1613 } else if (isStringDelimiter(ch0)) { 1614 // Scan and add a string. 1615 scanString(true); 1616 } else if (Character.isDigit(ch0)) { 1617 // Scan and add a number. 1618 scanNumber(); 1619 } else { 1620 // Don't recognize this character. 1621 skip(1); 1622 add(ERROR, position - 1); 1623 } 1624 } 1625 } 1626 1627 /** 1628 * Return value of token given its token descriptor. 1629 * 1630 * @param token Token descriptor. 1631 * @return JavaScript value. 
1632 */ 1633 Object getValueOf(final long token, final boolean strict) { 1634 final int start = Token.descPosition(token); 1635 final int len = Token.descLength(token); 1636 1637 switch (Token.descType(token)) { 1638 case DECIMAL: 1639 return Lexer.valueOf(source.getString(start, len), 10); // number 1640 case OCTAL: 1641 return Lexer.valueOf(source.getString(start, len), 8); // number 1642 case HEXADECIMAL: 1643 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1644 case FLOATING: 1645 final String str = source.getString(start, len); 1646 final double value = Double.valueOf(str); 1647 if (str.indexOf('.') != -1) { 1648 return value; //number 1649 } 1650 //anything without an explicit decimal point is still subject to a 1651 //"representable as int or long" check. Then the programmer does not 1652 //explicitly code something as a double. For example new Color(int, int, int) 1653 //and new Color(float, float, float) will get ambiguous for cases like 1654 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1655 //yet we don't want e.g. 
1e6 to be a double unnecessarily 1656 if (JSType.isStrictlyRepresentableAsInt(value)) { 1657 return (int)value; 1658 } else if (JSType.isStrictlyRepresentableAsLong(value)) { 1659 return (long)value; 1660 } 1661 return value; 1662 case STRING: 1663 return source.getString(start, len); // String 1664 case ESCSTRING: 1665 return valueOfString(start, len, strict); // String 1666 case IDENT: 1667 return valueOfIdent(start, len); // String 1668 case REGEX: 1669 return valueOfPattern(start, len); // RegexToken::LexerToken 1670 case XML: 1671 return valueOfXML(start, len); // XMLToken::LexerToken 1672 case DIRECTIVE_COMMENT: 1673 return source.getString(start, len); 1674 default: 1675 break; 1676 } 1677 1678 return null; 1679 } 1680 1681 /** 1682 * Get the correctly localized error message for a given message id format arguments 1683 * @param msgId message id 1684 * @param args format arguments 1685 * @return message 1686 */ 1687 protected static String message(final String msgId, final String... args) { 1688 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1689 } 1690 1691 /** 1692 * Generate a runtime exception 1693 * 1694 * @param message error message 1695 * @param type token type 1696 * @param start start position of lexed error 1697 * @param length length of lexed error 1698 * @throws ParserException unconditionally 1699 */ 1700 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1701 final long token = Token.toDesc(type, start, length); 1702 final int pos = Token.descPosition(token); 1703 final int lineNum = source.getLine(pos); 1704 final int columnNum = source.getColumn(pos); 1705 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1706 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1707 } 1708 1709 /** 1710 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 
* This is the abstract superclass
     */
    public abstract static class LexerToken implements Serializable {
        private static final long serialVersionUID = 1L;

        /** Source text of the token. */
        private final String expression;

        /**
         * Constructor
         * @param expression token expression
         */
        protected LexerToken(final String expression) {
            this.expression = expression;
        }

        /**
         * Get the expression
         * @return expression
         */
        public String getExpression() {
            return expression;
        }
    }

    /**
     * Temporary container for regular expressions.
     */
    public static class RegexToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /** Options. */
        private final String options;

        /**
         * Constructor.
         *
         * @param expression  regexp expression
         * @param options     regexp options
         */
        public RegexToken(final String expression, final String options) {
            super(expression);
            this.options = options;
        }

        /**
         * Get regexp options
         * @return options
         */
        public String getOptions() {
            return options;
        }

        /**
         * Render the token in literal form: the expression between slashes,
         * followed by the options.
         */
        @Override
        public String toString() {
            return "/" + getExpression() + "/" + options;
        }
    }

    /**
     * Temporary container for XML expression.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor.
         *
         * @param expression  XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
}