Lexer.java revision 1392:5efd65e18b71
1/*
2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.ADD;
29import static jdk.nashorn.internal.parser.TokenType.COMMENT;
30import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
31import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
32import static jdk.nashorn.internal.parser.TokenType.EOF;
33import static jdk.nashorn.internal.parser.TokenType.EOL;
34import static jdk.nashorn.internal.parser.TokenType.ERROR;
35import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
36import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
37import static jdk.nashorn.internal.parser.TokenType.FLOATING;
38import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
39import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
40import static jdk.nashorn.internal.parser.TokenType.LBRACE;
41import static jdk.nashorn.internal.parser.TokenType.LPAREN;
42import static jdk.nashorn.internal.parser.TokenType.OCTAL;
43import static jdk.nashorn.internal.parser.TokenType.RBRACE;
44import static jdk.nashorn.internal.parser.TokenType.REGEX;
45import static jdk.nashorn.internal.parser.TokenType.RPAREN;
46import static jdk.nashorn.internal.parser.TokenType.STRING;
47import static jdk.nashorn.internal.parser.TokenType.XML;
48
49import java.io.Serializable;
50import jdk.nashorn.internal.runtime.ECMAErrors;
51import jdk.nashorn.internal.runtime.ErrorManager;
52import jdk.nashorn.internal.runtime.JSErrorType;
53import jdk.nashorn.internal.runtime.JSType;
54import jdk.nashorn.internal.runtime.ParserException;
55import jdk.nashorn.internal.runtime.Source;
56import jdk.nashorn.internal.runtime.options.Options;
57
58/**
59 * Responsible for converting source content into a stream of tokens.
60 *
61 */
62@SuppressWarnings("fallthrough")
63public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the boolean option {@code nashorn.lexer.xmlliterals}; when set,
     * a token starting with {@code '<'} may begin a literal even outside
     * scripting mode (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /** True if here and edit strings are supported. */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Line number carried by the EOL token that is waiting to be written to
     * the stream, or {@code -1} when no EOL is pending.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** Set by the constructor; enables pausing when a function body is encountered. */
    private final boolean pauseOnFunctionBody;
    /** Set when a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is on. */
    private boolean pauseOnNextLeftBrace;
92
    /** Plain horizontal blanks. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** ASCII line terminators. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Every character treated as an end of line by {@link #isJSEOL(char)}. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Every character treated as whitespace by {@link #isJSWhitespace(char)};
     * this includes all of the end of line characters.
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same set of characters as {@link #JAVASCRIPT_WHITESPACE}, but spelled
     * out as text: each entry is a backslash, the letter u and four hex digits
     * rather than the character itself. Returned by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
154
155    static String unicodeEscape(final char ch) {
156        final StringBuilder sb = new StringBuilder();
157
158        sb.append("\\u");
159
160        final String hex = Integer.toHexString(ch);
161        for (int i = hex.length(); i < 4; i++) {
162            sb.append('0');
163        }
164        sb.append(hex);
165
166        return sb.toString();
167    }
168
    /**
     * Constructor. Delegates to
     * {@link #Lexer(Source, TokenStream, boolean)} with scripting mode off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
178
    /**
     * Constructor. Lexes the source from offset 0 for
     * {@code source.getLength()} characters and does not pause on function
     * bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
        this(source, 0, source.getLength(), stream, scripting, false);
    }
189
    /**
     * Constructor. Creates a top-level (non-nested) lexer whose pending line
     * starts at 1 and whose last token type starts out as EOL.
     *
     * @param source    the source
     * @param start     start position in source from which to start lexing
     * @param len       length of source segment to lex
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
     * function body. This is used with the feature where the parser is skipping nested function bodies to
     * avoid reading ahead unnecessarily when we skip the function bodies.
     */

    public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
        super(source.getContent(), 1, start, len);
        this.source      = source;
        this.stream      = stream;
        this.scripting   = scripting;
        this.nested      = false;
        this.pendingLine = 1;
        this.last        = EOL;

        this.pauseOnFunctionBody = pauseOnFunctionBody;
    }
214
215    private Lexer(final Lexer lexer, final State state) {
216        super(lexer, state);
217
218        source = lexer.source;
219        stream = lexer.stream;
220        scripting = lexer.scripting;
221        nested = true;
222
223        pendingLine = state.pendingLine;
224        linePosition = state.linePosition;
225        last = EOL;
226        pauseOnFunctionBody = false;
227    }
228
    /**
     * Snapshot of the lexer: the scanner's position, limit and line plus the
     * lexer's own pending EOL bookkeeping and last token type. Produced by
     * {@link #saveState()} and consumed by {@link #restoreState(State)}.
     */
    static class State extends Scanner.State {
        /** Pending new line number and position. */
        public final int pendingLine;

        /** Position of last EOL + 1. */
        public final int linePosition;

        /** Type of last token added. */
        public final TokenType last;

        /*
         * Constructor. The first three arguments are handed to Scanner.State;
         * the remaining three are stored in this class's own fields.
         */

        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
            super(position, limit, line);

            this.pendingLine = pendingLine;
            this.linePosition = linePosition;
            this.last = last;
        }
    }
251
    /**
     * Save the state of the scan: position, limit and line together with the
     * pending EOL line, the line position and the type of the last token.
     *
     * @return Captured state.
     */
    @Override
    State saveState() {
        return new State(position, limit, line, pendingLine, linePosition, last);
    }
261
    /**
     * Restore the state of the scan. The scanner part is restored by the
     * superclass; the pending EOL line, the line position and the last token
     * type are restored here.
     *
     * @param state
     *            Captured state.
     */
    void restoreState(final State state) {
        super.restoreState(state);

        pendingLine = state.pendingLine;
        linePosition = state.linePosition;
        last = state.last;
    }
275
276    /**
277     * Add a new token to the stream.
278     *
279     * @param type
280     *            Token type.
281     * @param start
282     *            Start position.
283     * @param end
284     *            End position.
285     */
286    protected void add(final TokenType type, final int start, final int end) {
287        // Record last token.
288        last = type;
289
290        // Only emit the last EOL in a cluster.
291        if (type == EOL) {
292            pendingLine = end;
293            linePosition = start;
294        } else {
295            // Write any pending EOL to stream.
296            if (pendingLine != -1) {
297                stream.put(Token.toDesc(EOL, linePosition, pendingLine));
298                pendingLine = -1;
299            }
300
301            // Write token to stream.
302            stream.put(Token.toDesc(type, start, end - start));
303        }
304    }
305
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
317
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is given in textual escape
     * form (backslash, u, four hex digits), not as the character itself.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
326
327    /**
328     * Skip end of line.
329     *
330     * @param addEOL true if EOL token should be recorded.
331     */
332    private void skipEOL(final boolean addEOL) {
333
334        if (ch0 == '\r') { // detect \r\n pattern
335            skip(1);
336            if (ch0 == '\n') {
337                skip(1);
338            }
339        } else { // all other space, ch0 is guaranteed to be EOL or \0
340            skip(1);
341        }
342
343        // bump up line count
344        line++;
345
346        if (addEOL) {
347            // Add an EOL token.
348            add(EOL, position, line);
349        }
350    }
351
352    /**
353     * Skip over rest of line including end of line.
354     *
355     * @param addEOL true if EOL token should be recorded.
356     */
357    private void skipLine(final boolean addEOL) {
358        // Ignore characters.
359        while (!isEOL(ch0) && !atEOF()) {
360            skip(1);
361        }
362        // Skip over end of line.
363        skipEOL(addEOL);
364    }
365
366    /**
367     * Test whether a char is valid JavaScript whitespace
368     * @param ch a char
369     * @return true if valid JavaScript whitespace
370     */
371    public static boolean isJSWhitespace(final char ch) {
372        return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
373    }
374
375    /**
376     * Test whether a char is valid JavaScript end of line
377     * @param ch a char
378     * @return true if valid JavaScript end of line
379     */
380    public static boolean isJSEOL(final char ch) {
381        return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
382    }
383
384    /**
385     * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
386     * strings ('`') in scripting mode.
387     * @param ch a char
388     * @return true if string delimiter
389     */
390    protected boolean isStringDelimiter(final char ch) {
391        return ch == '\'' || ch == '"' || (scripting && ch == '`');
392    }
393
    /**
     * Instance-level whitespace test used by the scanning methods of this
     * class; this implementation delegates to {@link #isJSWhitespace(char)}.
     * @param ch a char
     * @return true if valid JavaScript whitespace
     */
    protected boolean isWhitespace(final char ch) {
        return Lexer.isJSWhitespace(ch);
    }
402
    /**
     * Instance-level end of line test used by the scanning methods of this
     * class; this implementation delegates to {@link #isJSEOL(char)}.
     * @param ch a char
     * @return true if valid JavaScript end of line
     */
    protected boolean isEOL(final char ch) {
        return Lexer.isJSEOL(ch);
    }
411
412    /**
413     * Skip over whitespace and detect end of line, adding EOL tokens if
414     * encountered.
415     *
416     * @param addEOL true if EOL tokens should be recorded.
417     */
418    private void skipWhitespace(final boolean addEOL) {
419        while (isWhitespace(ch0)) {
420            if (isEOL(ch0)) {
421                skipEOL(addEOL);
422            } else {
423                skip(1);
424            }
425        }
426    }
427
428    /**
429     * Skip over comments.
430     *
431     * @return True if a comment.
432     */
433    protected boolean skipComments() {
434        // Save the current position.
435        final int start = position;
436
437        if (ch0 == '/') {
438            // Is it a // comment.
439            if (ch1 == '/') {
440                // Skip over //.
441                skip(2);
442
443                boolean directiveComment = false;
444                if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
445                    directiveComment = true;
446                }
447
448                // Scan for EOL.
449                while (!atEOF() && !isEOL(ch0)) {
450                    skip(1);
451                }
452                // Did detect a comment.
453                add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
454                return true;
455            } else if (ch1 == '*') {
456                // Skip over /*.
457                skip(2);
458                // Scan for */.
459                while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
460                    // If end of line handle else skip character.
461                    if (isEOL(ch0)) {
462                        skipEOL(true);
463                    } else {
464                        skip(1);
465                    }
466                }
467
468                if (atEOF()) {
469                    // TODO - Report closing */ missing in parser.
470                    add(ERROR, start);
471                } else {
472                    // Skip */.
473                    skip(2);
474                }
475
476                // Did detect a comment.
477                add(COMMENT, start);
478                return true;
479            }
480        } else if (ch0 == '#') {
481            assert scripting;
482            // shell style comment
483            // Skip over #.
484            skip(1);
485            // Scan for EOL.
486            while (!atEOF() && !isEOL(ch0)) {
487                skip(1);
488            }
489            // Did detect a comment.
490            add(COMMENT, start);
491            return true;
492        }
493
494        // Not a comment.
495        return false;
496    }
497
498    /**
499     * Convert a regex token to a token object.
500     *
501     * @param start  Position in source content.
502     * @param length Length of regex token.
503     * @return Regex token object.
504     */
505    public RegexToken valueOfPattern(final int start, final int length) {
506        // Save the current position.
507        final int savePosition = position;
508        // Reset to beginning of content.
509        reset(start);
510        // Buffer for recording characters.
511        final StringBuilder sb = new StringBuilder(length);
512
513        // Skip /.
514        skip(1);
515        boolean inBrackets = false;
516        // Scan for closing /, stopping at end of line.
517        while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
518            // Skip over escaped character.
519            if (ch0 == '\\') {
520                sb.append(ch0);
521                sb.append(ch1);
522                skip(2);
523            } else {
524                if (ch0 == '[') {
525                    inBrackets = true;
526                } else if (ch0 == ']') {
527                    inBrackets = false;
528                }
529
530                // Skip literal character.
531                sb.append(ch0);
532                skip(1);
533            }
534        }
535
536        // Get pattern as string.
537        final String regex = sb.toString();
538
539        // Skip /.
540        skip(1);
541
542        // Options as string.
543        final String options = source.getString(position, scanIdentifier());
544
545        reset(savePosition);
546
547        // Compile the pattern.
548        return new RegexToken(regex, options);
549    }
550
551    /**
552     * Return true if the given token can be the beginning of a literal.
553     *
554     * @param token a token
555     * @return true if token can start a literal.
556     */
557    public boolean canStartLiteral(final TokenType token) {
558        return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
559    }
560
    /**
     * Callback interface to receive line information for multi-line literals.
     */
    protected interface LineInfoReceiver {
        /**
         * Receives line information
         * @param line last line number
         * @param linePosition position of last line
         */
        public void lineInfo(int line, int linePosition);
    }
572
573    /**
574     * Check whether the given token represents the beginning of a literal. If so scan
575     * the literal and return <tt>true</tt>, otherwise return false.
576     *
577     * @param token the token.
578     * @param startTokenType the token type.
579     * @param lir LineInfoReceiver that receives line info for multi-line string literals.
580     * @return True if a literal beginning with startToken was found and scanned.
581     */
582    protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
583        // Check if it can be a literal.
584        if (!canStartLiteral(startTokenType)) {
585            return false;
586        }
587        // We break on ambiguous tokens so if we already moved on it can't be a literal.
588        if (stream.get(stream.last()) != token) {
589            return false;
590        }
591        // Rewind to token start position
592        reset(Token.descPosition(token));
593
594        if (ch0 == '/') {
595            return scanRegEx();
596        } else if (ch0 == '<') {
597            if (ch1 == '<') {
598                return scanHereString(lir);
599            } else if (Character.isJavaIdentifierStart(ch1)) {
600                return scanXMLLiteral();
601            }
602        }
603
604        return false;
605    }
606
607    /**
608     * Scan over regex literal.
609     *
610     * @return True if a regex literal.
611     */
612    private boolean scanRegEx() {
613        assert ch0 == '/';
614        // Make sure it's not a comment.
615        if (ch1 != '/' && ch1 != '*') {
616            // Record beginning of literal.
617            final int start = position;
618            // Skip /.
619            skip(1);
620            boolean inBrackets = false;
621
622            // Scan for closing /, stopping at end of line.
623            while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
624                // Skip over escaped character.
625                if (ch0 == '\\') {
626                    skip(1);
627                    if (isEOL(ch0)) {
628                        reset(start);
629                        return false;
630                    }
631                    skip(1);
632                } else {
633                    if (ch0 == '[') {
634                        inBrackets = true;
635                    } else if (ch0 == ']') {
636                        inBrackets = false;
637                    }
638
639                    // Skip literal character.
640                    skip(1);
641                }
642            }
643
644            // If regex literal.
645            if (ch0 == '/') {
646                // Skip /.
647                skip(1);
648
649                // Skip over options.
650                while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
651                    skip(1);
652                }
653
654                // Add regex token.
655                add(REGEX, start);
656                // Regex literal detected.
657                return true;
658            }
659
660            // False start try again.
661            reset(start);
662        }
663
664        // Regex literal not detected.
665        return false;
666    }
667
668    /**
669     * Convert a digit to a integer.  Can't use Character.digit since we are
670     * restricted to ASCII by the spec.
671     *
672     * @param ch   Character to convert.
673     * @param base Numeric base.
674     *
675     * @return The converted digit or -1 if invalid.
676     */
677    protected static int convertDigit(final char ch, final int base) {
678        int digit;
679
680        if ('0' <= ch && ch <= '9') {
681            digit = ch - '0';
682        } else if ('A' <= ch && ch <= 'Z') {
683            digit = ch - 'A' + 10;
684        } else if ('a' <= ch && ch <= 'z') {
685            digit = ch - 'a' + 10;
686        } else {
687            return -1;
688        }
689
690        return digit < base ? digit : -1;
691    }
692
693
694    /**
695     * Get the value of a hexadecimal numeric sequence.
696     *
697     * @param length Number of digits.
698     * @param type   Type of token to report against.
699     * @return Value of sequence or < 0 if no digits.
700     */
701    private int hexSequence(final int length, final TokenType type) {
702        int value = 0;
703
704        for (int i = 0; i < length; i++) {
705            final int digit = convertDigit(ch0, 16);
706
707            if (digit == -1) {
708                error(Lexer.message("invalid.hex"), type, position, limit);
709                return i == 0 ? -1 : value;
710            }
711
712            value = digit | value << 4;
713            skip(1);
714        }
715
716        return value;
717    }
718
719    /**
720     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
721     *
722     * @return Value of sequence.
723     */
724    private int octalSequence() {
725        int value = 0;
726
727        for (int i = 0; i < 3; i++) {
728            final int digit = convertDigit(ch0, 8);
729
730            if (digit == -1) {
731                break;
732            }
733            value = digit | value << 3;
734            skip(1);
735
736            if (i == 1 && value >= 32) {
737                break;
738            }
739        }
740        return value;
741    }
742
743    /**
744     * Convert a string to a JavaScript identifier.
745     *
746     * @param start  Position in source content.
747     * @param length Length of token.
748     * @return Ident string or null if an error.
749     */
750    private String valueOfIdent(final int start, final int length) throws RuntimeException {
751        // Save the current position.
752        final int savePosition = position;
753        // End of scan.
754        final int end = start + length;
755        // Reset to beginning of content.
756        reset(start);
757        // Buffer for recording characters.
758        final StringBuilder sb = new StringBuilder(length);
759
760        // Scan until end of line or end of file.
761        while (!atEOF() && position < end && !isEOL(ch0)) {
762            // If escape character.
763            if (ch0 == '\\' && ch1 == 'u') {
764                skip(2);
765                final int ch = hexSequence(4, TokenType.IDENT);
766                if (isWhitespace((char)ch)) {
767                    return null;
768                }
769                if (ch < 0) {
770                    sb.append('\\');
771                    sb.append('u');
772                } else {
773                    sb.append((char)ch);
774                }
775            } else {
776                // Add regular character.
777                sb.append(ch0);
778                skip(1);
779            }
780        }
781
782        // Restore position.
783        reset(savePosition);
784
785        return sb.toString();
786    }
787
788    /**
789     * Scan over and identifier or keyword. Handles identifiers containing
790     * encoded Unicode chars.
791     *
792     * Example:
793     *
794     * var \u0042 = 44;
795     */
796    private void scanIdentifierOrKeyword() {
797        // Record beginning of identifier.
798        final int start = position;
799        // Scan identifier.
800        final int length = scanIdentifier();
801        // Check to see if it is a keyword.
802        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
803        if (type == FUNCTION && pauseOnFunctionBody) {
804            pauseOnNextLeftBrace = true;
805        }
806        // Add keyword or identifier token.
807        add(type, start);
808    }
809
810    /**
811     * Convert a string to a JavaScript string object.
812     *
813     * @param start  Position in source content.
814     * @param length Length of token.
815     * @return JavaScript string object.
816     */
817    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
818        // Save the current position.
819        final int savePosition = position;
820        // Calculate the end position.
821        final int end = start + length;
822        // Reset to beginning of string.
823        reset(start);
824
825        // Buffer for recording characters.
826        final StringBuilder sb = new StringBuilder(length);
827
828        // Scan until end of string.
829        while (position < end) {
830            // If escape character.
831            if (ch0 == '\\') {
832                skip(1);
833
834                final char next = ch0;
835                final int afterSlash = position;
836
837                skip(1);
838
839                // Special characters.
840                switch (next) {
841                case '0':
842                case '1':
843                case '2':
844                case '3':
845                case '4':
846                case '5':
847                case '6':
848                case '7': {
849                    if (strict) {
850                        // "\0" itself is allowed in strict mode. Only other 'real'
851                        // octal escape sequences are not allowed (eg. "\02", "\31").
852                        // See section 7.8.4 String literals production EscapeSequence
853                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
854                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
855                        }
856                    }
857                    reset(afterSlash);
858                    // Octal sequence.
859                    final int ch = octalSequence();
860
861                    if (ch < 0) {
862                        sb.append('\\');
863                        sb.append('x');
864                    } else {
865                        sb.append((char)ch);
866                    }
867                    break;
868                }
869                case 'n':
870                    sb.append('\n');
871                    break;
872                case 't':
873                    sb.append('\t');
874                    break;
875                case 'b':
876                    sb.append('\b');
877                    break;
878                case 'f':
879                    sb.append('\f');
880                    break;
881                case 'r':
882                    sb.append('\r');
883                    break;
884                case '\'':
885                    sb.append('\'');
886                    break;
887                case '\"':
888                    sb.append('\"');
889                    break;
890                case '\\':
891                    sb.append('\\');
892                    break;
893                case '\r': // CR | CRLF
894                    if (ch0 == '\n') {
895                        skip(1);
896                    }
897                    // fall through
898                case '\n': // LF
899                case '\u2028': // LS
900                case '\u2029': // PS
901                    // continue on the next line, slash-return continues string
902                    // literal
903                    break;
904                case 'x': {
905                    // Hex sequence.
906                    final int ch = hexSequence(2, STRING);
907
908                    if (ch < 0) {
909                        sb.append('\\');
910                        sb.append('x');
911                    } else {
912                        sb.append((char)ch);
913                    }
914                }
915                    break;
916                case 'u': {
917                    // Unicode sequence.
918                    final int ch = hexSequence(4, STRING);
919
920                    if (ch < 0) {
921                        sb.append('\\');
922                        sb.append('u');
923                    } else {
924                        sb.append((char)ch);
925                    }
926                }
927                    break;
928                case 'v':
929                    sb.append('\u000B');
930                    break;
931                // All other characters.
932                default:
933                    sb.append(next);
934                    break;
935                }
936            } else {
937                // Add regular character.
938                sb.append(ch0);
939                skip(1);
940            }
941        }
942
943        // Restore position.
944        reset(savePosition);
945
946        return sb.toString();
947    }
948
949    /**
950     * Scan over a string literal.
951     * @param add true if we nare not just scanning but should actually modify the token stream
952     */
953    protected void scanString(final boolean add) {
954        // Type of string.
955        TokenType type = STRING;
956        // Record starting quote.
957        final char quote = ch0;
958        // Skip over quote.
959        skip(1);
960
961        // Record beginning of string content.
962        final State stringState = saveState();
963
964        // Scan until close quote or end of line.
965        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
966            // Skip over escaped character.
967            if (ch0 == '\\') {
968                type = ESCSTRING;
969                skip(1);
970                if (! isEscapeCharacter(ch0)) {
971                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
972                }
973                if (isEOL(ch0)) {
974                    // Multiline string literal
975                    skipEOL(false);
976                    continue;
977                }
978            }
979            // Skip literal character.
980            skip(1);
981        }
982
983        // If close quote.
984        if (ch0 == quote) {
985            // Skip close quote.
986            skip(1);
987        } else {
988            error(Lexer.message("missing.close.quote"), STRING, position, limit);
989        }
990
991        // If not just scanning.
992        if (add) {
993            // Record end of string.
994            stringState.setLimit(position - 1);
995
996            if (scripting && !stringState.isEmpty()) {
997                switch (quote) {
998                case '`':
999                    // Mark the beginning of an exec string.
1000                    add(EXECSTRING, stringState.position, stringState.limit);
1001                    // Frame edit string with left brace.
1002                    add(LBRACE, stringState.position, stringState.position);
1003                    // Process edit string.
1004                    editString(type, stringState);
1005                    // Frame edit string with right brace.
1006                    add(RBRACE, stringState.limit, stringState.limit);
1007                    break;
1008                case '"':
1009                    // Only edit double quoted strings.
1010                    editString(type, stringState);
1011                    break;
1012                case '\'':
1013                    // Add string token without editing.
1014                    add(type, stringState.position, stringState.limit);
1015                    break;
1016                default:
1017                    break;
1018                }
1019            } else {
1020                /// Add string token without editing.
1021                add(type, stringState.position, stringState.limit);
1022            }
1023        }
1024    }
1025
1026    /**
1027     * Is the given character a valid escape char after "\" ?
1028     *
1029     * @param ch character to be checked
1030     * @return if the given character is valid after "\"
1031     */
1032    protected boolean isEscapeCharacter(final char ch) {
1033        return true;
1034    }
1035
1036    /**
1037     * Convert string to number.
1038     *
1039     * @param valueString  String to convert.
1040     * @param radix        Numeric base.
1041     * @return Converted number.
1042     */
1043    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1044        try {
1045            final long value = Long.parseLong(valueString, radix);
1046            if(value >= MIN_INT_L && value <= MAX_INT_L) {
1047                return (int)value;
1048            }
1049            return value;
1050        } catch (final NumberFormatException e) {
1051            if (radix == 10) {
1052                return Double.valueOf(valueString);
1053            }
1054
1055            double value = 0.0;
1056
1057            for (int i = 0; i < valueString.length(); i++) {
1058                final char ch = valueString.charAt(i);
1059                // Preverified, should always be a valid digit.
1060                final int digit = convertDigit(ch, radix);
1061                value *= radix;
1062                value += digit;
1063            }
1064
1065            return value;
1066        }
1067    }
1068
1069    /**
1070     * Scan a number.
1071     */
1072    protected void scanNumber() {
1073        // Record beginning of number.
1074        final int start = position;
1075        // Assume value is a decimal.
1076        TokenType type = DECIMAL;
1077
1078        // First digit of number.
1079        int digit = convertDigit(ch0, 10);
1080
1081        // If number begins with 0x.
1082        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1083            // Skip over 0xN.
1084            skip(3);
1085            // Skip over remaining digits.
1086            while (convertDigit(ch0, 16) != -1) {
1087                skip(1);
1088            }
1089
1090            type = HEXADECIMAL;
1091        } else {
1092            // Check for possible octal constant.
1093            boolean octal = digit == 0;
1094            // Skip first digit if not leading '.'.
1095            if (digit != -1) {
1096                skip(1);
1097            }
1098
1099            // Skip remaining digits.
1100            while ((digit = convertDigit(ch0, 10)) != -1) {
1101                // Check octal only digits.
1102                octal = octal && digit < 8;
1103                // Skip digit.
1104                skip(1);
1105            }
1106
1107            if (octal && position - start > 1) {
1108                type = OCTAL;
1109            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1110                // Must be a double.
1111                if (ch0 == '.') {
1112                    // Skip period.
1113                    skip(1);
1114                    // Skip mantissa.
1115                    while (convertDigit(ch0, 10) != -1) {
1116                        skip(1);
1117                    }
1118                }
1119
1120                // Detect exponent.
1121                if (ch0 == 'E' || ch0 == 'e') {
1122                    // Skip E.
1123                    skip(1);
1124                    // Detect and skip exponent sign.
1125                    if (ch0 == '+' || ch0 == '-') {
1126                        skip(1);
1127                    }
1128                    // Skip exponent.
1129                    while (convertDigit(ch0, 10) != -1) {
1130                        skip(1);
1131                    }
1132                }
1133
1134                type = FLOATING;
1135            }
1136        }
1137
1138        if (Character.isJavaIdentifierStart(ch0)) {
1139            error(Lexer.message("missing.space.after.number"), type, position, 1);
1140        }
1141
1142        // Add number token.
1143        add(type, start);
1144    }
1145
1146    /**
1147     * Convert a regex token to a token object.
1148     *
1149     * @param start  Position in source content.
1150     * @param length Length of regex token.
1151     * @return Regex token object.
1152     */
1153    XMLToken valueOfXML(final int start, final int length) {
1154        return new XMLToken(source.getString(start, length));
1155    }
1156
1157    /**
1158     * Scan over a XML token.
1159     *
1160     * @return TRUE if is an XML literal.
1161     */
1162    private boolean scanXMLLiteral() {
1163        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1164        if (XML_LITERALS) {
1165            // Record beginning of xml expression.
1166            final int start = position;
1167
1168            int openCount = 0;
1169
1170            do {
1171                if (ch0 == '<') {
1172                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1173                        skip(3);
1174                        openCount--;
1175                    } else if (Character.isJavaIdentifierStart(ch1)) {
1176                        skip(2);
1177                        openCount++;
1178                    } else if (ch1 == '?') {
1179                        skip(2);
1180                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1181                        skip(4);
1182                    } else {
1183                        reset(start);
1184                        return false;
1185                    }
1186
1187                    while (!atEOF() && ch0 != '>') {
1188                        if (ch0 == '/' && ch1 == '>') {
1189                            openCount--;
1190                            skip(1);
1191                            break;
1192                        } else if (ch0 == '\"' || ch0 == '\'') {
1193                            scanString(false);
1194                        } else {
1195                            skip(1);
1196                        }
1197                    }
1198
1199                    if (ch0 != '>') {
1200                        reset(start);
1201                        return false;
1202                    }
1203
1204                    skip(1);
1205                } else if (atEOF()) {
1206                    reset(start);
1207                    return false;
1208                } else {
1209                    skip(1);
1210                }
1211            } while (openCount > 0);
1212
1213            add(XML, start);
1214            return true;
1215        }
1216
1217        return false;
1218    }
1219
1220    /**
1221     * Scan over identifier characters.
1222     *
1223     * @return Length of identifier or zero if none found.
1224     */
1225    private int scanIdentifier() {
1226        final int start = position;
1227
1228        // Make sure first character is valid start character.
1229        if (ch0 == '\\' && ch1 == 'u') {
1230            skip(2);
1231            final int ch = hexSequence(4, TokenType.IDENT);
1232
1233            if (!Character.isJavaIdentifierStart(ch)) {
1234                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1235            }
1236        } else if (!Character.isJavaIdentifierStart(ch0)) {
1237            // Not an identifier.
1238            return 0;
1239        }
1240
1241        // Make sure remaining characters are valid part characters.
1242        while (!atEOF()) {
1243            if (ch0 == '\\' && ch1 == 'u') {
1244                skip(2);
1245                final int ch = hexSequence(4, TokenType.IDENT);
1246
1247                if (!Character.isJavaIdentifierPart(ch)) {
1248                    error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1249                }
1250            } else if (Character.isJavaIdentifierPart(ch0)) {
1251                skip(1);
1252            } else {
1253                break;
1254            }
1255        }
1256
1257        // Length of identifier sequence.
1258        return position - start;
1259    }
1260
1261    /**
1262     * Compare two identifiers (in content) for equality.
1263     *
1264     * @param aStart  Start of first identifier.
1265     * @param aLength Length of first identifier.
1266     * @param bStart  Start of second identifier.
1267     * @param bLength Length of second identifier.
1268     * @return True if equal.
1269     */
1270    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1271        if (aLength == bLength) {
1272            for (int i = 0; i < aLength; i++) {
1273                if (content[aStart + i] != content[bStart + i]) {
1274                    return false;
1275                }
1276            }
1277
1278            return true;
1279        }
1280
1281        return false;
1282    }
1283
1284    /**
1285     * Detect if a line starts with a marker identifier.
1286     *
1287     * @param identStart  Start of identifier.
1288     * @param identLength Length of identifier.
1289     * @return True if detected.
1290     */
1291    private boolean hasHereMarker(final int identStart, final int identLength) {
1292        // Skip any whitespace.
1293        skipWhitespace(false);
1294
1295        return identifierEqual(identStart, identLength, position, scanIdentifier());
1296    }
1297
1298    /**
1299     * Lexer to service edit strings.
1300     */
1301    private static class EditStringLexer extends Lexer {
1302        /** Type of string literals to emit. */
1303        final TokenType stringType;
1304
1305        /*
1306         * Constructor.
1307         */
1308
1309        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1310            super(lexer, stringState);
1311
1312            this.stringType = stringType;
1313        }
1314
1315        /**
1316         * Lexify the contents of the string.
1317         */
1318        @Override
1319        public void lexify() {
1320            // Record start of string position.
1321            int stringStart = position;
1322            // Indicate that the priming first string has not been emitted.
1323            boolean primed = false;
1324
1325            while (true) {
1326                // Detect end of content.
1327                if (atEOF()) {
1328                    break;
1329                }
1330
1331                // Honour escapes (should be well formed.)
1332                if (ch0 == '\\' && stringType == ESCSTRING) {
1333                    skip(2);
1334
1335                    continue;
1336                }
1337
1338                // If start of expression.
1339                if (ch0 == '$' && ch1 == '{') {
1340                    if (!primed || stringStart != position) {
1341                        if (primed) {
1342                            add(ADD, stringStart, stringStart + 1);
1343                        }
1344
1345                        add(stringType, stringStart, position);
1346                        primed = true;
1347                    }
1348
1349                    // Skip ${
1350                    skip(2);
1351
1352                    // Save expression state.
1353                    final State expressionState = saveState();
1354
1355                    // Start with one open brace.
1356                    int braceCount = 1;
1357
1358                    // Scan for the rest of the string.
1359                    while (!atEOF()) {
1360                        // If closing brace.
1361                        if (ch0 == '}') {
1362                            // Break only only if matching brace.
1363                            if (--braceCount == 0) {
1364                                break;
1365                            }
1366                        } else if (ch0 == '{') {
1367                            // Bump up the brace count.
1368                            braceCount++;
1369                        }
1370
1371                        // Skip to next character.
1372                        skip(1);
1373                    }
1374
1375                    // If braces don't match then report an error.
1376                    if (braceCount != 0) {
1377                        error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1378                    }
1379
1380                    // Mark end of expression.
1381                    expressionState.setLimit(position);
1382                    // Skip closing brace.
1383                    skip(1);
1384
1385                    // Start next string.
1386                    stringStart = position;
1387
1388                    // Concatenate expression.
1389                    add(ADD, expressionState.position, expressionState.position + 1);
1390                    add(LPAREN, expressionState.position, expressionState.position + 1);
1391
1392                    // Scan expression.
1393                    final Lexer lexer = new Lexer(this, expressionState);
1394                    lexer.lexify();
1395
1396                    // Close out expression parenthesis.
1397                    add(RPAREN, position - 1, position);
1398
1399                    continue;
1400                }
1401
1402                // Next character in string.
1403                skip(1);
1404            }
1405
1406            // If there is any unemitted string portion.
1407            if (stringStart != limit) {
1408                // Concatenate remaining string.
1409                if (primed) {
1410                    add(ADD, stringStart, 1);
1411                }
1412
1413                add(stringType, stringStart, limit);
1414            }
1415        }
1416
1417    }
1418
1419    /**
1420     * Edit string for nested expressions.
1421     *
1422     * @param stringType  Type of string literals to emit.
1423     * @param stringState State of lexer at start of string.
1424     */
1425    private void editString(final TokenType stringType, final State stringState) {
1426        // Use special lexer to scan string.
1427        final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1428        lexer.lexify();
1429
1430        // Need to keep lexer informed.
1431        last = stringType;
1432    }
1433
1434    /**
1435     * Scan over a here string.
1436     *
1437     * @return TRUE if is a here string.
1438     */
1439    private boolean scanHereString(final LineInfoReceiver lir) {
1440        assert ch0 == '<' && ch1 == '<';
1441        if (scripting) {
1442            // Record beginning of here string.
1443            final State saved = saveState();
1444
1445            // << or <<<
1446            final boolean excludeLastEOL = ch2 != '<';
1447
1448            if (excludeLastEOL) {
1449                skip(2);
1450            } else {
1451                skip(3);
1452            }
1453
1454            // Scan identifier. It might be quoted, indicating that no string editing should take place.
1455            final char quoteChar = ch0;
1456            final boolean noStringEditing = isStringDelimiter(quoteChar);
1457            if (noStringEditing) {
1458                skip(1);
1459            }
1460            final int identStart = position;
1461            final int identLength = scanIdentifier();
1462            if (noStringEditing) {
1463                if (ch0 != quoteChar) {
1464                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1465                    restoreState(saved);
1466                    return false;
1467                }
1468                skip(1);
1469            }
1470
1471            // Check for identifier.
1472            if (identLength == 0) {
1473                // Treat as shift.
1474                restoreState(saved);
1475
1476                return false;
1477            }
1478
1479            // Record rest of line.
1480            final State restState = saveState();
1481            // keep line number updated
1482            int lastLine = line;
1483
1484            skipLine(false);
1485            lastLine++;
1486            int lastLinePosition = position;
1487            restState.setLimit(position);
1488
1489            // Record beginning of string.
1490            final State stringState = saveState();
1491            int stringEnd = position;
1492
1493            // Hunt down marker.
1494            while (!atEOF()) {
1495                // Skip any whitespace.
1496                skipWhitespace(false);
1497
1498                if (hasHereMarker(identStart, identLength)) {
1499                    break;
1500                }
1501
1502                skipLine(false);
1503                lastLine++;
1504                lastLinePosition = position;
1505                stringEnd = position;
1506            }
1507
1508            // notify last line information
1509            lir.lineInfo(lastLine, lastLinePosition);
1510
1511            // Record end of string.
1512            stringState.setLimit(stringEnd);
1513
1514            // If marker is missing.
1515            if (stringState.isEmpty() || atEOF()) {
1516                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1517                restoreState(saved);
1518
1519                return false;
1520            }
1521
1522            // Remove last end of line if specified.
1523            if (excludeLastEOL) {
1524                // Handles \n.
1525                if (content[stringEnd - 1] == '\n') {
1526                    stringEnd--;
1527                }
1528
1529                // Handles \r and \r\n.
1530                if (content[stringEnd - 1] == '\r') {
1531                    stringEnd--;
1532                }
1533
1534                // Update end of string.
1535                stringState.setLimit(stringEnd);
1536            }
1537
1538            // Edit string if appropriate.
1539            if (!noStringEditing && !stringState.isEmpty()) {
1540                editString(STRING, stringState);
1541            } else {
1542                // Add here string.
1543                add(STRING, stringState.position, stringState.limit);
1544            }
1545
1546            // Scan rest of original line.
1547            final Lexer restLexer = new Lexer(this, restState);
1548
1549            restLexer.lexify();
1550
1551            return true;
1552        }
1553
1554        return false;
1555    }
1556
1557    /**
1558     * Breaks source content down into lex units, adding tokens to the token
1559     * stream. The routine scans until the stream buffer is full. Can be called
1560     * repeatedly until EOF is detected.
1561     */
1562    public void lexify() {
1563        while (!stream.isFull() || nested) {
1564            // Skip over whitespace.
1565            skipWhitespace(true);
1566
1567            // Detect end of file.
1568            if (atEOF()) {
1569                if (!nested) {
1570                    // Add an EOF token at the end.
1571                    add(EOF, position);
1572                }
1573
1574                break;
1575            }
1576
1577            // Check for comments. Note that we don't scan for regexp and other literals here as
1578            // we may not have enough context to distinguish them from similar looking operators.
1579            // Instead we break on ambiguous operators below and let the parser decide.
1580            if (ch0 == '/' && skipComments()) {
1581                continue;
1582            }
1583
1584            if (scripting && ch0 == '#' && skipComments()) {
1585                continue;
1586            }
1587
1588            // TokenType for lookup of delimiter or operator.
1589            TokenType type;
1590
1591            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1592                // '.' followed by digit.
1593                // Scan and add a number.
1594                scanNumber();
1595            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1596                // Get the number of characters in the token.
1597                final int typeLength = type.getLength();
1598                // Skip that many characters.
1599                skip(typeLength);
1600                // Add operator token.
1601                add(type, position - typeLength);
1602                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1603                // We break to let the parser decide what it is.
1604                if (canStartLiteral(type)) {
1605                    break;
1606                } else if (type == LBRACE && pauseOnNextLeftBrace) {
1607                    pauseOnNextLeftBrace = false;
1608                    break;
1609                }
1610            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1611                // Scan and add identifier or keyword.
1612                scanIdentifierOrKeyword();
1613            } else if (isStringDelimiter(ch0)) {
1614                // Scan and add a string.
1615                scanString(true);
1616            } else if (Character.isDigit(ch0)) {
1617                // Scan and add a number.
1618                scanNumber();
1619            } else {
1620                // Don't recognize this character.
1621                skip(1);
1622                add(ERROR, position - 1);
1623            }
1624        }
1625    }
1626
1627    /**
1628     * Return value of token given its token descriptor.
1629     *
1630     * @param token  Token descriptor.
1631     * @return JavaScript value.
1632     */
1633    Object getValueOf(final long token, final boolean strict) {
1634        final int start = Token.descPosition(token);
1635        final int len   = Token.descLength(token);
1636
1637        switch (Token.descType(token)) {
1638        case DECIMAL:
1639            return Lexer.valueOf(source.getString(start, len), 10); // number
1640        case OCTAL:
1641            return Lexer.valueOf(source.getString(start, len), 8); // number
1642        case HEXADECIMAL:
1643            return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1644        case FLOATING:
1645            final String str   = source.getString(start, len);
1646            final double value = Double.valueOf(str);
1647            if (str.indexOf('.') != -1) {
1648                return value; //number
1649            }
1650            //anything without an explicit decimal point is still subject to a
1651            //"representable as int or long" check. Then the programmer does not
1652            //explicitly code something as a double. For example new Color(int, int, int)
1653            //and new Color(float, float, float) will get ambiguous for cases like
1654            //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1655            //yet we don't want e.g. 1e6 to be a double unnecessarily
1656            if (JSType.isStrictlyRepresentableAsInt(value)) {
1657                return (int)value;
1658            } else if (JSType.isStrictlyRepresentableAsLong(value)) {
1659                return (long)value;
1660            }
1661            return value;
1662        case STRING:
1663            return source.getString(start, len); // String
1664        case ESCSTRING:
1665            return valueOfString(start, len, strict); // String
1666        case IDENT:
1667            return valueOfIdent(start, len); // String
1668        case REGEX:
1669            return valueOfPattern(start, len); // RegexToken::LexerToken
1670        case XML:
1671            return valueOfXML(start, len); // XMLToken::LexerToken
1672        case DIRECTIVE_COMMENT:
1673            return source.getString(start, len);
1674        default:
1675            break;
1676        }
1677
1678        return null;
1679    }
1680
1681    /**
1682     * Get the correctly localized error message for a given message id format arguments
1683     * @param msgId message id
1684     * @param args  format arguments
1685     * @return message
1686     */
1687    protected static String message(final String msgId, final String... args) {
1688        return ECMAErrors.getMessage("lexer.error." + msgId, args);
1689    }
1690
1691    /**
1692     * Generate a runtime exception
1693     *
1694     * @param message       error message
1695     * @param type          token type
1696     * @param start         start position of lexed error
1697     * @param length        length of lexed error
1698     * @throws ParserException  unconditionally
1699     */
1700    protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1701        final long token     = Token.toDesc(type, start, length);
1702        final int  pos       = Token.descPosition(token);
1703        final int  lineNum   = source.getLine(pos);
1704        final int  columnNum = source.getColumn(pos);
1705        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1706        throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1707    }
1708
1709    /**
1710     * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1711     * This is the abstract superclass
1712     */
1713    public static abstract class LexerToken implements Serializable {
1714        private static final long serialVersionUID = 1L;
1715
1716        private final String expression;
1717
1718        /**
1719         * Constructor
1720         * @param expression token expression
1721         */
1722        protected LexerToken(final String expression) {
1723            this.expression = expression;
1724        }
1725
1726        /**
1727         * Get the expression
1728         * @return expression
1729         */
1730        public String getExpression() {
1731            return expression;
1732        }
1733    }
1734
1735    /**
1736     * Temporary container for regular expressions.
1737     */
1738    public static class RegexToken extends LexerToken {
1739        private static final long serialVersionUID = 1L;
1740
1741        /** Options. */
1742        private final String options;
1743
1744        /**
1745         * Constructor.
1746         *
1747         * @param expression  regexp expression
1748         * @param options     regexp options
1749         */
1750        public RegexToken(final String expression, final String options) {
1751            super(expression);
1752            this.options = options;
1753        }
1754
1755        /**
1756         * Get regexp options
1757         * @return options
1758         */
1759        public String getOptions() {
1760            return options;
1761        }
1762
1763        @Override
1764        public String toString() {
1765            return '/' + getExpression() + '/' + options;
1766        }
1767    }
1768
1769    /**
1770     * Temporary container for XML expression.
1771     */
1772    public static class XMLToken extends LexerToken {
1773        private static final long serialVersionUID = 1L;
1774
1775        /**
1776         * Constructor.
1777         *
1778         * @param expression  XML expression
1779         */
1780        public XMLToken(final String expression) {
1781            super(expression);
1782        }
1783    }
1784}
1785