JavaTokenizer.java revision 2571:10fc81ac75b4
1/*
2 * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.parser;
27
28import com.sun.tools.javac.code.Source;
29import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30import com.sun.tools.javac.util.*;
31
32import java.nio.CharBuffer;
33
34import static com.sun.tools.javac.parser.Tokens.*;
35import static com.sun.tools.javac.util.LayoutCharacters.*;
36
37/** The lexical analyzer maps an input stream consisting of
38 *  ASCII characters and Unicode escapes into a token sequence.
39 *
40 *  <p><b>This is NOT part of any supported API.
41 *  If you write code that depends on this, you do so at your own risk.
42 *  This code and its internal interfaces are subject to change or
43 *  deletion without notice.</b>
44 */
45public class JavaTokenizer {
46
47    private static final boolean scannerDebug = false; // when true, scanned tokens, comments and white space are echoed to System.out
48
49    /** Allow binary literals. Initialized from the source level in the
     *  constructor; readToken() switches it on after reporting the first
     *  unsupported binary literal.
     */
51    private boolean allowBinaryLiterals;
52
53    /** Allow underscores in literals. Initialized from the source level in
     *  the constructor; scanDigits() switches it on after reporting the
     *  first unsupported underscore.
     */
55    private boolean allowUnderscoresInLiterals;
56
57    /** The source language setting, taken from the scanner factory.
     */
59    private Source source;
60
61    /** The log to be used for error reporting.
     */
63    private final Log log;
64
65    /** The token factory; used to look up the token kind of a name. */
66    private final Tokens tokens;
67
68    /** The token kind, set while readToken() scans a token.
     *  lexError() sets it to ERROR.
     */
70    protected TokenKind tk;
71
72    /** The token's radix. readToken() resets it to 0; the number scanning
     *  methods set it for numeric literals.
     */
74    protected int radix;
75
76    /** The token's name. readToken() resets it to null; scanIdent() sets it.
     */
78    protected Name name;
79
80    /** The position where a lexical error occurred. Starts out as
     *  Position.NOPOS and is updated by lexError() and errPos(int).
     */
82    protected int errPos = Position.NOPOS;
83
84    /** The Unicode reader (low-level stream reader).
     */
86    protected UnicodeReader reader;
87
    /** The factory which created this tokenizer; also passed to the readers
     *  created for comments.
     */
88    protected ScannerFactory fac;
89
90    private static final boolean hexFloatsWork = hexFloatsWork();
91    private static boolean hexFloatsWork() {
92        try {
93            Float.valueOf("0x1.0p1");
94            return true;
95        } catch (NumberFormatException ex) {
96            return false;
97        }
98    }
99
100    /**
     * Create a scanner that reads its input from a character buffer.
     * The buffer is wrapped in a new {@code UnicodeReader}.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     */
110    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
111        this(fac, new UnicodeReader(fac, buf));
112    }
113
    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     * NOTE(review): the copying/modification described here is not visible
     * in this class; the array is handed straight to a new
     * {@code UnicodeReader} -- confirm against that class.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     * Must be positive and less than or equal to input.length.
     */
114    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
115        this(fac, new UnicodeReader(fac, buf, inputLength));
116    }
117
118    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
119        this.fac = fac;
120        this.log = fac.log;
121        this.tokens = fac.tokens;
122        this.source = fac.source;
123        this.reader = reader;
124        this.allowBinaryLiterals = source.allowBinaryLiterals();
125        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
126    }
127
128    /** Report an error at the given position using the provided arguments.
129     */
130    protected void lexError(int pos, String key, Object... args) {
131        log.error(pos, key, args);
132        tk = TokenKind.ERROR;
133        errPos = pos;
134    }
135
136    /** Read next character in character or string literal and copy into sbuf.
137     */
138    private void scanLitChar(int pos) {
139        if (reader.ch == '\\') {
140            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
141                reader.skipChar();
142                reader.putChar('\\', true);
143            } else {
144                reader.scanChar();
145                switch (reader.ch) {
146                case '0': case '1': case '2': case '3':
147                case '4': case '5': case '6': case '7':
148                    char leadch = reader.ch;
149                    int oct = reader.digit(pos, 8);
150                    reader.scanChar();
151                    if ('0' <= reader.ch && reader.ch <= '7') {
152                        oct = oct * 8 + reader.digit(pos, 8);
153                        reader.scanChar();
154                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
155                            oct = oct * 8 + reader.digit(pos, 8);
156                            reader.scanChar();
157                        }
158                    }
159                    reader.putChar((char)oct);
160                    break;
161                case 'b':
162                    reader.putChar('\b', true); break;
163                case 't':
164                    reader.putChar('\t', true); break;
165                case 'n':
166                    reader.putChar('\n', true); break;
167                case 'f':
168                    reader.putChar('\f', true); break;
169                case 'r':
170                    reader.putChar('\r', true); break;
171                case '\'':
172                    reader.putChar('\'', true); break;
173                case '\"':
174                    reader.putChar('\"', true); break;
175                case '\\':
176                    reader.putChar('\\', true); break;
177                default:
178                    lexError(reader.bp, "illegal.esc.char");
179                }
180            }
181        } else if (reader.bp != reader.buflen) {
182            reader.putChar(true);
183        }
184    }
185
186    private void scanDigits(int pos, int digitRadix) {
187        char saveCh;
188        int savePos;
189        do {
190            if (reader.ch != '_') {
191                reader.putChar(false);
192            } else {
193                if (!allowUnderscoresInLiterals) {
194                    lexError(pos, "unsupported.underscore.lit", source.name);
195                    allowUnderscoresInLiterals = true;
196                }
197            }
198            saveCh = reader.ch;
199            savePos = reader.bp;
200            reader.scanChar();
201        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
202        if (saveCh == '_')
203            lexError(savePos, "illegal.underscore");
204    }
205
206    /** Read fractional part of hexadecimal floating point number.
207     */
208    private void scanHexExponentAndSuffix(int pos) {
209        if (reader.ch == 'p' || reader.ch == 'P') {
210            reader.putChar(true);
211            skipIllegalUnderscores();
212            if (reader.ch == '+' || reader.ch == '-') {
213                reader.putChar(true);
214            }
215            skipIllegalUnderscores();
216            if (reader.digit(pos, 10) >= 0) {
217                scanDigits(pos, 10);
218                if (!hexFloatsWork)
219                    lexError(pos, "unsupported.cross.fp.lit");
220            } else
221                lexError(pos, "malformed.fp.lit");
222        } else {
223            lexError(pos, "malformed.fp.lit");
224        }
225        if (reader.ch == 'f' || reader.ch == 'F') {
226            reader.putChar(true);
227            tk = TokenKind.FLOATLITERAL;
228            radix = 16;
229        } else {
230            if (reader.ch == 'd' || reader.ch == 'D') {
231                reader.putChar(true);
232            }
233            tk = TokenKind.DOUBLELITERAL;
234            radix = 16;
235        }
236    }
237
238    /** Read fractional part of floating point number.
239     */
240    private void scanFraction(int pos) {
241        skipIllegalUnderscores();
242        if (reader.digit(pos, 10) >= 0) {
243            scanDigits(pos, 10);
244        }
245        int sp1 = reader.sp;
246        if (reader.ch == 'e' || reader.ch == 'E') {
247            reader.putChar(true);
248            skipIllegalUnderscores();
249            if (reader.ch == '+' || reader.ch == '-') {
250                reader.putChar(true);
251            }
252            skipIllegalUnderscores();
253            if (reader.digit(pos, 10) >= 0) {
254                scanDigits(pos, 10);
255                return;
256            }
257            lexError(pos, "malformed.fp.lit");
258            reader.sp = sp1;
259        }
260    }
261
262    /** Read fractional part and 'd' or 'f' suffix of floating point number.
263     */
264    private void scanFractionAndSuffix(int pos) {
265        radix = 10;
266        scanFraction(pos);
267        if (reader.ch == 'f' || reader.ch == 'F') {
268            reader.putChar(true);
269            tk = TokenKind.FLOATLITERAL;
270        } else {
271            if (reader.ch == 'd' || reader.ch == 'D') {
272                reader.putChar(true);
273            }
274            tk = TokenKind.DOUBLELITERAL;
275        }
276    }
277
278    /** Read fractional part and 'd' or 'f' suffix of floating point number.
279     */
280    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
281        radix = 16;
282        Assert.check(reader.ch == '.');
283        reader.putChar(true);
284        skipIllegalUnderscores();
285        if (reader.digit(pos, 16) >= 0) {
286            seendigit = true;
287            scanDigits(pos, 16);
288        }
289        if (!seendigit)
290            lexError(pos, "invalid.hex.number");
291        else
292            scanHexExponentAndSuffix(pos);
293    }
294
295    private void skipIllegalUnderscores() {
296        if (reader.ch == '_') {
297            lexError(reader.bp, "illegal.underscore");
298            while (reader.ch == '_')
299                reader.scanChar();
300        }
301    }
302
303    /** Read a number.
304     *  @param radix  The radix of the number; one of 2, j8, 10, 16.
305     */
306    private void scanNumber(int pos, int radix) {
307        // for octal, allow base-10 digit in case it's a float literal
308        this.radix = radix;
309        int digitRadix = (radix == 8 ? 10 : radix);
310        boolean seendigit = false;
311        if (reader.digit(pos, digitRadix) >= 0) {
312            seendigit = true;
313            scanDigits(pos, digitRadix);
314        }
315        if (radix == 16 && reader.ch == '.') {
316            scanHexFractionAndSuffix(pos, seendigit);
317        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
318            scanHexExponentAndSuffix(pos);
319        } else if (digitRadix == 10 && reader.ch == '.') {
320            reader.putChar(true);
321            scanFractionAndSuffix(pos);
322        } else if (digitRadix == 10 &&
323                   (reader.ch == 'e' || reader.ch == 'E' ||
324                    reader.ch == 'f' || reader.ch == 'F' ||
325                    reader.ch == 'd' || reader.ch == 'D')) {
326            scanFractionAndSuffix(pos);
327        } else {
328            if (reader.ch == 'l' || reader.ch == 'L') {
329                reader.scanChar();
330                tk = TokenKind.LONGLITERAL;
331            } else {
332                tk = TokenKind.INTLITERAL;
333            }
334        }
335    }
336
337    /** Read an identifier.
338     */
339    private void scanIdent() {
340        boolean isJavaIdentifierPart;
341        char high;
342        reader.putChar(true);
343        do {
344            switch (reader.ch) {
345            case 'A': case 'B': case 'C': case 'D': case 'E':
346            case 'F': case 'G': case 'H': case 'I': case 'J':
347            case 'K': case 'L': case 'M': case 'N': case 'O':
348            case 'P': case 'Q': case 'R': case 'S': case 'T':
349            case 'U': case 'V': case 'W': case 'X': case 'Y':
350            case 'Z':
351            case 'a': case 'b': case 'c': case 'd': case 'e':
352            case 'f': case 'g': case 'h': case 'i': case 'j':
353            case 'k': case 'l': case 'm': case 'n': case 'o':
354            case 'p': case 'q': case 'r': case 's': case 't':
355            case 'u': case 'v': case 'w': case 'x': case 'y':
356            case 'z':
357            case '$': case '_':
358            case '0': case '1': case '2': case '3': case '4':
359            case '5': case '6': case '7': case '8': case '9':
360                break;
361            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
362            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
363            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
364            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
365            case '\u0015': case '\u0016': case '\u0017':
366            case '\u0018': case '\u0019': case '\u001B':
367            case '\u007F':
368                reader.scanChar();
369                continue;
370            case '\u001A': // EOI is also a legal identifier part
371                if (reader.bp >= reader.buflen) {
372                    name = reader.name();
373                    tk = tokens.lookupKind(name);
374                    return;
375                }
376                reader.scanChar();
377                continue;
378            default:
379                if (reader.ch < '\u0080') {
380                    // all ASCII range chars already handled, above
381                    isJavaIdentifierPart = false;
382                } else {
383                    if (Character.isIdentifierIgnorable(reader.ch)) {
384                        reader.scanChar();
385                        continue;
386                    } else {
387                        int codePoint = reader.peekSurrogates();
388                        if (codePoint >= 0) {
389                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
390                                reader.putChar(true);
391                            }
392                        } else {
393                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
394                        }
395                    }
396                }
397                if (!isJavaIdentifierPart) {
398                    name = reader.name();
399                    tk = tokens.lookupKind(name);
400                    return;
401                }
402            }
403            reader.putChar(true);
404        } while (true);
405    }
406
407    /** Return true if reader.ch can be part of an operator.
408     */
409    private boolean isSpecial(char ch) {
410        switch (ch) {
411        case '!': case '%': case '&': case '*': case '?':
412        case '+': case '-': case ':': case '<': case '=':
413        case '>': case '^': case '|': case '~':
414        case '@':
415            return true;
416        default:
417            return false;
418        }
419    }
420
421    /** Read longest possible sequence of special characters and convert
422     *  to token.
423     */
424    private void scanOperator() {
425        while (true) {
426            reader.putChar(false);
427            Name newname = reader.name();
428            TokenKind tk1 = tokens.lookupKind(newname);
429            if (tk1 == TokenKind.IDENTIFIER) {
430                reader.sp--;
431                break;
432            }
433            tk = tk1;
434            reader.scanChar();
435            if (!isSpecial(reader.ch)) break;
436        }
437    }
438
439    /** Read the next token from the input.
     *
     *  <p>The per-token state ({@code reader.sp}, {@code name} and
     *  {@code radix}) is reset first. White space, line terminators and
     *  comments are then consumed until a token, a lexical error or the
     *  end of the input has been scanned. Comments met on the way are
     *  collected and handed to the returned token.
     *
     *  @return a token whose class is selected by {@code tk.tag}; it
     *          starts where the scan of the token began and ends at the
     *          reader's position after the scan
     */
441    public Token readToken() {
442
443        reader.sp = 0;
444        name = null;
445        radix = 0;
446
447        int pos = 0;
448        int endPos = 0;
449        List<Comment> comments = null;
450
451        try {
            // Each iteration scans one lexical element. A plain "break"
            // leaves only the switch (white space, line terminators and
            // comments: keep looking); "break loop" ends the search.
452            loop: while (true) {
453                pos = reader.bp;
454                switch (reader.ch) {
455                case ' ': // (Spec 3.6)
456                case '\t': // (Spec 3.6)
457                case FF: // (Spec 3.6)
                    // a whole run of blanks, tabs and form feeds is reported at once
458                    do {
459                        reader.scanChar();
460                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
461                    processWhiteSpace(pos, reader.bp);
462                    break;
463                case LF: // (Spec 3.4)
464                    reader.scanChar();
465                    processLineTerminator(pos, reader.bp);
466                    break;
467                case CR: // (Spec 3.4)
468                    reader.scanChar();
                    // CR immediately followed by LF counts as one line terminator
469                    if (reader.ch == LF) {
470                        reader.scanChar();
471                    }
472                    processLineTerminator(pos, reader.bp);
473                    break;
474                case 'A': case 'B': case 'C': case 'D': case 'E':
475                case 'F': case 'G': case 'H': case 'I': case 'J':
476                case 'K': case 'L': case 'M': case 'N': case 'O':
477                case 'P': case 'Q': case 'R': case 'S': case 'T':
478                case 'U': case 'V': case 'W': case 'X': case 'Y':
479                case 'Z':
480                case 'a': case 'b': case 'c': case 'd': case 'e':
481                case 'f': case 'g': case 'h': case 'i': case 'j':
482                case 'k': case 'l': case 'm': case 'n': case 'o':
483                case 'p': case 'q': case 'r': case 's': case 't':
484                case 'u': case 'v': case 'w': case 'x': case 'y':
485                case 'z':
486                case '$': case '_':
487                    scanIdent();
488                    break loop;
489                case '0':
490                    reader.scanChar();
491                    if (reader.ch == 'x' || reader.ch == 'X') {
                        // hexadecimal literal; the prefix itself is not put into the buffer
492                        reader.scanChar();
493                        skipIllegalUnderscores();
494                        if (reader.ch == '.') {
495                            scanHexFractionAndSuffix(pos, false);
496                        } else if (reader.digit(pos, 16) < 0) {
497                            lexError(pos, "invalid.hex.number");
498                        } else {
499                            scanNumber(pos, 16);
500                        }
501                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        // binary literal; reported once if the source level does not allow it
502                        if (!allowBinaryLiterals) {
503                            lexError(pos, "unsupported.binary.lit", source.name);
504                            allowBinaryLiterals = true;
505                        }
506                        reader.scanChar();
507                        skipIllegalUnderscores();
508                        if (reader.digit(pos, 2) < 0) {
509                            lexError(pos, "invalid.binary.number");
510                        } else {
511                            scanNumber(pos, 2);
512                        }
513                    } else {
                        // leading zero: scanned with radix 8 (scanNumber also
                        // accepts a floating point literal here)
514                        reader.putChar('0');
515                        if (reader.ch == '_') {
                            // underscores right after the leading 0 are skipped;
                            // they are an error only if no digit follows them
516                            int savePos = reader.bp;
517                            do {
518                                reader.scanChar();
519                            } while (reader.ch == '_');
520                            if (reader.digit(pos, 10) < 0) {
521                                lexError(savePos, "illegal.underscore");
522                            }
523                        }
524                        scanNumber(pos, 8);
525                    }
526                    break loop;
527                case '1': case '2': case '3': case '4':
528                case '5': case '6': case '7': case '8': case '9':
529                    scanNumber(pos, 10);
530                    break loop;
531                case '.':
532                    reader.scanChar();
533                    if (reader.digit(pos, 10) >= 0) {
                        // ".5" style floating point literal
534                        reader.putChar('.');
535                        scanFractionAndSuffix(pos);
536                    } else if (reader.ch == '.') {
                        // two dots: an ellipsis if a third one follows, an error otherwise
537                        int savePos = reader.bp;
538                        reader.putChar('.'); reader.putChar('.', true);
539                        if (reader.ch == '.') {
540                            reader.scanChar();
541                            reader.putChar('.');
542                            tk = TokenKind.ELLIPSIS;
543                        } else {
544                            lexError(savePos, "illegal.dot");
545                        }
546                    } else {
547                        tk = TokenKind.DOT;
548                    }
549                    break loop;
550                case ',':
551                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
552                case ';':
553                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
554                case '(':
555                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
556                case ')':
557                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
558                case '[':
559                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
560                case ']':
561                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
562                case '{':
563                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
564                case '}':
565                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
566                case '/':
567                    reader.scanChar();
568                    if (reader.ch == '/') {
                        // line comment: runs up to, but not including, the line terminator
569                        do {
570                            reader.scanCommentChar();
571                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // a line comment that runs into the end of the input is not recorded
572                        if (reader.bp < reader.buflen) {
573                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
574                        }
575                        break;
576                    } else if (reader.ch == '*') {
                        // block comment, or documentation comment if a second '*' follows
577                        boolean isEmpty = false;
578                        reader.scanChar();
579                        CommentStyle style;
580                        if (reader.ch == '*') {
581                            style = CommentStyle.JAVADOC;
582                            reader.scanCommentChar();
                            // the second '*' is directly followed by '/':
                            // nothing is left to search for the end in
583                            if (reader.ch == '/') {
584                                isEmpty = true;
585                            }
586                        } else {
587                            style = CommentStyle.BLOCK;
588                        }
                        // search for the closing '*' '/' pair
589                        while (!isEmpty && reader.bp < reader.buflen) {
590                            if (reader.ch == '*') {
591                                reader.scanChar();
592                                if (reader.ch == '/') break;
593                            } else {
594                                reader.scanCommentChar();
595                            }
596                        }
597                        if (reader.ch == '/') {
598                            reader.scanChar();
599                            comments = addComment(comments, processComment(pos, reader.bp, style));
600                            break;
601                        } else {
                            // input ended inside the comment: give up with an error token
602                            lexError(pos, "unclosed.comment");
603                            break loop;
604                        }
605                    } else if (reader.ch == '=') {
606                        tk = TokenKind.SLASHEQ;
607                        reader.scanChar();
608                    } else {
609                        tk = TokenKind.SLASH;
610                    }
611                    break loop;
612                case '\'':
613                    reader.scanChar();
614                    if (reader.ch == '\'') {
615                        lexError(pos, "empty.char.lit");
616                        reader.scanChar();
617                    } else {
                        // a line end is reported, but the character is still scanned
618                        if (reader.ch == CR || reader.ch == LF)
619                            lexError(pos, "illegal.line.end.in.char.lit");
620                        scanLitChar(pos);
621                        if (reader.ch == '\'') {
622                            reader.scanChar();
623                            tk = TokenKind.CHARLITERAL;
624                        } else {
625                            lexError(pos, "unclosed.char.lit");
626                        }
627                    }
628                    break loop;
629                case '\"':
630                    reader.scanChar();
                    // a string literal ends at the closing quote; a line end or
                    // the end of the input before that leaves it unclosed
631                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
632                        scanLitChar(pos);
633                    if (reader.ch == '\"') {
634                        tk = TokenKind.STRINGLITERAL;
635                        reader.scanChar();
636                    } else {
637                        lexError(pos, "unclosed.str.lit");
638                    }
639                    break loop;
640                default:
641                    if (isSpecial(reader.ch)) {
642                        scanOperator();
643                    } else {
                        // What is left: a non-ASCII identifier start, a digit that
                        // reader.digit accepts, the end of the input, or an
                        // illegal character.
644                        boolean isJavaIdentifierStart;
645                        int codePoint = -1;
646                        if (reader.ch < '\u0080') {
647                            // all ASCII range chars already handled, above
648                            isJavaIdentifierStart = false;
649                        } else {
650                            codePoint = reader.peekSurrogates();
651                            if (codePoint >= 0) {
                                // surrogate pair: the first half is put into the
                                // buffer here, scanIdent() takes the second half
652                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
653                                    reader.putChar(true);
654                                }
655                            } else {
656                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
657                            }
658                        }
659                        if (isJavaIdentifierStart) {
660                            scanIdent();
661                        } else if (reader.digit(pos, 10) >= 0) {
662                            scanNumber(pos, 10);
663                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // end of input, possibly marked by a final EOI character
664                            tk = TokenKind.EOF;
665                            pos = reader.buflen;
666                        } else {
                            // illegal character: printable ASCII is shown as is,
                            // anything else as Unicode escape(s)
667                            String arg;
668
669                            if (codePoint >= 0) {
670                                char high = reader.ch;
671                                reader.scanChar();
672                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
673                            } else {
674                                arg = (32 < reader.ch && reader.ch < 127) ?
675                                                String.format("%s", reader.ch) :
676                                                String.format("\\u%04x", (int)reader.ch);
677                            }
678                            lexError(pos, "illegal.char", arg);
679                            reader.scanChar();
680                        }
681                    }
682                    break loop;
683                }
684            }
685            endPos = reader.bp;
            // the tag of the token kind decides which token class carries the result
686            switch (tk.tag) {
687                case DEFAULT: return new Token(tk, pos, endPos, comments);
688                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
689                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
690                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
691                default: throw new AssertionError();
692            }
693        }
694        finally {
            // debugging aid: echo the raw characters of the token just scanned
695            if (scannerDebug) {
696                    System.out.println("nextToken(" + pos
697                                       + "," + endPos + ")=|" +
698                                       new String(reader.getRawCharacters(pos, endPos))
699                                       + "|");
700            }
701        }
702    }
703    //where
704        List<Comment> addComment(List<Comment> comments, Comment comment) {
705            return comments == null ?
706                    List.of(comment) :
707                    comments.prepend(comment);
708        }
709
710    /** Return the position where a lexical error occurred;
711     */
712    public int errPos() {
713        return errPos;
714    }
715
716    /** Set the position where a lexical error occurred;
717     */
718    public void errPos(int pos) {
719        errPos = pos;
720    }
721
722    /**
723     * Called when a complete comment has been scanned. pos and endPos
724     * will mark the comment boundary.
725     */
726    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
727        if (scannerDebug)
728            System.out.println("processComment(" + pos
729                               + "," + endPos + "," + style + ")=|"
730                               + new String(reader.getRawCharacters(pos, endPos))
731                               + "|");
732        char[] buf = reader.getRawCharacters(pos, endPos);
733        return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
734    }
735
736    /**
737     * Called when a complete whitespace run has been scanned. pos and endPos
738     * will mark the whitespace boundary.
739     */
740    protected void processWhiteSpace(int pos, int endPos) {
741        if (scannerDebug)
742            System.out.println("processWhitespace(" + pos
743                               + "," + endPos + ")=|" +
744                               new String(reader.getRawCharacters(pos, endPos))
745                               + "|");
746    }
747
748    /**
749     * Called when a line terminator has been processed.
750     */
751    protected void processLineTerminator(int pos, int endPos) {
752        if (scannerDebug)
753            System.out.println("processTerminator(" + pos
754                               + "," + endPos + ")=|" +
755                               new String(reader.getRawCharacters(pos, endPos))
756                               + "|");
757    }
758
759    /** Build a map for translating between line numbers and
760     * positions in the input.
761     *
762     * @return a LineMap */
763    public Position.LineMap getLineMap() {
764        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
765    }
766
767
768    /**
769    * Scan a documentation comment; determine if a deprecated tag is present.
770    * Called once the initial /, * have been skipped, positioned at the second *
771    * (which is treated as the beginning of the first line).
772    * Stops positioned at the closing '/'.
773    */
774    protected static class BasicComment<U extends UnicodeReader> implements Comment {
775
776        CommentStyle cs;
777        U comment_reader;
778
779        protected boolean deprecatedFlag = false;
780        protected boolean scanned = false;
781
782        protected BasicComment(U comment_reader, CommentStyle cs) {
783            this.comment_reader = comment_reader;
784            this.cs = cs;
785        }
786
787        public String getText() {
788            return null;
789        }
790
791        public int getSourcePos(int pos) {
792            return -1;
793        }
794
795        public CommentStyle getStyle() {
796            return cs;
797        }
798
799        public boolean isDeprecated() {
800            if (!scanned && cs == CommentStyle.JAVADOC) {
801                scanDocComment();
802            }
803            return deprecatedFlag;
804        }
805
806        @SuppressWarnings("fallthrough")
807        protected void scanDocComment() {
808            try {
809                boolean deprecatedPrefix = false;
810
811                comment_reader.bp += 3; // '/**'
812                comment_reader.ch = comment_reader.buf[comment_reader.bp];
813
814                forEachLine:
815                while (comment_reader.bp < comment_reader.buflen) {
816
817                    // Skip optional WhiteSpace at beginning of line
818                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
819                        comment_reader.scanCommentChar();
820                    }
821
822                    // Skip optional consecutive Stars
823                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
824                        comment_reader.scanCommentChar();
825                        if (comment_reader.ch == '/') {
826                            return;
827                        }
828                    }
829
830                    // Skip optional WhiteSpace after Stars
831                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
832                        comment_reader.scanCommentChar();
833                    }
834
835                    deprecatedPrefix = false;
836                    // At beginning of line in the JavaDoc sense.
837                    if (!deprecatedFlag) {
838                        String deprecated = "@deprecated";
839                        int i = 0;
840                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
841                            comment_reader.scanCommentChar();
842                            i++;
843                            if (i == deprecated.length()) {
844                                deprecatedPrefix = true;
845                                break;
846                            }
847                        }
848                    }
849
850                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
851                        if (Character.isWhitespace(comment_reader.ch)) {
852                            deprecatedFlag = true;
853                        } else if (comment_reader.ch == '*') {
854                            comment_reader.scanCommentChar();
855                            if (comment_reader.ch == '/') {
856                                deprecatedFlag = true;
857                                return;
858                            }
859                        }
860                    }
861
862                    // Skip rest of line
863                    while (comment_reader.bp < comment_reader.buflen) {
864                        switch (comment_reader.ch) {
865                            case '*':
866                                comment_reader.scanCommentChar();
867                                if (comment_reader.ch == '/') {
868                                    return;
869                                }
870                                break;
871                            case CR: // (Spec 3.4)
872                                comment_reader.scanCommentChar();
873                                if (comment_reader.ch != LF) {
874                                    continue forEachLine;
875                                }
876                            /* fall through to LF case */
877                            case LF: // (Spec 3.4)
878                                comment_reader.scanCommentChar();
879                                continue forEachLine;
880                            default:
881                                comment_reader.scanCommentChar();
882                        }
883                    } // rest of line
884                } // forEachLine
885                return;
886            } finally {
887                scanned = true;
888            }
889        }
890    }
891}
892