JavaTokenizer.java revision 3201:c3b040ed4122
1/*
2 * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.parser;
27
28import com.sun.tools.javac.code.Source;
29import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30import com.sun.tools.javac.util.*;
31
32import java.nio.CharBuffer;
33
34import static com.sun.tools.javac.parser.Tokens.*;
35import static com.sun.tools.javac.util.LayoutCharacters.*;
36
37/** The lexical analyzer maps an input stream consisting of
38 *  ASCII characters and Unicode escapes into a token sequence.
39 *
40 *  <p><b>This is NOT part of any supported API.
41 *  If you write code that depends on this, you do so at your own risk.
42 *  This code and its internal interfaces are subject to change or
43 *  deletion without notice.</b>
44 */
45public class JavaTokenizer {
46
    /** When true, the scanner prints a trace of every token, comment,
     *  whitespace run and line terminator it processes to System.out.
     */
    private static final boolean scannerDebug = false;

    /** Allow binary literals.  Initialised from the source level; switched
     *  on after the first "unsupported" error has been reported so that the
     *  error is not repeated.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.  Initialised from the source level;
     *  switched on after the first "unsupported" error has been reported so
     *  that the error is not repeated.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set while scanning in readToken(); lexError() sets
     *  it to TokenKind.ERROR.
     */
    protected TokenKind tk;

    /** The token's radix; reset to 0 by readToken() and set by the number
     *  scanning methods.
     */
    protected int radix;

    /** The token's name; reset to null by readToken() and set when an
     *  identifier is scanned.
     */
    protected Name name;

    /** The position where a lexical error occurred, or Position.NOPOS
     *  if no error has been recorded.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory that created this tokenizer; also used to create the
     *  readers for comments.
     */
    protected ScannerFactory fac;
89
90    private static final boolean hexFloatsWork = hexFloatsWork();
91    private static boolean hexFloatsWork() {
92        try {
93            Float.valueOf("0x1.0p1");
94            return true;
95        } catch (NumberFormatException ex) {
96            return false;
97        }
98    }
99
100    /**
101     * Create a scanner from the input array.  This method might
102     * modify the array.  To avoid copying the input array, ensure
103     * that {@code inputLength < input.length} or
104     * {@code input[input.length -1]} is a white space character.
105     *
106     * @param fac the factory which created this Scanner
107     * @param buf the input, might be modified
108     * Must be positive and less than or equal to input.length.
109     */
110    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
111        this(fac, new UnicodeReader(fac, buf));
112    }
113
114    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
115        this(fac, new UnicodeReader(fac, buf, inputLength));
116    }
117
118    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
119        this.fac = fac;
120        this.log = fac.log;
121        this.tokens = fac.tokens;
122        this.source = fac.source;
123        this.reader = reader;
124        this.allowBinaryLiterals = source.allowBinaryLiterals();
125        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
126    }
127
128    /** Report an error at the given position using the provided arguments.
129     */
130    protected void lexError(int pos, String key, Object... args) {
131        log.error(pos, key, args);
132        tk = TokenKind.ERROR;
133        errPos = pos;
134    }
135
136    /** Read next character in character or string literal and copy into sbuf.
137     */
138    private void scanLitChar(int pos) {
139        if (reader.ch == '\\') {
140            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
141                reader.skipChar();
142                reader.putChar('\\', true);
143            } else {
144                reader.scanChar();
145                switch (reader.ch) {
146                case '0': case '1': case '2': case '3':
147                case '4': case '5': case '6': case '7':
148                    char leadch = reader.ch;
149                    int oct = reader.digit(pos, 8);
150                    reader.scanChar();
151                    if ('0' <= reader.ch && reader.ch <= '7') {
152                        oct = oct * 8 + reader.digit(pos, 8);
153                        reader.scanChar();
154                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
155                            oct = oct * 8 + reader.digit(pos, 8);
156                            reader.scanChar();
157                        }
158                    }
159                    reader.putChar((char)oct);
160                    break;
161                case 'b':
162                    reader.putChar('\b', true); break;
163                case 't':
164                    reader.putChar('\t', true); break;
165                case 'n':
166                    reader.putChar('\n', true); break;
167                case 'f':
168                    reader.putChar('\f', true); break;
169                case 'r':
170                    reader.putChar('\r', true); break;
171                case '\'':
172                    reader.putChar('\'', true); break;
173                case '\"':
174                    reader.putChar('\"', true); break;
175                case '\\':
176                    reader.putChar('\\', true); break;
177                default:
178                    lexError(reader.bp, "illegal.esc.char");
179                }
180            }
181        } else if (reader.bp != reader.buflen) {
182            reader.putChar(true);
183        }
184    }
185
186    private void scanDigits(int pos, int digitRadix) {
187        char saveCh;
188        int savePos;
189        do {
190            if (reader.ch != '_') {
191                reader.putChar(false);
192            } else {
193                if (!allowUnderscoresInLiterals) {
194                    lexError(pos, "unsupported.underscore.lit", source.name);
195                    allowUnderscoresInLiterals = true;
196                }
197            }
198            saveCh = reader.ch;
199            savePos = reader.bp;
200            reader.scanChar();
201        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
202        if (saveCh == '_')
203            lexError(savePos, "illegal.underscore");
204    }
205
206    /** Read fractional part of hexadecimal floating point number.
207     */
208    private void scanHexExponentAndSuffix(int pos) {
209        if (reader.ch == 'p' || reader.ch == 'P') {
210            reader.putChar(true);
211            skipIllegalUnderscores();
212            if (reader.ch == '+' || reader.ch == '-') {
213                reader.putChar(true);
214            }
215            skipIllegalUnderscores();
216            if (reader.digit(pos, 10) >= 0) {
217                scanDigits(pos, 10);
218                if (!hexFloatsWork)
219                    lexError(pos, "unsupported.cross.fp.lit");
220            } else
221                lexError(pos, "malformed.fp.lit");
222        } else {
223            lexError(pos, "malformed.fp.lit");
224        }
225        if (reader.ch == 'f' || reader.ch == 'F') {
226            reader.putChar(true);
227            tk = TokenKind.FLOATLITERAL;
228            radix = 16;
229        } else {
230            if (reader.ch == 'd' || reader.ch == 'D') {
231                reader.putChar(true);
232            }
233            tk = TokenKind.DOUBLELITERAL;
234            radix = 16;
235        }
236    }
237
238    /** Read fractional part of floating point number.
239     */
240    private void scanFraction(int pos) {
241        skipIllegalUnderscores();
242        if (reader.digit(pos, 10) >= 0) {
243            scanDigits(pos, 10);
244        }
245        int sp1 = reader.sp;
246        if (reader.ch == 'e' || reader.ch == 'E') {
247            reader.putChar(true);
248            skipIllegalUnderscores();
249            if (reader.ch == '+' || reader.ch == '-') {
250                reader.putChar(true);
251            }
252            skipIllegalUnderscores();
253            if (reader.digit(pos, 10) >= 0) {
254                scanDigits(pos, 10);
255                return;
256            }
257            lexError(pos, "malformed.fp.lit");
258            reader.sp = sp1;
259        }
260    }
261
262    /** Read fractional part and 'd' or 'f' suffix of floating point number.
263     */
264    private void scanFractionAndSuffix(int pos) {
265        radix = 10;
266        scanFraction(pos);
267        if (reader.ch == 'f' || reader.ch == 'F') {
268            reader.putChar(true);
269            tk = TokenKind.FLOATLITERAL;
270        } else {
271            if (reader.ch == 'd' || reader.ch == 'D') {
272                reader.putChar(true);
273            }
274            tk = TokenKind.DOUBLELITERAL;
275        }
276    }
277
278    /** Read fractional part and 'd' or 'f' suffix of floating point number.
279     */
280    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
281        radix = 16;
282        Assert.check(reader.ch == '.');
283        reader.putChar(true);
284        skipIllegalUnderscores();
285        if (reader.digit(pos, 16) >= 0) {
286            seendigit = true;
287            scanDigits(pos, 16);
288        }
289        if (!seendigit)
290            lexError(pos, "invalid.hex.number");
291        else
292            scanHexExponentAndSuffix(pos);
293    }
294
295    private void skipIllegalUnderscores() {
296        if (reader.ch == '_') {
297            lexError(reader.bp, "illegal.underscore");
298            while (reader.ch == '_')
299                reader.scanChar();
300        }
301    }
302
303    /** Read a number.
304     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
305     */
306    private void scanNumber(int pos, int radix) {
307        // for octal, allow base-10 digit in case it's a float literal
308        this.radix = radix;
309        int digitRadix = (radix == 8 ? 10 : radix);
310        int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
311        boolean seendigit = firstDigit >= 0;
312        boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
313        if (seendigit) {
314            scanDigits(pos, digitRadix);
315        }
316        if (radix == 16 && reader.ch == '.') {
317            scanHexFractionAndSuffix(pos, seendigit);
318        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
319            scanHexExponentAndSuffix(pos);
320        } else if (digitRadix == 10 && reader.ch == '.') {
321            reader.putChar(true);
322            scanFractionAndSuffix(pos);
323        } else if (digitRadix == 10 &&
324                   (reader.ch == 'e' || reader.ch == 'E' ||
325                    reader.ch == 'f' || reader.ch == 'F' ||
326                    reader.ch == 'd' || reader.ch == 'D')) {
327            scanFractionAndSuffix(pos);
328        } else {
329            if (!seenValidDigit) {
330                switch (radix) {
331                case 2:
332                    lexError(pos, "invalid.binary.number");
333                    break;
334                case 16:
335                    lexError(pos, "invalid.hex.number");
336                    break;
337                }
338            }
339            if (reader.ch == 'l' || reader.ch == 'L') {
340                reader.scanChar();
341                tk = TokenKind.LONGLITERAL;
342            } else {
343                tk = TokenKind.INTLITERAL;
344            }
345        }
346    }
347
348    /** Read an identifier.
349     */
350    private void scanIdent() {
351        boolean isJavaIdentifierPart;
352        char high;
353        reader.putChar(true);
354        do {
355            switch (reader.ch) {
356            case 'A': case 'B': case 'C': case 'D': case 'E':
357            case 'F': case 'G': case 'H': case 'I': case 'J':
358            case 'K': case 'L': case 'M': case 'N': case 'O':
359            case 'P': case 'Q': case 'R': case 'S': case 'T':
360            case 'U': case 'V': case 'W': case 'X': case 'Y':
361            case 'Z':
362            case 'a': case 'b': case 'c': case 'd': case 'e':
363            case 'f': case 'g': case 'h': case 'i': case 'j':
364            case 'k': case 'l': case 'm': case 'n': case 'o':
365            case 'p': case 'q': case 'r': case 's': case 't':
366            case 'u': case 'v': case 'w': case 'x': case 'y':
367            case 'z':
368            case '$': case '_':
369            case '0': case '1': case '2': case '3': case '4':
370            case '5': case '6': case '7': case '8': case '9':
371                break;
372            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
373            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
374            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
375            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
376            case '\u0015': case '\u0016': case '\u0017':
377            case '\u0018': case '\u0019': case '\u001B':
378            case '\u007F':
379                reader.scanChar();
380                continue;
381            case '\u001A': // EOI is also a legal identifier part
382                if (reader.bp >= reader.buflen) {
383                    name = reader.name();
384                    tk = tokens.lookupKind(name);
385                    return;
386                }
387                reader.scanChar();
388                continue;
389            default:
390                if (reader.ch < '\u0080') {
391                    // all ASCII range chars already handled, above
392                    isJavaIdentifierPart = false;
393                } else {
394                    if (Character.isIdentifierIgnorable(reader.ch)) {
395                        reader.scanChar();
396                        continue;
397                    } else {
398                        int codePoint = reader.peekSurrogates();
399                        if (codePoint >= 0) {
400                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
401                                reader.putChar(true);
402                            }
403                        } else {
404                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
405                        }
406                    }
407                }
408                if (!isJavaIdentifierPart) {
409                    name = reader.name();
410                    tk = tokens.lookupKind(name);
411                    return;
412                }
413            }
414            reader.putChar(true);
415        } while (true);
416    }
417
418    /** Return true if reader.ch can be part of an operator.
419     */
420    private boolean isSpecial(char ch) {
421        switch (ch) {
422        case '!': case '%': case '&': case '*': case '?':
423        case '+': case '-': case ':': case '<': case '=':
424        case '>': case '^': case '|': case '~':
425        case '@':
426            return true;
427        default:
428            return false;
429        }
430    }
431
432    /** Read longest possible sequence of special characters and convert
433     *  to token.
434     */
435    private void scanOperator() {
436        while (true) {
437            reader.putChar(false);
438            Name newname = reader.name();
439            TokenKind tk1 = tokens.lookupKind(newname);
440            if (tk1 == TokenKind.IDENTIFIER) {
441                reader.sp--;
442                break;
443            }
444            tk = tk1;
445            reader.scanChar();
446            if (!isSpecial(reader.ch)) break;
447        }
448    }
449
    /** Read the next token from the reader.
     *
     *  <p>Whitespace, line terminators and comments are consumed in a loop:
     *  whitespace and line terminators are reported through
     *  {@code processWhiteSpace} and {@code processLineTerminator}, comments
     *  are built by {@code processComment} and collected for the token that
     *  follows.  The loop is left as soon as a token, the end of input or a
     *  lexical error has been recognised.
     *
     *  @return a new {@code Token}, {@code NamedToken}, {@code StringToken}
     *          or {@code NumericToken}, selected by {@code tk.tag}, that
     *          spans from the token's start position to the reader position
     *          after the scan and carries the collected comments
     */
    public Token readToken() {

        // reset the per-token state
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            // a plain "break" restarts the loop (something was skipped);
            // "break loop" ends it (tk has been set, or an error was reported)
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    // CR LF counts as a single line terminator
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // a leading zero selects hexadecimal (0x), binary (0b) or octal
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        if (!allowBinaryLiterals) {
                            lexError(pos, "unsupported.binary.lit", source.name);
                            // report the unsupported feature only once
                            allowBinaryLiterals = true;
                        }
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // underscores after the zero must be followed by a digit
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, "illegal.underscore");
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // a floating point literal, an ellipsis or a single dot
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            // exactly two dots
                            lexError(savePos, "illegal.dot");
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    // a line comment, a block or javadoc comment, "/=" or "/"
                    reader.scanChar();
                    if (reader.ch == '/') {
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // the comment is only recorded when it ended before
                        // the end of the buffer
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            // a slash directly after the two stars closes the
                            // comment already; the search below is skipped
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // search for the closing star-slash pair
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            // input ended inside the comment
                            lexError(pos, "unclosed.comment");
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, "empty.char.lit");
                        reader.scanChar();
                    } else {
                        if (reader.ch == CR || reader.ch == LF)
                            lexError(pos, "illegal.line.end.in.char.lit");
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, "unclosed.char.lit");
                        }
                    }
                    break loop;
                case '\"':
                    reader.scanChar();
                    // a string literal ends at the closing quote; a line end or
                    // the end of the buffer leaves it unclosed
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
                        scanLitChar(pos);
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, "unclosed.str.lit");
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // surrogate pair: copy the first half here when it
                                // starts an identifier; scanIdent copies the rest
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // end of input; the EOF token is positioned at the
                            // end of the buffer
                            tk = TokenKind.EOF;
                            pos = reader.buflen;
                        } else {
                            // illegal character: build a printable form of it
                            // for the error message
                            String arg;

                            if (codePoint >= 0) {
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, "illegal.char", arg);
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // the tag of the token kind selects the token class to create
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
704    //where
705        List<Comment> addComment(List<Comment> comments, Comment comment) {
706            return comments == null ?
707                    List.of(comment) :
708                    comments.prepend(comment);
709        }
710
711    /** Return the position where a lexical error occurred;
712     */
713    public int errPos() {
714        return errPos;
715    }
716
717    /** Set the position where a lexical error occurred;
718     */
719    public void errPos(int pos) {
720        errPos = pos;
721    }
722
723    /**
724     * Called when a complete comment has been scanned. pos and endPos
725     * will mark the comment boundary.
726     */
727    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
728        if (scannerDebug)
729            System.out.println("processComment(" + pos
730                               + "," + endPos + "," + style + ")=|"
731                               + new String(reader.getRawCharacters(pos, endPos))
732                               + "|");
733        char[] buf = reader.getRawCharacters(pos, endPos);
734        return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
735    }
736
737    /**
738     * Called when a complete whitespace run has been scanned. pos and endPos
739     * will mark the whitespace boundary.
740     */
741    protected void processWhiteSpace(int pos, int endPos) {
742        if (scannerDebug)
743            System.out.println("processWhitespace(" + pos
744                               + "," + endPos + ")=|" +
745                               new String(reader.getRawCharacters(pos, endPos))
746                               + "|");
747    }
748
749    /**
750     * Called when a line terminator has been processed.
751     */
752    protected void processLineTerminator(int pos, int endPos) {
753        if (scannerDebug)
754            System.out.println("processTerminator(" + pos
755                               + "," + endPos + ")=|" +
756                               new String(reader.getRawCharacters(pos, endPos))
757                               + "|");
758    }
759
760    /** Build a map for translating between line numbers and
761     * positions in the input.
762     *
763     * @return a LineMap */
764    public Position.LineMap getLineMap() {
765        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
766    }
767
768
    /**
     * A basic comment that can scan a documentation comment to determine
     * if a deprecated tag is present.
     * The scan starts at the beginning of the comment text: the initial
     * '/', '*', '*' are skipped first, and the position after them is
     * treated as the beginning of the first line.
     * The scan stops positioned at the closing '/', or at the end of the
     * comment text if no closing '/' is found.
     */
775    protected static class BasicComment<U extends UnicodeReader> implements Comment {
776
777        CommentStyle cs;
778        U comment_reader;
779
780        protected boolean deprecatedFlag = false;
781        protected boolean scanned = false;
782
783        protected BasicComment(U comment_reader, CommentStyle cs) {
784            this.comment_reader = comment_reader;
785            this.cs = cs;
786        }
787
788        public String getText() {
789            return null;
790        }
791
792        public int getSourcePos(int pos) {
793            return -1;
794        }
795
796        public CommentStyle getStyle() {
797            return cs;
798        }
799
800        public boolean isDeprecated() {
801            if (!scanned && cs == CommentStyle.JAVADOC) {
802                scanDocComment();
803            }
804            return deprecatedFlag;
805        }
806
807        @SuppressWarnings("fallthrough")
808        protected void scanDocComment() {
809            try {
810                boolean deprecatedPrefix = false;
811
812                comment_reader.bp += 3; // '/**'
813                comment_reader.ch = comment_reader.buf[comment_reader.bp];
814
815                forEachLine:
816                while (comment_reader.bp < comment_reader.buflen) {
817
818                    // Skip optional WhiteSpace at beginning of line
819                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
820                        comment_reader.scanCommentChar();
821                    }
822
823                    // Skip optional consecutive Stars
824                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
825                        comment_reader.scanCommentChar();
826                        if (comment_reader.ch == '/') {
827                            return;
828                        }
829                    }
830
831                    // Skip optional WhiteSpace after Stars
832                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
833                        comment_reader.scanCommentChar();
834                    }
835
836                    deprecatedPrefix = false;
837                    // At beginning of line in the JavaDoc sense.
838                    if (!deprecatedFlag) {
839                        String deprecated = "@deprecated";
840                        int i = 0;
841                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
842                            comment_reader.scanCommentChar();
843                            i++;
844                            if (i == deprecated.length()) {
845                                deprecatedPrefix = true;
846                                break;
847                            }
848                        }
849                    }
850
851                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
852                        if (Character.isWhitespace(comment_reader.ch)) {
853                            deprecatedFlag = true;
854                        } else if (comment_reader.ch == '*') {
855                            comment_reader.scanCommentChar();
856                            if (comment_reader.ch == '/') {
857                                deprecatedFlag = true;
858                                return;
859                            }
860                        }
861                    }
862
863                    // Skip rest of line
864                    while (comment_reader.bp < comment_reader.buflen) {
865                        switch (comment_reader.ch) {
866                            case '*':
867                                comment_reader.scanCommentChar();
868                                if (comment_reader.ch == '/') {
869                                    return;
870                                }
871                                break;
872                            case CR: // (Spec 3.4)
873                                comment_reader.scanCommentChar();
874                                if (comment_reader.ch != LF) {
875                                    continue forEachLine;
876                                }
877                            /* fall through to LF case */
878                            case LF: // (Spec 3.4)
879                                comment_reader.scanCommentChar();
880                                continue forEachLine;
881                            default:
882                                comment_reader.scanCommentChar();
883                        }
884                    } // rest of line
885                } // forEachLine
886                return;
887            } finally {
888                scanned = true;
889            }
890        }
891    }
892}
893