1/*
2 * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package sun.tools.java;
27
28import java.io.IOException;
29import java.io.InputStream;
30import java.util.Hashtable;
31
32/**
33 * A Scanner for Java tokens. Errors are reported
34 * to the environment object.<p>
35 *
36 * The scanner keeps track of the current token,
37 * the value of the current token (if any), and the start
38 * position of the current token.<p>
39 *
40 * The scan() method advances the scanner to the next
41 * token in the input.<p>
42 *
 * The match() method is used to quickly match opening
 * brackets (i.e. '(', '{', or '[') with their closing
 * counterpart. This is useful during error recovery.<p>
46 *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset).
 * This means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
50 *
51 * The compiler treats either "\n", "\r" or "\r\n" as the
52 * end of a line.<p>
53 *
54 * WARNING: The contents of this source file are not part of any
55 * supported API.  Code that depends on them does so at its own risk:
56 * they are subject to change or removal without notice.
57 *
58 * @author      Arthur van Hoff
59 */
60
61@SuppressWarnings("deprecation")
62public
63class Scanner implements Constants {
    /**
     * The increment for each character: the amount a position value
     * grows by when the offset advances by one.
     */
    public static final long OFFSETINC = 1;

    /**
     * The increment for each line: one unit in the line-number part of
     * a position value, which sits above the low WHEREOFFSETBITS bits.
     */
    public static final long LINEINC = 1L << WHEREOFFSETBITS;

    /**
     * End of input.  The scan methods compare the current character
     * against this value to detect the end of the stream.
     */
    public static final int EOF = -1;

    /**
     * Where errors are reported.
     */
    public Environment env;

    /**
     * Input reader.  Installed by useInputStream().
     */
    protected ScannerInputReader in;

    /**
     * If true, present all comments as tokens.
     * Contents are not saved, but positions are recorded accurately,
     * so the comment can be recovered from the text.
     * Line terminations are also returned as comment tokens,
     * and may be distinguished by their start and end positions,
     * which are equal (meaning, these tokens contain no chars).
     */
    public boolean scanComments = false;

    /**
     * Current token.
     */
    public int token;

    /**
     * The position of the current token.
     */
    public long pos;

    /**
     * The position of the previous token.  useInputStream() seeds it
     * with the reader's position before the first token is scanned.
     */
    public long prevPos;

    /**
     * The current character, i.e. the most recent value read from the
     * input reader that has not yet been consumed (or EOF).
     */
    protected int ch;

    /*
     * Token values.  Each field is only meaningful when the current
     * token is of the matching kind:
     *   charValue                -- written by scanCharacter()
     *   intValue, longValue      -- written by scanNumber()
     *   floatValue, doubleValue  -- written by scanReal()
     *   stringValue              -- written by scanString()
     *   idValue                  -- presumably written by scanIdentifier();
     *                               NOTE(review): confirm, the body is not
     *                               visible in this part of the file
     *   radix                    -- set by scanNumber() to 8, 10 or 16
     */
    public char charValue;
    public int intValue;
    public long longValue;
    public float floatValue;
    public double doubleValue;
    public String stringValue;
    public Identifier idValue;
    public int radix;   // Radix, when reading int or long

    /*
     * A doc comment preceding the most recent token
     */
    public String docComment;
135
136    /*
137     * A growable character buffer.
138     */
139    private int count;
140    private char buffer[] = new char[1024];
141    private void growBuffer() {
142        char newBuffer[] = new char[buffer.length * 2];
143        System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
144        buffer = newBuffer;
145    }
146
147    // The following two methods have been hand-inlined in
148    // scanDocComment.  If you make changes here, you should
149    // check to see if scanDocComment also needs modification.
150    private void putc(int ch) {
151        if (count == buffer.length) {
152            growBuffer();
153        }
154        buffer[count++] = (char)ch;
155    }
156
157    private String bufferString() {
158        return new String(buffer, 0, count);
159    }
160
161    /**
162     * Create a scanner to scan an input stream.
163     */
164    public Scanner(Environment env, InputStream in) throws IOException {
165        this.env = env;
166        useInputStream(in);
167    }
168
169    /**
170     * Setup input from the given input stream,
171     * and scan the first token from it.
172     */
173    protected void useInputStream(InputStream in) throws IOException {
174        try {
175            this.in = new ScannerInputReader(env, in);
176        } catch (Exception e) {
177            env.setCharacterEncoding(null);
178            this.in = new ScannerInputReader(env, in);
179        }
180
181        ch = this.in.read();
182        prevPos = this.in.pos;
183
184        scan();
185    }
186
187    /**
188     * Create a scanner to scan an input stream.
189     */
190    protected Scanner(Environment env) {
191        this.env = env;
192        // Expect the subclass to call useInputStream at the right time.
193    }
194
195    /**
196     * Define a keyword.
197     */
198    private static void defineKeyword(int val) {
199        Identifier.lookup(opNames[val]).setType(val);
200    }
201
202    /**
203     * Initialized keyword and token Hashtables
204     */
205    static {
206        // Statement keywords
207        defineKeyword(FOR);
208        defineKeyword(IF);
209        defineKeyword(ELSE);
210        defineKeyword(WHILE);
211        defineKeyword(DO);
212        defineKeyword(SWITCH);
213        defineKeyword(CASE);
214        defineKeyword(DEFAULT);
215        defineKeyword(BREAK);
216        defineKeyword(CONTINUE);
217        defineKeyword(RETURN);
218        defineKeyword(TRY);
219        defineKeyword(CATCH);
220        defineKeyword(FINALLY);
221        defineKeyword(THROW);
222
223        // Type defineKeywords
224        defineKeyword(BYTE);
225        defineKeyword(CHAR);
226        defineKeyword(SHORT);
227        defineKeyword(INT);
228        defineKeyword(LONG);
229        defineKeyword(FLOAT);
230        defineKeyword(DOUBLE);
231        defineKeyword(VOID);
232        defineKeyword(BOOLEAN);
233
234        // Expression keywords
235        defineKeyword(INSTANCEOF);
236        defineKeyword(TRUE);
237        defineKeyword(FALSE);
238        defineKeyword(NEW);
239        defineKeyword(THIS);
240        defineKeyword(SUPER);
241        defineKeyword(NULL);
242
243        // Declaration keywords
244        defineKeyword(IMPORT);
245        defineKeyword(CLASS);
246        defineKeyword(EXTENDS);
247        defineKeyword(IMPLEMENTS);
248        defineKeyword(INTERFACE);
249        defineKeyword(PACKAGE);
250        defineKeyword(THROWS);
251
252        // Modifier keywords
253        defineKeyword(PRIVATE);
254        defineKeyword(PUBLIC);
255        defineKeyword(PROTECTED);
256        defineKeyword(STATIC);
257        defineKeyword(TRANSIENT);
258        defineKeyword(SYNCHRONIZED);
259        defineKeyword(NATIVE);
260        defineKeyword(ABSTRACT);
261        defineKeyword(VOLATILE);
262        defineKeyword(FINAL);
263        defineKeyword(STRICTFP);
264
265        // reserved keywords
266        defineKeyword(CONST);
267        defineKeyword(GOTO);
268    }
269
270    /**
271     * Scan a comment. This method should be
272     * called once the initial /, * and the next
273     * character have been read.
274     */
275    private void skipComment() throws IOException {
276        while (true) {
277            switch (ch) {
278              case EOF:
279                env.error(pos, "eof.in.comment");
280                return;
281
282              case '*':
283                if ((ch = in.read()) == '/')  {
284                    ch = in.read();
285                    return;
286                }
287                break;
288
289              default:
290                ch = in.read();
291                break;
292            }
293        }
294    }
295
296    /**
297     * Scan a doc comment. This method should be called
298     * once the initial /, * and * have been read. It gathers
299     * the content of the comment (witout leading spaces and '*'s)
300     * in the string buffer.
301     */
302    private String scanDocComment() throws IOException {
303        // Note: this method has been hand-optimized to yield
304        // better performance.  This was done after it was noted
305        // that javadoc spent a great deal of its time here.
306        // This should also help the performance of the compiler
307        // as well -- it scans the doc comments to find
308        // @deprecated tags.
309        //
310        // The logic of the method has been completely rewritten
311        // to avoid the use of flags that need to be looked at
312        // for every character read.  Members that are accessed
313        // more than once have been stored in local variables.
314        // The methods putc() and bufferString() have been
315        // inlined by hand.  Extra cases have been added to
316        // switch statements to trick the compiler into generating
317        // a tableswitch instead of a lookupswitch.
318        //
319        // This implementation aims to preserve the previous
320        // behavior of this method.
321
322        int c;
323
324        // Put `in' in a local variable.
325        final ScannerInputReader in = this.in;
326
327        // We maintain the buffer locally rather than calling putc().
328        char[] buffer = this.buffer;
329        int count = 0;
330
331        // We are called pointing at the second star of the doc
332        // comment:
333        //
334        // Input: /** the rest of the comment ... */
335        //          ^
336        //
337        // We rely on this in the code below.
338
339        // Consume any number of stars.
340        while ((c = in.read()) == '*')
341            ;
342
343        // Is the comment of the form /**/, /***/, /****/, etc.?
344        if (c == '/') {
345            // Set ch and return
346            ch = in.read();
347            return "";
348        }
349
350        // Skip a newline on the first line of the comment.
351        if (c == '\n') {
352            c = in.read();
353        }
354
355    outerLoop:
356        // The outerLoop processes the doc comment, looping once
357        // for each line.  For each line, it first strips off
358        // whitespace, then it consumes any stars, then it
359        // puts the rest of the line into our buffer.
360        while (true) {
361
362            // The wsLoop consumes whitespace from the beginning
363            // of each line.
364        wsLoop:
365            while (true) {
366                switch (c) {
367                case ' ':
368                case '\t':
369                    // We could check for other forms of whitespace
370                    // as well, but this is left as is for minimum
371                    // disturbance of functionality.
372                    //
373                    // Just skip whitespace.
374                    c = in.read();
375                    break;
376
377                // We have added extra cases here to trick the
378                // compiler into using a tableswitch instead of
379                // a lookupswitch.  They can be removed without
380                // a change in meaning.
381                case 10: case 11: case 12: case 13: case 14: case 15:
382                case 16: case 17: case 18: case 19: case 20: case 21:
383                case 22: case 23: case 24: case 25: case 26: case 27:
384                case 28: case 29: case 30: case 31:
385                default:
386                    // We've seen something that isn't whitespace,
387                    // jump out.
388                    break wsLoop;
389                }
390            } // end wsLoop.
391
392            // Are there stars here?  If so, consume them all
393            // and check for the end of comment.
394            if (c == '*') {
395                // Skip all of the stars...
396                do {
397                    c = in.read();
398                } while (c == '*');
399
400                // ...then check for the closing slash.
401                if (c == '/') {
402                    // We're done with the doc comment.
403                    // Set ch and break out.
404                    ch = in.read();
405                    break outerLoop;
406                }
407            }
408
409            // The textLoop processes the rest of the characters
410            // on the line, adding them to our buffer.
411        textLoop:
412            while (true) {
413                switch (c) {
414                case EOF:
415                    // We've seen a premature EOF.  Break out
416                    // of the loop.
417                    env.error(pos, "eof.in.comment");
418                    ch = EOF;
419                    break outerLoop;
420
421                case '*':
422                    // Is this just a star?  Or is this the
423                    // end of a comment?
424                    c = in.read();
425                    if (c == '/') {
426                        // This is the end of the comment,
427                        // set ch and return our buffer.
428                        ch = in.read();
429                        break outerLoop;
430                    }
431                    // This is just an ordinary star.  Add it to
432                    // the buffer.
433                    if (count == buffer.length) {
434                        growBuffer();
435                        buffer = this.buffer;
436                    }
437                    buffer[count++] = '*';
438                    break;
439
440                case '\n':
441                    // We've seen a newline.  Add it to our
442                    // buffer and break out of this loop,
443                    // starting fresh on a new line.
444                    if (count == buffer.length) {
445                        growBuffer();
446                        buffer = this.buffer;
447                    }
448                    buffer[count++] = '\n';
449                    c = in.read();
450                    break textLoop;
451
452                // Again, the extra cases here are a trick
453                // to get the compiler to generate a tableswitch.
454                case 0: case 1: case 2: case 3: case 4: case 5:
455                case 6: case 7: case 8: case 11: case 12: case 13:
456                case 14: case 15: case 16: case 17: case 18: case 19:
457                case 20: case 21: case 22: case 23: case 24: case 25:
458                case 26: case 27: case 28: case 29: case 30: case 31:
459                case 32: case 33: case 34: case 35: case 36: case 37:
460                case 38: case 39: case 40:
461                default:
462                    // Add the character to our buffer.
463                    if (count == buffer.length) {
464                        growBuffer();
465                        buffer = this.buffer;
466                    }
467                    buffer[count++] = (char)c;
468                    c = in.read();
469                    break;
470                }
471            } // end textLoop
472        } // end outerLoop
473
474        // We have scanned our doc comment.  It is stored in
475        // buffer.  The previous implementation of scanDocComment
476        // stripped off all trailing spaces and stars from the comment.
477        // We will do this as well, so as to cause a minimum of
478        // disturbance.  Is this what we want?
479        if (count > 0) {
480            int i = count - 1;
481        trailLoop:
482            while (i > -1) {
483                switch (buffer[i]) {
484                case ' ':
485                case '\t':
486                case '*':
487                    i--;
488                    break;
489                // And again, the extra cases here are a trick
490                // to get the compiler to generate a tableswitch.
491                case 0: case 1: case 2: case 3: case 4: case 5:
492                case 6: case 7: case 8: case 10: case 11: case 12:
493                case 13: case 14: case 15: case 16: case 17: case 18:
494                case 19: case 20: case 21: case 22: case 23: case 24:
495                case 25: case 26: case 27: case 28: case 29: case 30:
496                case 31: case 33: case 34: case 35: case 36: case 37:
497                case 38: case 39: case 40:
498                default:
499                    break trailLoop;
500                }
501            }
502            count = i + 1;
503
504            // Return the text of the doc comment.
505            return new String(buffer, 0, count);
506        } else {
507            return "";
508        }
509    }
510
511    /**
512     * Scan a number. The first digit of the number should be the current
513     * character.  We may be scanning hex, decimal, or octal at this point
514     */
515    @SuppressWarnings("fallthrough")
516    private void scanNumber() throws IOException {
517        boolean seenNonOctal = false;
518        boolean overflow = false;
519        boolean seenDigit = false; // used to detect invalid hex number 0xL
520        radix = (ch == '0' ? 8 : 10);
521        long value = ch - '0';
522        count = 0;
523        putc(ch);               // save character in buffer
524    numberLoop:
525        for (;;) {
526            switch (ch = in.read()) {
527              case '.':
528                if (radix == 16)
529                    break numberLoop; // an illegal character
530                scanReal();
531                return;
532
533              case '8': case '9':
534                // We can't yet throw an error if reading an octal.  We might
535                // discover we're really reading a real.
536                seenNonOctal = true;
537                // Fall through
538              case '0': case '1': case '2': case '3':
539              case '4': case '5': case '6': case '7':
540                seenDigit = true;
541                putc(ch);
542                if (radix == 10) {
543                    overflow = overflow || (value * 10)/10 != value;
544                    value = (value * 10) + (ch - '0');
545                    overflow = overflow || (value - 1 < -1);
546                } else if (radix == 8) {
547                    overflow = overflow || (value >>> 61) != 0;
548                    value = (value << 3) + (ch - '0');
549                } else {
550                    overflow = overflow || (value >>> 60) != 0;
551                    value = (value << 4) + (ch - '0');
552                }
553                break;
554
555              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
556                if (radix != 16) {
557                    scanReal();
558                    return;
559                }
560                // fall through
561              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
562                seenDigit = true;
563                putc(ch);
564                if (radix != 16)
565                    break numberLoop; // an illegal character
566                overflow = overflow || (value >>> 60) != 0;
567                value = (value << 4) + 10 +
568                         Character.toLowerCase((char)ch) - 'a';
569                break;
570
571              case 'l': case 'L':
572                ch = in.read(); // skip over 'l'
573                longValue = value;
574                token = LONGVAL;
575                break numberLoop;
576
577              case 'x': case 'X':
578                // if the first character is a '0' and this is the second
579                // letter, then read in a hexadecimal number.  Otherwise, error.
580                if (count == 1 && radix == 8) {
581                    radix = 16;
582                    seenDigit = false;
583                    break;
584                } else {
585                    // we'll get an illegal character error
586                    break numberLoop;
587                }
588
589              default:
590                intValue = (int)value;
591                token = INTVAL;
592                break numberLoop;
593            }
594        } // while true
595
596        // We have just finished reading the number.  The next thing better
597        // not be a letter or digit.
598        // Note:  There will be deprecation warnings against these uses
599        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
600        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
601        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
602            env.error(in.pos, "invalid.number");
603            do { ch = in.read(); }
604            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
605            intValue = 0;
606            token = INTVAL;
607        } else if (radix == 8 && seenNonOctal) {
608            // A bogus octal literal.
609            intValue = 0;
610            token = INTVAL;
611            env.error(pos, "invalid.octal.number");
612        } else if (radix == 16 && seenDigit == false) {
613            // A hex literal with no digits, 0xL, for example.
614            intValue = 0;
615            token = INTVAL;
616            env.error(pos, "invalid.hex.number");
617        } else {
618            if (token == INTVAL) {
619                // Check for overflow.  Note that base 10 literals
620                // have different rules than base 8 and 16.
621                overflow = overflow ||
622                    (value & 0xFFFFFFFF00000000L) != 0 ||
623                    (radix == 10 && value > 2147483648L);
624
625                if (overflow) {
626                    intValue = 0;
627
628                    // Give a specific error message which tells
629                    // the user the range.
630                    switch (radix) {
631                    case 8:
632                        env.error(pos, "overflow.int.oct");
633                        break;
634                    case 10:
635                        env.error(pos, "overflow.int.dec");
636                        break;
637                    case 16:
638                        env.error(pos, "overflow.int.hex");
639                        break;
640                    default:
641                        throw new CompilerError("invalid radix");
642                    }
643                }
644            } else {
645                if (overflow) {
646                    longValue = 0;
647
648                    // Give a specific error message which tells
649                    // the user the range.
650                    switch (radix) {
651                    case 8:
652                        env.error(pos, "overflow.long.oct");
653                        break;
654                    case 10:
655                        env.error(pos, "overflow.long.dec");
656                        break;
657                    case 16:
658                        env.error(pos, "overflow.long.hex");
659                        break;
660                    default:
661                        throw new CompilerError("invalid radix");
662                    }
663                }
664            }
665        }
666    }
667
668    /**
669     * Scan a float.  We are either looking at the decimal, or we have already
670     * seen it and put it into the buffer.  We haven't seen an exponent.
671     * Scan a float.  Should be called with the current character is either
672     * the 'e', 'E' or '.'
673     */
674    @SuppressWarnings("fallthrough")
675    private void scanReal() throws IOException {
676        boolean seenExponent = false;
677        boolean isSingleFloat = false;
678        char lastChar;
679        if (ch == '.') {
680            putc(ch);
681            ch = in.read();
682        }
683
684    numberLoop:
685        for ( ; ; ch = in.read()) {
686            switch (ch) {
687                case '0': case '1': case '2': case '3': case '4':
688                case '5': case '6': case '7': case '8': case '9':
689                    putc(ch);
690                    break;
691
692                case 'e': case 'E':
693                    if (seenExponent)
694                        break numberLoop; // we'll get a format error
695                    putc(ch);
696                    seenExponent = true;
697                    break;
698
699                case '+': case '-':
700                    lastChar = buffer[count - 1];
701                    if (lastChar != 'e' && lastChar != 'E')
702                        break numberLoop; // this isn't an error, though!
703                    putc(ch);
704                    break;
705
706                case 'f': case 'F':
707                    ch = in.read(); // skip over 'f'
708                    isSingleFloat = true;
709                    break numberLoop;
710
711                case 'd': case 'D':
712                    ch = in.read(); // skip over 'd'
713                    // fall through
714                default:
715                    break numberLoop;
716            } // sswitch
717        } // loop
718
719        // we have just finished reading the number.  The next thing better
720        // not be a letter or digit.
721        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
722            env.error(in.pos, "invalid.number");
723            do { ch = in.read(); }
724            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
725            doubleValue = 0;
726            token = DOUBLEVAL;
727        } else {
728            token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
729            try {
730                lastChar = buffer[count - 1];
731                if (lastChar == 'e' || lastChar == 'E'
732                       || lastChar == '+' || lastChar == '-') {
733                    env.error(in.pos -1, "float.format");
734                } else if (isSingleFloat) {
735                    String string = bufferString();
736                    floatValue = Float.valueOf(string).floatValue();
737                    if (Float.isInfinite(floatValue)) {
738                        env.error(pos, "overflow.float");
739                    } else if (floatValue == 0 && !looksLikeZero(string)) {
740                        env.error(pos, "underflow.float");
741                    }
742                } else {
743                    String string = bufferString();
744                    doubleValue = Double.valueOf(string).doubleValue();
745                    if (Double.isInfinite(doubleValue)) {
746                        env.error(pos, "overflow.double");
747                    } else if (doubleValue == 0 && !looksLikeZero(string)) {
748                        env.error(pos, "underflow.double");
749                    }
750                }
751            } catch (NumberFormatException ee) {
752                env.error(pos, "float.format");
753                doubleValue = 0;
754                floatValue = 0;
755            }
756        }
757        return;
758    }
759
760    // We have a token that parses as a number.  Is this token possibly zero?
761    // i.e. does it have a non-zero value in the mantissa?
762    private static boolean looksLikeZero(String token) {
763        int length = token.length();
764        for (int i = 0; i < length; i++) {
765            switch (token.charAt(i)) {
766                case 0: case '.':
767                    continue;
768                case '1': case '2': case '3': case '4': case '5':
769                case '6': case '7': case '8': case '9':
770                    return false;
771                case 'e': case 'E': case 'f': case 'F':
772                    return true;
773            }
774        }
775        return true;
776    }
777
778    /**
779     * Scan an escape character.
780     * @return the character or -1 if it escaped an
781     * end-of-line.
782     */
783    private int scanEscapeChar() throws IOException {
784        long p = in.pos;
785
786        switch (ch = in.read()) {
787          case '0': case '1': case '2': case '3':
788          case '4': case '5': case '6': case '7': {
789            int n = ch - '0';
790            for (int i = 2 ; i > 0 ; i--) {
791                switch (ch = in.read()) {
792                  case '0': case '1': case '2': case '3':
793                  case '4': case '5': case '6': case '7':
794                    n = (n << 3) + ch - '0';
795                    break;
796
797                  default:
798                    if (n > 0xFF) {
799                        env.error(p, "invalid.escape.char");
800                    }
801                    return n;
802                }
803            }
804            ch = in.read();
805            if (n > 0xFF) {
806                env.error(p, "invalid.escape.char");
807            }
808            return n;
809          }
810
811          case 'r':  ch = in.read(); return '\r';
812          case 'n':  ch = in.read(); return '\n';
813          case 'f':  ch = in.read(); return '\f';
814          case 'b':  ch = in.read(); return '\b';
815          case 't':  ch = in.read(); return '\t';
816          case '\\': ch = in.read(); return '\\';
817          case '\"': ch = in.read(); return '\"';
818          case '\'': ch = in.read(); return '\'';
819        }
820
821        env.error(p, "invalid.escape.char");
822        ch = in.read();
823        return -1;
824    }
825
826    /**
827     * Scan a string. The current character
828     * should be the opening " of the string.
829     */
830    private void scanString() throws IOException {
831        token = STRINGVAL;
832        count = 0;
833        ch = in.read();
834
835        // Scan a String
836        while (true) {
837            switch (ch) {
838              case EOF:
839                env.error(pos, "eof.in.string");
840                stringValue = bufferString();
841                return;
842
843              case '\r':
844              case '\n':
845                ch = in.read();
846                env.error(pos, "newline.in.string");
847                stringValue = bufferString();
848                return;
849
850              case '"':
851                ch = in.read();
852                stringValue = bufferString();
853                return;
854
855              case '\\': {
856                int c = scanEscapeChar();
857                if (c >= 0) {
858                    putc((char)c);
859                }
860                break;
861              }
862
863              default:
864                putc(ch);
865                ch = in.read();
866                break;
867            }
868        }
869    }
870
871    /**
872     * Scan a character. The current character should be
873     * the opening ' of the character constant.
874     */
875    private void scanCharacter() throws IOException {
876        token = CHARVAL;
877
878        switch (ch = in.read()) {
879          case '\\':
880            int c = scanEscapeChar();
881            charValue = (char)((c >= 0) ? c : 0);
882            break;
883
884        case '\'':
885            // There are two standard problems this case deals with.  One
886            // is the malformed single quote constant (i.e. the programmer
887            // uses ''' instead of '\'') and the other is the empty
888            // character constant (i.e. '').  Just consume any number of
889            // single quotes and emit an error message.
890            charValue = 0;
891            env.error(pos, "invalid.char.constant");
892            ch = in.read();
893            while (ch == '\'') {
894                ch = in.read();
895            }
896            return;
897
898          case '\r':
899          case '\n':
900            charValue = 0;
901            env.error(pos, "invalid.char.constant");
902            return;
903
904          default:
905            charValue = (char)ch;
906            ch = in.read();
907            break;
908        }
909
910        if (ch == '\'') {
911            ch = in.read();
912        } else {
913            env.error(pos, "invalid.char.constant");
914            while (true) {
915                switch (ch) {
916                  case '\'':
917                    ch = in.read();
918                    return;
919                  case ';':
920                  case '\n':
921                  case EOF:
922                    return;
923                  default:
924                    ch = in.read();
925                }
926            }
927        }
928    }
929
930    /**
931     * Scan an Identifier. The current character should
932     * be the first character of the identifier.
933     */
934    private void scanIdentifier() throws IOException {
935        count = 0;
936
937        while (true) {
938            putc(ch);
939            switch (ch = in.read()) {
940              case 'a': case 'b': case 'c': case 'd': case 'e':
941              case 'f': case 'g': case 'h': case 'i': case 'j':
942              case 'k': case 'l': case 'm': case 'n': case 'o':
943              case 'p': case 'q': case 'r': case 's': case 't':
944              case 'u': case 'v': case 'w': case 'x': case 'y':
945              case 'z':
946              case 'A': case 'B': case 'C': case 'D': case 'E':
947              case 'F': case 'G': case 'H': case 'I': case 'J':
948              case 'K': case 'L': case 'M': case 'N': case 'O':
949              case 'P': case 'Q': case 'R': case 'S': case 'T':
950              case 'U': case 'V': case 'W': case 'X': case 'Y':
951              case 'Z':
952              case '0': case '1': case '2': case '3': case '4':
953              case '5': case '6': case '7': case '8': case '9':
954              case '$': case '_':
955                break;
956
957              default:
958                if (!Character.isJavaLetterOrDigit((char)ch)) {
959                    idValue = Identifier.lookup(bufferString());
960                    token = idValue.getType();
961                    return;
962                }
963            }
964        }
965    }
966
967    /**
968     * The ending position of the current token
969     */
970    // Note: This should be part of the pos itself.
971    public long getEndPos() {
972        return in.pos;
973    }
974
975    /**
976     * If the current token is IDENT, return the identifier occurrence.
977     * It will be freshly allocated.
978     */
979    public IdentifierToken getIdToken() {
980        return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
981    }
982
983    /**
984     * Scan the next token.
985     * @return the position of the previous token.
986     */
987   public long scan() throws IOException {
988       return xscan();
989   }
990
991    @SuppressWarnings("fallthrough")
992    protected long xscan() throws IOException {
993        final ScannerInputReader in = this.in;
994        long retPos = pos;
995        prevPos = in.pos;
996        docComment = null;
997        while (true) {
998            pos = in.pos;
999
1000            switch (ch) {
1001              case EOF:
1002                token = EOF;
1003                return retPos;
1004
1005              case '\n':
1006                if (scanComments) {
1007                    ch = ' ';
1008                    // Avoid this path the next time around.
1009                    // Do not just call in.read; we want to present
1010                    // a null token (and also avoid read-ahead).
1011                    token = COMMENT;
1012                    return retPos;
1013                }
1014                // Fall through
1015              case ' ':
1016              case '\t':
1017              case '\f':
1018                ch = in.read();
1019                break;
1020
1021              case '/':
1022                switch (ch = in.read()) {
1023                  case '/':
1024                    // Parse a // comment
1025                    while (((ch = in.read()) != EOF) && (ch != '\n'));
1026                    if (scanComments) {
1027                        token = COMMENT;
1028                        return retPos;
1029                    }
1030                    break;
1031
1032                  case '*':
1033                    ch = in.read();
1034                    if (ch == '*') {
1035                        docComment = scanDocComment();
1036                    } else {
1037                        skipComment();
1038                    }
1039                    if (scanComments) {
1040                        return retPos;
1041                    }
1042                    break;
1043
1044                  case '=':
1045                    ch = in.read();
1046                    token = ASGDIV;
1047                    return retPos;
1048
1049                  default:
1050                    token = DIV;
1051                    return retPos;
1052                }
1053                break;
1054
1055              case '"':
1056                scanString();
1057                return retPos;
1058
1059              case '\'':
1060                scanCharacter();
1061                return retPos;
1062
1063              case '0': case '1': case '2': case '3': case '4':
1064              case '5': case '6': case '7': case '8': case '9':
1065                scanNumber();
1066                return retPos;
1067
1068              case '.':
1069                switch (ch = in.read()) {
1070                  case '0': case '1': case '2': case '3': case '4':
1071                  case '5': case '6': case '7': case '8': case '9':
1072                    count = 0;
1073                    putc('.');
1074                    scanReal();
1075                    break;
1076                  default:
1077                    token = FIELD;
1078                }
1079                return retPos;
1080
1081              case '{':
1082                ch = in.read();
1083                token = LBRACE;
1084                return retPos;
1085
1086              case '}':
1087                ch = in.read();
1088                token = RBRACE;
1089                return retPos;
1090
1091              case '(':
1092                ch = in.read();
1093                token = LPAREN;
1094                return retPos;
1095
1096              case ')':
1097                ch = in.read();
1098                token = RPAREN;
1099                return retPos;
1100
1101              case '[':
1102                ch = in.read();
1103                token = LSQBRACKET;
1104                return retPos;
1105
1106              case ']':
1107                ch = in.read();
1108                token = RSQBRACKET;
1109                return retPos;
1110
1111              case ',':
1112                ch = in.read();
1113                token = COMMA;
1114                return retPos;
1115
1116              case ';':
1117                ch = in.read();
1118                token = SEMICOLON;
1119                return retPos;
1120
1121              case '?':
1122                ch = in.read();
1123                token = QUESTIONMARK;
1124                return retPos;
1125
1126              case '~':
1127                ch = in.read();
1128                token = BITNOT;
1129                return retPos;
1130
1131              case ':':
1132                ch = in.read();
1133                token = COLON;
1134                return retPos;
1135
1136              case '-':
1137                switch (ch = in.read()) {
1138                  case '-':
1139                    ch = in.read();
1140                    token = DEC;
1141                    return retPos;
1142
1143                  case '=':
1144                    ch = in.read();
1145                    token = ASGSUB;
1146                    return retPos;
1147                }
1148                token = SUB;
1149                return retPos;
1150
1151              case '+':
1152                switch (ch = in.read()) {
1153                  case '+':
1154                    ch = in.read();
1155                    token = INC;
1156                    return retPos;
1157
1158                  case '=':
1159                    ch = in.read();
1160                    token = ASGADD;
1161                    return retPos;
1162                }
1163                token = ADD;
1164                return retPos;
1165
1166              case '<':
1167                switch (ch = in.read()) {
1168                  case '<':
1169                    if ((ch = in.read()) == '=') {
1170                        ch = in.read();
1171                        token = ASGLSHIFT;
1172                        return retPos;
1173                    }
1174                    token = LSHIFT;
1175                    return retPos;
1176
1177                  case '=':
1178                    ch = in.read();
1179                    token = LE;
1180                    return retPos;
1181                }
1182                token = LT;
1183                return retPos;
1184
1185              case '>':
1186                switch (ch = in.read()) {
1187                  case '>':
1188                    switch (ch = in.read()) {
1189                      case '=':
1190                        ch = in.read();
1191                        token = ASGRSHIFT;
1192                        return retPos;
1193
1194                      case '>':
1195                        if ((ch = in.read()) == '=') {
1196                            ch = in.read();
1197                            token = ASGURSHIFT;
1198                            return retPos;
1199                        }
1200                        token = URSHIFT;
1201                        return retPos;
1202                    }
1203                    token = RSHIFT;
1204                    return retPos;
1205
1206                  case '=':
1207                    ch = in.read();
1208                    token = GE;
1209                    return retPos;
1210                }
1211                token = GT;
1212                return retPos;
1213
1214              case '|':
1215                switch (ch = in.read()) {
1216                  case '|':
1217                    ch = in.read();
1218                    token = OR;
1219                    return retPos;
1220
1221                  case '=':
1222                    ch = in.read();
1223                    token = ASGBITOR;
1224                    return retPos;
1225                }
1226                token = BITOR;
1227                return retPos;
1228
1229              case '&':
1230                switch (ch = in.read()) {
1231                  case '&':
1232                    ch = in.read();
1233                    token = AND;
1234                    return retPos;
1235
1236                  case '=':
1237                    ch = in.read();
1238                    token = ASGBITAND;
1239                    return retPos;
1240                }
1241                token = BITAND;
1242                return retPos;
1243
1244              case '=':
1245                if ((ch = in.read()) == '=') {
1246                    ch = in.read();
1247                    token = EQ;
1248                    return retPos;
1249                }
1250                token = ASSIGN;
1251                return retPos;
1252
1253              case '%':
1254                if ((ch = in.read()) == '=') {
1255                    ch = in.read();
1256                    token = ASGREM;
1257                    return retPos;
1258                }
1259                token = REM;
1260                return retPos;
1261
1262              case '^':
1263                if ((ch = in.read()) == '=') {
1264                    ch = in.read();
1265                    token = ASGBITXOR;
1266                    return retPos;
1267                }
1268                token = BITXOR;
1269                return retPos;
1270
1271              case '!':
1272                if ((ch = in.read()) == '=') {
1273                    ch = in.read();
1274                    token = NE;
1275                    return retPos;
1276                }
1277                token = NOT;
1278                return retPos;
1279
1280              case '*':
1281                if ((ch = in.read()) == '=') {
1282                    ch = in.read();
1283                    token = ASGMUL;
1284                    return retPos;
1285                }
1286                token = MUL;
1287                return retPos;
1288
1289              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1290              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1291              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1292              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1293              case 'y': case 'z':
1294              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1295              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1296              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1297              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1298              case 'Y': case 'Z':
1299              case '$': case '_':
1300                scanIdentifier();
1301                return retPos;
1302
1303              case '\u001a':
1304                // Our one concession to DOS.
1305                if ((ch = in.read()) == EOF) {
1306                    token = EOF;
1307                    return retPos;
1308                }
1309                env.error(pos, "funny.char");
1310                ch = in.read();
1311                break;
1312
1313
1314              default:
1315                if (Character.isJavaLetter((char)ch)) {
1316                    scanIdentifier();
1317                    return retPos;
1318                }
1319                env.error(pos, "funny.char");
1320                ch = in.read();
1321                break;
1322            }
1323        }
1324    }
1325
1326    /**
1327     * Scan to a matching '}', ']' or ')'. The current token must be
1328     * a '{', '[' or '(';
1329     */
1330    public void match(int open, int close) throws IOException {
1331        int depth = 1;
1332
1333        while (true) {
1334            scan();
1335            if (token == open) {
1336                depth++;
1337            } else if (token == close) {
1338                if (--depth == 0) {
1339                    return;
1340                }
1341            } else if (token == EOF) {
1342                env.error(pos, "unbalanced.paren");
1343                return;
1344            }
1345        }
1346    }
1347}
1348