JavaTokenizer.java revision 2571:10fc81ac75b4
/*
 * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.tools.javac.parser;

import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.util.*;

import java.nio.CharBuffer;

import static com.sun.tools.javac.parser.Tokens.*;
import static com.sun.tools.javac.util.LayoutCharacters.*;

/** The lexical analyzer maps an input stream consisting of
 *  ASCII characters and Unicode escapes into a token sequence.
 *
 *  <p><b>This is NOT part of any supported API.
 *  If you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */
public class JavaTokenizer {

    private static final boolean scannerDebug = false;

    /** Allow binary literals.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set by readToken().
     */
    protected TokenKind tk;

    /** The token's radix, set by readToken().
     */
    protected int radix;

    /** The token's name, set by readToken().
     */
    protected Name name;

    /** The position where a lexical error occurred;
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory that created this tokenizer; also used to create
     *  the readers handed to comments.
     */
    protected ScannerFactory fac;

    /** True if the running platform's {@code Float.valueOf} accepts
     *  hexadecimal floating point literals; probed once at class load.
     */
    private static final boolean hexFloatsWork = hexFloatsWork();

    /** Probe whether {@code Float.valueOf} parses a hexadecimal
     *  floating point literal.
     *
     *  @return true if "0x1.0p1" parses, false if it raises
     *          NumberFormatException
     */
    private static boolean hexFloatsWork() {
        try {
            Float.valueOf("0x1.0p1");
            return true;
        } catch (NumberFormatException ex) {
            return false;
        }
    }

    /**
     * Create a tokenizer reading from the given character buffer.
     * The buffer is wrapped in a new {@code UnicodeReader}.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     *        Must be positive and less than or equal to input.length.
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }

    /**
     * Create a tokenizer on top of an existing reader.  The log, token
     * factory and source level are taken from the factory; the
     * language-feature switches are derived from the source level.
     *
     * @param fac    the factory which created this Scanner
     * @param reader the low-level character reader to pull input from
     */
    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
        this.fac = fac;
        this.log = fac.log;
        this.tokens = fac.tokens;
        this.source = fac.source;
        this.reader = reader;
        this.allowBinaryLiterals = source.allowBinaryLiterals();
        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
    }

    /** Report an error at the given position using the provided arguments.
     *  Also marks the current token as {@code TokenKind.ERROR} and records
     *  the position in {@code errPos}.
     *
     *  @param pos  the position to report the error at
     *  @param key  the resource key of the error message
     *  @param args the arguments of the error message
     */
    protected void lexError(int pos, String key, Object... args) {
        log.error(pos, key, args);
        tk = TokenKind.ERROR;
        errPos = pos;
    }

    /** Read next character in character or string literal and append it,
     *  with escape sequences translated, through {@code reader.putChar}.
     *
     *  @param pos the start position of the literal, used for error reporting
     */
    private void scanLitChar(int pos) {
        if (reader.ch == '\\') {
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', true);
            } else {
                reader.scanChar();
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.scanChar();
                        // a third octal digit is only taken when the first
                        // one is 0..3, which keeps the value within 0..0377
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
                            reader.scanChar();
                        }
                    }
                    reader.putChar((char)oct);
                    break;
                case 'b':
                    reader.putChar('\b', true); break;
                case 't':
                    reader.putChar('\t', true); break;
                case 'n':
                    reader.putChar('\n', true); break;
                case 'f':
                    reader.putChar('\f', true); break;
                case 'r':
                    reader.putChar('\r', true); break;
                case '\'':
                    reader.putChar('\'', true); break;
                case '\"':
                    reader.putChar('\"', true); break;
                case '\\':
                    reader.putChar('\\', true); break;
                default:
                    lexError(reader.bp, "illegal.esc.char");
                }
            }
        } else if (reader.bp != reader.buflen) {
            reader.putChar(true);
        }
    }

    /** Read a run of digits of the given radix, with optional embedded
     *  underscores.  Digits are appended through {@code reader.putChar};
     *  underscores are dropped.  An underscore when underscores are not
     *  allowed is reported once (the flag is then switched on to avoid
     *  repeated diagnostics); a trailing underscore is always an error.
     *
     *  @param pos        the start position of the literal, used for error reporting
     *  @param digitRadix the radix used to recognize digits
     */
    private void scanDigits(int pos, int digitRadix) {
        char saveCh;
        int savePos;
        do {
            if (reader.ch != '_') {
                reader.putChar(false);
            } else {
                if (!allowUnderscoresInLiterals) {
                    lexError(pos, "unsupported.underscore.lit", source.name);
                    allowUnderscoresInLiterals = true;
                }
            }
            saveCh = reader.ch;
            savePos = reader.bp;
            reader.scanChar();
        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
        // saveCh is the last character consumed by the loop
        if (saveCh == '_')
            lexError(savePos, "illegal.underscore");
    }

    /** Read the binary exponent ('p' or 'P', optional sign, decimal digits)
     *  and the optional 'f' or 'd' suffix of a hexadecimal floating point
     *  number.  Sets {@code tk} and {@code radix}.
     *
     *  @param pos the start position of the literal, used for error reporting
     */
    private void scanHexExponentAndSuffix(int pos) {
        if (reader.ch == 'p' || reader.ch == 'P') {
            reader.putChar(true);
            skipIllegalUnderscores();
            if (reader.ch == '+' || reader.ch == '-') {
                reader.putChar(true);
            }
            skipIllegalUnderscores();
            if (reader.digit(pos, 10) >= 0) {
                scanDigits(pos, 10);
                if (!hexFloatsWork)
                    lexError(pos, "unsupported.cross.fp.lit");
            } else
                lexError(pos, "malformed.fp.lit");
        } else {
            lexError(pos, "malformed.fp.lit");
        }
        if (reader.ch == 'f' || reader.ch == 'F') {
            reader.putChar(true);
            tk = TokenKind.FLOATLITERAL;
            radix = 16;
        } else {
            if (reader.ch == 'd' || reader.ch == 'D') {
                reader.putChar(true);
            }
            tk = TokenKind.DOUBLELITERAL;
            radix = 16;
        }
    }

    /** Read fractional part (and optional decimal exponent) of floating
     *  point number.
     *
     *  @param pos the start position of the literal, used for error reporting
     */
    private void scanFraction(int pos) {
        skipIllegalUnderscores();
        if (reader.digit(pos, 10) >= 0) {
            scanDigits(pos, 10);
        }
        int sp1 = reader.sp;
        if (reader.ch == 'e' || reader.ch == 'E') {
            reader.putChar(true);
            skipIllegalUnderscores();
            if (reader.ch == '+' || reader.ch == '-') {
                reader.putChar(true);
            }
            skipIllegalUnderscores();
            if (reader.digit(pos, 10) >= 0) {
                scanDigits(pos, 10);
                return;
            }
            lexError(pos, "malformed.fp.lit");
            // roll back the characters appended for the broken exponent
            reader.sp = sp1;
        }
    }

    /** Read fractional part and 'd' or 'f' suffix of floating point number.
     *  Sets {@code tk} and {@code radix}.
     *
     *  @param pos the start position of the literal, used for error reporting
     */
    private void scanFractionAndSuffix(int pos) {
        radix = 10;
        scanFraction(pos);
        if (reader.ch == 'f' || reader.ch == 'F') {
            reader.putChar(true);
            tk = TokenKind.FLOATLITERAL;
        } else {
            if (reader.ch == 'd' || reader.ch == 'D') {
                reader.putChar(true);
            }
            tk = TokenKind.DOUBLELITERAL;
        }
    }

    /** Read fractional part, exponent and 'd' or 'f' suffix of a
     *  hexadecimal floating point number.  Must be called with the
     *  reader positioned at the '.'.
     *
     *  @param pos       the start position of the literal, used for error reporting
     *  @param seendigit whether a hex digit was already seen before the '.'
     */
    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
        radix = 16;
        Assert.check(reader.ch == '.');
        reader.putChar(true);
        skipIllegalUnderscores();
        if (reader.digit(pos, 16) >= 0) {
            seendigit = true;
            scanDigits(pos, 16);
        }
        // at least one hex digit is required on either side of the '.'
        if (!seendigit)
            lexError(pos, "invalid.hex.number");
        else
            scanHexExponentAndSuffix(pos);
    }

    /** If positioned at an underscore, report it as illegal (once) and
     *  skip the whole run of underscores.
     */
    private void skipIllegalUnderscores() {
        if (reader.ch == '_') {
            lexError(reader.bp, "illegal.underscore");
            while (reader.ch == '_')
                reader.scanChar();
        }
    }

    /** Read a number.
     *  @param pos    the start position of the literal, used for error reporting
     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
     */
    private void scanNumber(int pos, int radix) {
        // for octal, allow base-10 digit in case it's a float literal
        this.radix = radix;
        int digitRadix = (radix == 8 ? 10 : radix);
        boolean seendigit = false;
        if (reader.digit(pos, digitRadix) >= 0) {
            seendigit = true;
            scanDigits(pos, digitRadix);
        }
        if (radix == 16 && reader.ch == '.') {
            scanHexFractionAndSuffix(pos, seendigit);
        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
            scanHexExponentAndSuffix(pos);
        } else if (digitRadix == 10 && reader.ch == '.') {
            reader.putChar(true);
            scanFractionAndSuffix(pos);
        } else if (digitRadix == 10 &&
                   (reader.ch == 'e' || reader.ch == 'E' ||
                    reader.ch == 'f' || reader.ch == 'F' ||
                    reader.ch == 'd' || reader.ch == 'D')) {
            scanFractionAndSuffix(pos);
        } else {
            if (reader.ch == 'l' || reader.ch == 'L') {
                // the suffix is consumed but not appended to the literal text
                reader.scanChar();
                tk = TokenKind.LONGLITERAL;
            } else {
                tk = TokenKind.INTLITERAL;
            }
        }
    }

    /** Read an identifier.  On return {@code name} holds the identifier
     *  text and {@code tk} the kind looked up for it in the token table.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        reader.putChar(true);
        do {
            switch (reader.ch) {
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                break;
            // these control characters are skipped without being
            // appended to the identifier text
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                reader.scanChar();
                continue;
            case '\u001A': // EOI is also a legal identifier part
                if (reader.bp >= reader.buflen) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
                reader.scanChar();
                continue;
            default:
                if (reader.ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    if (Character.isIdentifierIgnorable(reader.ch)) {
                        reader.scanChar();
                        continue;
                    } else {
                        int codePoint = reader.peekSurrogates();
                        if (codePoint >= 0) {
                            // surrogate pair: append the high surrogate here,
                            // the low one is appended at the bottom of the loop
                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
                                reader.putChar(true);
                            }
                        } else {
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                        }
                    }
                }
                if (!isJavaIdentifierPart) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
            }
            reader.putChar(true);
        } while (true);
    }

    /** Return true if ch can be part of an operator.
     *
     *  @param ch the character to test
     */
    private boolean isSpecial(char ch) {
        switch (ch) {
        case '!': case '%': case '&': case '*': case '?':
        case '+': case '-': case ':': case '<': case '=':
        case '>': case '^': case '|': case '~':
        case '@':
            return true;
        default:
            return false;
        }
    }

    /** Read longest possible sequence of special characters and convert
     *  to token.
     */
    private void scanOperator() {
        while (true) {
            reader.putChar(false);
            Name newname = reader.name();
            TokenKind tk1 = tokens.lookupKind(newname);
            // a lookup result of IDENTIFIER is treated as "not an operator":
            // drop the character just appended and keep the previous match
            if (tk1 == TokenKind.IDENTIFIER) {
                reader.sp--;
                break;
            }
            tk = tk1;
            reader.scanChar();
            if (!isSpecial(reader.ch)) break;
        }
    }

    /** Read token.  Whitespace, line terminators and comments in front of
     *  the token are consumed (and passed to the process* hooks); comments
     *  are collected and attached to the returned token.
     *
     *  @return the token read; its concrete class depends on {@code tk.tag}
     */
    public Token readToken() {

        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        if (reader.ch == '.') {
                            scanHexFractionAndSuffix(pos, false);
                        } else if (reader.digit(pos, 16) < 0) {
                            lexError(pos, "invalid.hex.number");
                        } else {
                            scanNumber(pos, 16);
                        }
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        if (!allowBinaryLiterals) {
                            lexError(pos, "unsupported.binary.lit", source.name);
                            // report the unsupported feature only once
                            allowBinaryLiterals = true;
                        }
                        reader.scanChar();
                        skipIllegalUnderscores();
                        if (reader.digit(pos, 2) < 0) {
                            lexError(pos, "invalid.binary.number");
                        } else {
                            scanNumber(pos, 2);
                        }
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, "illegal.underscore");
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, "illegal.dot");
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    reader.scanChar();
                    if (reader.ch == '/') {
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // NOTE: a line comment that runs to the very end of the
                        // input (no line terminator) is not recorded
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, "unclosed.comment");
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, "empty.char.lit");
                        reader.scanChar();
                    } else {
                        if (reader.ch == CR || reader.ch == LF)
                            lexError(pos, "illegal.line.end.in.char.lit");
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, "unclosed.char.lit");
                        }
                    }
                    break loop;
                case '\"':
                    reader.scanChar();
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
                        scanLitChar(pos);
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, "unclosed.str.lit");
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            tk = TokenKind.EOF;
                            pos = reader.buflen;
                        } else {
                            String arg;

                            if (codePoint >= 0) {
                                // illegal supplementary character: show both surrogates
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, "illegal.char", arg);
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
    //where
        /** Add a comment to the list collected for the token being read.
         *  The newest comment is put at the front of the list.
         *
         *  @param comments the comments collected so far, or null if none
         *  @param comment  the comment to add
         *  @return the list including the new comment
         */
        List<Comment> addComment(List<Comment> comments, Comment comment) {
            return comments == null ?
                    List.of(comment) :
                    comments.prepend(comment);
        }

    /** Return the position where a lexical error occurred;
     */
    public int errPos() {
        return errPos;
    }

    /** Set the position where a lexical error occurred;
     */
    public void errPos(int pos) {
        errPos = pos;
    }

    /**
     * Called when a complete comment has been scanned. pos and endPos
     * will mark the comment boundary.
     *
     * @param pos    the position of the first character of the comment
     * @param endPos the position just past the last character of the comment
     * @param style  the style of the comment (line, block or javadoc)
     * @return a comment object backed by its own reader over a copy of
     *         the raw comment characters
     */
    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
        if (scannerDebug)
            System.out.println("processComment(" + pos
                               + "," + endPos + "," + style + ")=|"
                               + new String(reader.getRawCharacters(pos, endPos))
                               + "|");
        char[] buf = reader.getRawCharacters(pos, endPos);
        return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
    }

    /**
     * Called when a complete whitespace run has been scanned. pos and endPos
     * will mark the whitespace boundary.
     *
     * @param pos    the position of the first whitespace character
     * @param endPos the position just past the last whitespace character
     */
    protected void processWhiteSpace(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processWhitespace(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /**
     * Called when a line terminator has been processed.
     *
     * @param pos    the position of the first character of the terminator
     * @param endPos the position just past the terminator
     */
    protected void processLineTerminator(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processTerminator(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /** Build a map for translating between line numbers and
     * positions in the input.
     *
     * @return a LineMap */
    public Position.LineMap getLineMap() {
        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
    }


    /**
     * Basic comment implementation.  It keeps the comment style and a
     * reader over the comment's characters, and lazily determines whether
     * a documentation comment carries a deprecated tag.  It does not
     * provide the comment text or source positions.
     *
     * @param <U> the type of reader used to scan the comment
     */
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        CommentStyle cs;
        U comment_reader;

        /** Set by scanDocComment() when a deprecated tag is found. */
        protected boolean deprecatedFlag = false;
        /** True once scanDocComment() has run, so it runs at most once
         *  through isDeprecated(). */
        protected boolean scanned = false;

        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        /** Not supported by this implementation; always returns null. */
        public String getText() {
            return null;
        }

        /** Not supported by this implementation; always returns -1. */
        public int getSourcePos(int pos) {
            return -1;
        }

        public CommentStyle getStyle() {
            return cs;
        }

        /** Return whether this comment contains a deprecated tag.  Only
         *  documentation comments are scanned, and only on the first call.
         */
        public boolean isDeprecated() {
            if (!scanned && cs == CommentStyle.JAVADOC) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /**
         * Scan a documentation comment; determine if a deprecated tag is present.
         * The leading three characters of the comment are skipped first; the
         * rest is processed line by line.  On each line optional whitespace,
         * a run of stars and further whitespace are skipped; if the line then
         * starts with the deprecated tag followed by whitespace or by the end
         * of the comment, {@code deprecatedFlag} is set.
         * Scanning stops at the closing '/' or at the end of the buffer;
         * {@code scanned} is set in either case.
         */
        @SuppressWarnings("fallthrough")
        protected void scanDocComment() {
            try {
                boolean deprecatedPrefix = false;

                comment_reader.bp += 3; // '/**'
                comment_reader.ch = comment_reader.buf[comment_reader.bp];

                forEachLine:
                while (comment_reader.bp < comment_reader.buflen) {

                    // Skip optional WhiteSpace at beginning of line
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    // Skip optional consecutive Stars
                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
                        comment_reader.scanCommentChar();
                        if (comment_reader.ch == '/') {
                            return;
                        }
                    }

                    // Skip optional WhiteSpace after Stars
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    deprecatedPrefix = false;
                    // At beginning of line in the JavaDoc sense.
                    if (!deprecatedFlag) {
                        String deprecated = "@deprecated";
                        int i = 0;
                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
                            comment_reader.scanCommentChar();
                            i++;
                            if (i == deprecated.length()) {
                                deprecatedPrefix = true;
                                break;
                            }
                        }
                    }

                    // the tag only counts when it is a whole word: followed
                    // by whitespace or directly by the end of the comment
                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
                        if (Character.isWhitespace(comment_reader.ch)) {
                            deprecatedFlag = true;
                        } else if (comment_reader.ch == '*') {
                            comment_reader.scanCommentChar();
                            if (comment_reader.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Skip rest of line
                    while (comment_reader.bp < comment_reader.buflen) {
                        switch (comment_reader.ch) {
                            case '*':
                                comment_reader.scanCommentChar();
                                if (comment_reader.ch == '/') {
                                    return;
                                }
                                break;
                            case CR: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                if (comment_reader.ch != LF) {
                                    continue forEachLine;
                                }
                            /* fall through to LF case */
                            case LF: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                continue forEachLine;
                            default:
                                comment_reader.scanCommentChar();
                        }
                    } // rest of line
                } // forEachLine
                return;
            } finally {
                scanned = true;
            }
        }
    }
}