// JavaTokenizer.java revision 3201:c3b040ed4122
1/* 2 * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.tools.javac.parser; 27 28import com.sun.tools.javac.code.Source; 29import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 30import com.sun.tools.javac.util.*; 31 32import java.nio.CharBuffer; 33 34import static com.sun.tools.javac.parser.Tokens.*; 35import static com.sun.tools.javac.util.LayoutCharacters.*; 36 37/** The lexical analyzer maps an input stream consisting of 38 * ASCII characters and Unicode escapes into a token sequence. 39 * 40 * <p><b>This is NOT part of any supported API. 41 * If you write code that depends on this, you do so at your own risk. 
42 * This code and its internal interfaces are subject to change or 43 * deletion without notice.</b> 44 */ 45public class JavaTokenizer { 46 47 private static final boolean scannerDebug = false; 48 49 /** Allow binary literals. 50 */ 51 private boolean allowBinaryLiterals; 52 53 /** Allow underscores in literals. 54 */ 55 private boolean allowUnderscoresInLiterals; 56 57 /** The source language setting. 58 */ 59 private Source source; 60 61 /** The log to be used for error reporting. 62 */ 63 private final Log log; 64 65 /** The token factory. */ 66 private final Tokens tokens; 67 68 /** The token kind, set by nextToken(). 69 */ 70 protected TokenKind tk; 71 72 /** The token's radix, set by nextToken(). 73 */ 74 protected int radix; 75 76 /** The token's name, set by nextToken(). 77 */ 78 protected Name name; 79 80 /** The position where a lexical error occurred; 81 */ 82 protected int errPos = Position.NOPOS; 83 84 /** The Unicode reader (low-level stream reader). 85 */ 86 protected UnicodeReader reader; 87 88 protected ScannerFactory fac; 89 90 private static final boolean hexFloatsWork = hexFloatsWork(); 91 private static boolean hexFloatsWork() { 92 try { 93 Float.valueOf("0x1.0p1"); 94 return true; 95 } catch (NumberFormatException ex) { 96 return false; 97 } 98 } 99 100 /** 101 * Create a scanner from the input array. This method might 102 * modify the array. To avoid copying the input array, ensure 103 * that {@code inputLength < input.length} or 104 * {@code input[input.length -1]} is a white space character. 105 * 106 * @param fac the factory which created this Scanner 107 * @param buf the input, might be modified 108 * Must be positive and less than or equal to input.length. 
109 */ 110 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 111 this(fac, new UnicodeReader(fac, buf)); 112 } 113 114 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 115 this(fac, new UnicodeReader(fac, buf, inputLength)); 116 } 117 118 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 119 this.fac = fac; 120 this.log = fac.log; 121 this.tokens = fac.tokens; 122 this.source = fac.source; 123 this.reader = reader; 124 this.allowBinaryLiterals = source.allowBinaryLiterals(); 125 this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); 126 } 127 128 /** Report an error at the given position using the provided arguments. 129 */ 130 protected void lexError(int pos, String key, Object... args) { 131 log.error(pos, key, args); 132 tk = TokenKind.ERROR; 133 errPos = pos; 134 } 135 136 /** Read next character in character or string literal and copy into sbuf. 137 */ 138 private void scanLitChar(int pos) { 139 if (reader.ch == '\\') { 140 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 141 reader.skipChar(); 142 reader.putChar('\\', true); 143 } else { 144 reader.scanChar(); 145 switch (reader.ch) { 146 case '0': case '1': case '2': case '3': 147 case '4': case '5': case '6': case '7': 148 char leadch = reader.ch; 149 int oct = reader.digit(pos, 8); 150 reader.scanChar(); 151 if ('0' <= reader.ch && reader.ch <= '7') { 152 oct = oct * 8 + reader.digit(pos, 8); 153 reader.scanChar(); 154 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 155 oct = oct * 8 + reader.digit(pos, 8); 156 reader.scanChar(); 157 } 158 } 159 reader.putChar((char)oct); 160 break; 161 case 'b': 162 reader.putChar('\b', true); break; 163 case 't': 164 reader.putChar('\t', true); break; 165 case 'n': 166 reader.putChar('\n', true); break; 167 case 'f': 168 reader.putChar('\f', true); break; 169 case 'r': 170 reader.putChar('\r', true); break; 171 case '\'': 172 reader.putChar('\'', true); break; 173 case '\"': 174 
reader.putChar('\"', true); break; 175 case '\\': 176 reader.putChar('\\', true); break; 177 default: 178 lexError(reader.bp, "illegal.esc.char"); 179 } 180 } 181 } else if (reader.bp != reader.buflen) { 182 reader.putChar(true); 183 } 184 } 185 186 private void scanDigits(int pos, int digitRadix) { 187 char saveCh; 188 int savePos; 189 do { 190 if (reader.ch != '_') { 191 reader.putChar(false); 192 } else { 193 if (!allowUnderscoresInLiterals) { 194 lexError(pos, "unsupported.underscore.lit", source.name); 195 allowUnderscoresInLiterals = true; 196 } 197 } 198 saveCh = reader.ch; 199 savePos = reader.bp; 200 reader.scanChar(); 201 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 202 if (saveCh == '_') 203 lexError(savePos, "illegal.underscore"); 204 } 205 206 /** Read fractional part of hexadecimal floating point number. 207 */ 208 private void scanHexExponentAndSuffix(int pos) { 209 if (reader.ch == 'p' || reader.ch == 'P') { 210 reader.putChar(true); 211 skipIllegalUnderscores(); 212 if (reader.ch == '+' || reader.ch == '-') { 213 reader.putChar(true); 214 } 215 skipIllegalUnderscores(); 216 if (reader.digit(pos, 10) >= 0) { 217 scanDigits(pos, 10); 218 if (!hexFloatsWork) 219 lexError(pos, "unsupported.cross.fp.lit"); 220 } else 221 lexError(pos, "malformed.fp.lit"); 222 } else { 223 lexError(pos, "malformed.fp.lit"); 224 } 225 if (reader.ch == 'f' || reader.ch == 'F') { 226 reader.putChar(true); 227 tk = TokenKind.FLOATLITERAL; 228 radix = 16; 229 } else { 230 if (reader.ch == 'd' || reader.ch == 'D') { 231 reader.putChar(true); 232 } 233 tk = TokenKind.DOUBLELITERAL; 234 radix = 16; 235 } 236 } 237 238 /** Read fractional part of floating point number. 
239 */ 240 private void scanFraction(int pos) { 241 skipIllegalUnderscores(); 242 if (reader.digit(pos, 10) >= 0) { 243 scanDigits(pos, 10); 244 } 245 int sp1 = reader.sp; 246 if (reader.ch == 'e' || reader.ch == 'E') { 247 reader.putChar(true); 248 skipIllegalUnderscores(); 249 if (reader.ch == '+' || reader.ch == '-') { 250 reader.putChar(true); 251 } 252 skipIllegalUnderscores(); 253 if (reader.digit(pos, 10) >= 0) { 254 scanDigits(pos, 10); 255 return; 256 } 257 lexError(pos, "malformed.fp.lit"); 258 reader.sp = sp1; 259 } 260 } 261 262 /** Read fractional part and 'd' or 'f' suffix of floating point number. 263 */ 264 private void scanFractionAndSuffix(int pos) { 265 radix = 10; 266 scanFraction(pos); 267 if (reader.ch == 'f' || reader.ch == 'F') { 268 reader.putChar(true); 269 tk = TokenKind.FLOATLITERAL; 270 } else { 271 if (reader.ch == 'd' || reader.ch == 'D') { 272 reader.putChar(true); 273 } 274 tk = TokenKind.DOUBLELITERAL; 275 } 276 } 277 278 /** Read fractional part and 'd' or 'f' suffix of floating point number. 279 */ 280 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 281 radix = 16; 282 Assert.check(reader.ch == '.'); 283 reader.putChar(true); 284 skipIllegalUnderscores(); 285 if (reader.digit(pos, 16) >= 0) { 286 seendigit = true; 287 scanDigits(pos, 16); 288 } 289 if (!seendigit) 290 lexError(pos, "invalid.hex.number"); 291 else 292 scanHexExponentAndSuffix(pos); 293 } 294 295 private void skipIllegalUnderscores() { 296 if (reader.ch == '_') { 297 lexError(reader.bp, "illegal.underscore"); 298 while (reader.ch == '_') 299 reader.scanChar(); 300 } 301 } 302 303 /** Read a number. 304 * @param radix The radix of the number; one of 2, 8, 10, 16. 305 */ 306 private void scanNumber(int pos, int radix) { 307 // for octal, allow base-10 digit in case it's a float literal 308 this.radix = radix; 309 int digitRadix = (radix == 8 ? 
10 : radix); 310 int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); 311 boolean seendigit = firstDigit >= 0; 312 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; 313 if (seendigit) { 314 scanDigits(pos, digitRadix); 315 } 316 if (radix == 16 && reader.ch == '.') { 317 scanHexFractionAndSuffix(pos, seendigit); 318 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 319 scanHexExponentAndSuffix(pos); 320 } else if (digitRadix == 10 && reader.ch == '.') { 321 reader.putChar(true); 322 scanFractionAndSuffix(pos); 323 } else if (digitRadix == 10 && 324 (reader.ch == 'e' || reader.ch == 'E' || 325 reader.ch == 'f' || reader.ch == 'F' || 326 reader.ch == 'd' || reader.ch == 'D')) { 327 scanFractionAndSuffix(pos); 328 } else { 329 if (!seenValidDigit) { 330 switch (radix) { 331 case 2: 332 lexError(pos, "invalid.binary.number"); 333 break; 334 case 16: 335 lexError(pos, "invalid.hex.number"); 336 break; 337 } 338 } 339 if (reader.ch == 'l' || reader.ch == 'L') { 340 reader.scanChar(); 341 tk = TokenKind.LONGLITERAL; 342 } else { 343 tk = TokenKind.INTLITERAL; 344 } 345 } 346 } 347 348 /** Read an identifier. 
349 */ 350 private void scanIdent() { 351 boolean isJavaIdentifierPart; 352 char high; 353 reader.putChar(true); 354 do { 355 switch (reader.ch) { 356 case 'A': case 'B': case 'C': case 'D': case 'E': 357 case 'F': case 'G': case 'H': case 'I': case 'J': 358 case 'K': case 'L': case 'M': case 'N': case 'O': 359 case 'P': case 'Q': case 'R': case 'S': case 'T': 360 case 'U': case 'V': case 'W': case 'X': case 'Y': 361 case 'Z': 362 case 'a': case 'b': case 'c': case 'd': case 'e': 363 case 'f': case 'g': case 'h': case 'i': case 'j': 364 case 'k': case 'l': case 'm': case 'n': case 'o': 365 case 'p': case 'q': case 'r': case 's': case 't': 366 case 'u': case 'v': case 'w': case 'x': case 'y': 367 case 'z': 368 case '$': case '_': 369 case '0': case '1': case '2': case '3': case '4': 370 case '5': case '6': case '7': case '8': case '9': 371 break; 372 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 373 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 374 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 375 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 376 case '\u0015': case '\u0016': case '\u0017': 377 case '\u0018': case '\u0019': case '\u001B': 378 case '\u007F': 379 reader.scanChar(); 380 continue; 381 case '\u001A': // EOI is also a legal identifier part 382 if (reader.bp >= reader.buflen) { 383 name = reader.name(); 384 tk = tokens.lookupKind(name); 385 return; 386 } 387 reader.scanChar(); 388 continue; 389 default: 390 if (reader.ch < '\u0080') { 391 // all ASCII range chars already handled, above 392 isJavaIdentifierPart = false; 393 } else { 394 if (Character.isIdentifierIgnorable(reader.ch)) { 395 reader.scanChar(); 396 continue; 397 } else { 398 int codePoint = reader.peekSurrogates(); 399 if (codePoint >= 0) { 400 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { 401 reader.putChar(true); 402 } 403 } else { 404 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); 405 } 406 
} 407 } 408 if (!isJavaIdentifierPart) { 409 name = reader.name(); 410 tk = tokens.lookupKind(name); 411 return; 412 } 413 } 414 reader.putChar(true); 415 } while (true); 416 } 417 418 /** Return true if reader.ch can be part of an operator. 419 */ 420 private boolean isSpecial(char ch) { 421 switch (ch) { 422 case '!': case '%': case '&': case '*': case '?': 423 case '+': case '-': case ':': case '<': case '=': 424 case '>': case '^': case '|': case '~': 425 case '@': 426 return true; 427 default: 428 return false; 429 } 430 } 431 432 /** Read longest possible sequence of special characters and convert 433 * to token. 434 */ 435 private void scanOperator() { 436 while (true) { 437 reader.putChar(false); 438 Name newname = reader.name(); 439 TokenKind tk1 = tokens.lookupKind(newname); 440 if (tk1 == TokenKind.IDENTIFIER) { 441 reader.sp--; 442 break; 443 } 444 tk = tk1; 445 reader.scanChar(); 446 if (!isSpecial(reader.ch)) break; 447 } 448 } 449 450 /** Read token. 451 */ 452 public Token readToken() { 453 454 reader.sp = 0; 455 name = null; 456 radix = 0; 457 458 int pos = 0; 459 int endPos = 0; 460 List<Comment> comments = null; 461 462 try { 463 loop: while (true) { 464 pos = reader.bp; 465 switch (reader.ch) { 466 case ' ': // (Spec 3.6) 467 case '\t': // (Spec 3.6) 468 case FF: // (Spec 3.6) 469 do { 470 reader.scanChar(); 471 } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); 472 processWhiteSpace(pos, reader.bp); 473 break; 474 case LF: // (Spec 3.4) 475 reader.scanChar(); 476 processLineTerminator(pos, reader.bp); 477 break; 478 case CR: // (Spec 3.4) 479 reader.scanChar(); 480 if (reader.ch == LF) { 481 reader.scanChar(); 482 } 483 processLineTerminator(pos, reader.bp); 484 break; 485 case 'A': case 'B': case 'C': case 'D': case 'E': 486 case 'F': case 'G': case 'H': case 'I': case 'J': 487 case 'K': case 'L': case 'M': case 'N': case 'O': 488 case 'P': case 'Q': case 'R': case 'S': case 'T': 489 case 'U': case 'V': case 'W': case 'X': 
case 'Y': 490 case 'Z': 491 case 'a': case 'b': case 'c': case 'd': case 'e': 492 case 'f': case 'g': case 'h': case 'i': case 'j': 493 case 'k': case 'l': case 'm': case 'n': case 'o': 494 case 'p': case 'q': case 'r': case 's': case 't': 495 case 'u': case 'v': case 'w': case 'x': case 'y': 496 case 'z': 497 case '$': case '_': 498 scanIdent(); 499 break loop; 500 case '0': 501 reader.scanChar(); 502 if (reader.ch == 'x' || reader.ch == 'X') { 503 reader.scanChar(); 504 skipIllegalUnderscores(); 505 scanNumber(pos, 16); 506 } else if (reader.ch == 'b' || reader.ch == 'B') { 507 if (!allowBinaryLiterals) { 508 lexError(pos, "unsupported.binary.lit", source.name); 509 allowBinaryLiterals = true; 510 } 511 reader.scanChar(); 512 skipIllegalUnderscores(); 513 scanNumber(pos, 2); 514 } else { 515 reader.putChar('0'); 516 if (reader.ch == '_') { 517 int savePos = reader.bp; 518 do { 519 reader.scanChar(); 520 } while (reader.ch == '_'); 521 if (reader.digit(pos, 10) < 0) { 522 lexError(savePos, "illegal.underscore"); 523 } 524 } 525 scanNumber(pos, 8); 526 } 527 break loop; 528 case '1': case '2': case '3': case '4': 529 case '5': case '6': case '7': case '8': case '9': 530 scanNumber(pos, 10); 531 break loop; 532 case '.': 533 reader.scanChar(); 534 if (reader.digit(pos, 10) >= 0) { 535 reader.putChar('.'); 536 scanFractionAndSuffix(pos); 537 } else if (reader.ch == '.') { 538 int savePos = reader.bp; 539 reader.putChar('.'); reader.putChar('.', true); 540 if (reader.ch == '.') { 541 reader.scanChar(); 542 reader.putChar('.'); 543 tk = TokenKind.ELLIPSIS; 544 } else { 545 lexError(savePos, "illegal.dot"); 546 } 547 } else { 548 tk = TokenKind.DOT; 549 } 550 break loop; 551 case ',': 552 reader.scanChar(); tk = TokenKind.COMMA; break loop; 553 case ';': 554 reader.scanChar(); tk = TokenKind.SEMI; break loop; 555 case '(': 556 reader.scanChar(); tk = TokenKind.LPAREN; break loop; 557 case ')': 558 reader.scanChar(); tk = TokenKind.RPAREN; break loop; 559 case '[': 560 
reader.scanChar(); tk = TokenKind.LBRACKET; break loop; 561 case ']': 562 reader.scanChar(); tk = TokenKind.RBRACKET; break loop; 563 case '{': 564 reader.scanChar(); tk = TokenKind.LBRACE; break loop; 565 case '}': 566 reader.scanChar(); tk = TokenKind.RBRACE; break loop; 567 case '/': 568 reader.scanChar(); 569 if (reader.ch == '/') { 570 do { 571 reader.scanCommentChar(); 572 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); 573 if (reader.bp < reader.buflen) { 574 comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); 575 } 576 break; 577 } else if (reader.ch == '*') { 578 boolean isEmpty = false; 579 reader.scanChar(); 580 CommentStyle style; 581 if (reader.ch == '*') { 582 style = CommentStyle.JAVADOC; 583 reader.scanCommentChar(); 584 if (reader.ch == '/') { 585 isEmpty = true; 586 } 587 } else { 588 style = CommentStyle.BLOCK; 589 } 590 while (!isEmpty && reader.bp < reader.buflen) { 591 if (reader.ch == '*') { 592 reader.scanChar(); 593 if (reader.ch == '/') break; 594 } else { 595 reader.scanCommentChar(); 596 } 597 } 598 if (reader.ch == '/') { 599 reader.scanChar(); 600 comments = addComment(comments, processComment(pos, reader.bp, style)); 601 break; 602 } else { 603 lexError(pos, "unclosed.comment"); 604 break loop; 605 } 606 } else if (reader.ch == '=') { 607 tk = TokenKind.SLASHEQ; 608 reader.scanChar(); 609 } else { 610 tk = TokenKind.SLASH; 611 } 612 break loop; 613 case '\'': 614 reader.scanChar(); 615 if (reader.ch == '\'') { 616 lexError(pos, "empty.char.lit"); 617 reader.scanChar(); 618 } else { 619 if (reader.ch == CR || reader.ch == LF) 620 lexError(pos, "illegal.line.end.in.char.lit"); 621 scanLitChar(pos); 622 if (reader.ch == '\'') { 623 reader.scanChar(); 624 tk = TokenKind.CHARLITERAL; 625 } else { 626 lexError(pos, "unclosed.char.lit"); 627 } 628 } 629 break loop; 630 case '\"': 631 reader.scanChar(); 632 while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < 
reader.buflen) 633 scanLitChar(pos); 634 if (reader.ch == '\"') { 635 tk = TokenKind.STRINGLITERAL; 636 reader.scanChar(); 637 } else { 638 lexError(pos, "unclosed.str.lit"); 639 } 640 break loop; 641 default: 642 if (isSpecial(reader.ch)) { 643 scanOperator(); 644 } else { 645 boolean isJavaIdentifierStart; 646 int codePoint = -1; 647 if (reader.ch < '\u0080') { 648 // all ASCII range chars already handled, above 649 isJavaIdentifierStart = false; 650 } else { 651 codePoint = reader.peekSurrogates(); 652 if (codePoint >= 0) { 653 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) { 654 reader.putChar(true); 655 } 656 } else { 657 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); 658 } 659 } 660 if (isJavaIdentifierStart) { 661 scanIdent(); 662 } else if (reader.digit(pos, 10) >= 0) { 663 scanNumber(pos, 10); 664 } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 665 tk = TokenKind.EOF; 666 pos = reader.buflen; 667 } else { 668 String arg; 669 670 if (codePoint >= 0) { 671 char high = reader.ch; 672 reader.scanChar(); 673 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch); 674 } else { 675 arg = (32 < reader.ch && reader.ch < 127) ? 
676 String.format("%s", reader.ch) : 677 String.format("\\u%04x", (int)reader.ch); 678 } 679 lexError(pos, "illegal.char", arg); 680 reader.scanChar(); 681 } 682 } 683 break loop; 684 } 685 } 686 endPos = reader.bp; 687 switch (tk.tag) { 688 case DEFAULT: return new Token(tk, pos, endPos, comments); 689 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); 690 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); 691 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); 692 default: throw new AssertionError(); 693 } 694 } 695 finally { 696 if (scannerDebug) { 697 System.out.println("nextToken(" + pos 698 + "," + endPos + ")=|" + 699 new String(reader.getRawCharacters(pos, endPos)) 700 + "|"); 701 } 702 } 703 } 704 //where 705 List<Comment> addComment(List<Comment> comments, Comment comment) { 706 return comments == null ? 707 List.of(comment) : 708 comments.prepend(comment); 709 } 710 711 /** Return the position where a lexical error occurred; 712 */ 713 public int errPos() { 714 return errPos; 715 } 716 717 /** Set the position where a lexical error occurred; 718 */ 719 public void errPos(int pos) { 720 errPos = pos; 721 } 722 723 /** 724 * Called when a complete comment has been scanned. pos and endPos 725 * will mark the comment boundary. 726 */ 727 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { 728 if (scannerDebug) 729 System.out.println("processComment(" + pos 730 + "," + endPos + "," + style + ")=|" 731 + new String(reader.getRawCharacters(pos, endPos)) 732 + "|"); 733 char[] buf = reader.getRawCharacters(pos, endPos); 734 return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style); 735 } 736 737 /** 738 * Called when a complete whitespace run has been scanned. pos and endPos 739 * will mark the whitespace boundary. 
740 */ 741 protected void processWhiteSpace(int pos, int endPos) { 742 if (scannerDebug) 743 System.out.println("processWhitespace(" + pos 744 + "," + endPos + ")=|" + 745 new String(reader.getRawCharacters(pos, endPos)) 746 + "|"); 747 } 748 749 /** 750 * Called when a line terminator has been processed. 751 */ 752 protected void processLineTerminator(int pos, int endPos) { 753 if (scannerDebug) 754 System.out.println("processTerminator(" + pos 755 + "," + endPos + ")=|" + 756 new String(reader.getRawCharacters(pos, endPos)) 757 + "|"); 758 } 759 760 /** Build a map for translating between line numbers and 761 * positions in the input. 762 * 763 * @return a LineMap */ 764 public Position.LineMap getLineMap() { 765 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 766 } 767 768 769 /** 770 * Scan a documentation comment; determine if a deprecated tag is present. 771 * Called once the initial /, * have been skipped, positioned at the second * 772 * (which is treated as the beginning of the first line). 773 * Stops positioned at the closing '/'. 
774 */ 775 protected static class BasicComment<U extends UnicodeReader> implements Comment { 776 777 CommentStyle cs; 778 U comment_reader; 779 780 protected boolean deprecatedFlag = false; 781 protected boolean scanned = false; 782 783 protected BasicComment(U comment_reader, CommentStyle cs) { 784 this.comment_reader = comment_reader; 785 this.cs = cs; 786 } 787 788 public String getText() { 789 return null; 790 } 791 792 public int getSourcePos(int pos) { 793 return -1; 794 } 795 796 public CommentStyle getStyle() { 797 return cs; 798 } 799 800 public boolean isDeprecated() { 801 if (!scanned && cs == CommentStyle.JAVADOC) { 802 scanDocComment(); 803 } 804 return deprecatedFlag; 805 } 806 807 @SuppressWarnings("fallthrough") 808 protected void scanDocComment() { 809 try { 810 boolean deprecatedPrefix = false; 811 812 comment_reader.bp += 3; // '/**' 813 comment_reader.ch = comment_reader.buf[comment_reader.bp]; 814 815 forEachLine: 816 while (comment_reader.bp < comment_reader.buflen) { 817 818 // Skip optional WhiteSpace at beginning of line 819 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 820 comment_reader.scanCommentChar(); 821 } 822 823 // Skip optional consecutive Stars 824 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { 825 comment_reader.scanCommentChar(); 826 if (comment_reader.ch == '/') { 827 return; 828 } 829 } 830 831 // Skip optional WhiteSpace after Stars 832 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 833 comment_reader.scanCommentChar(); 834 } 835 836 deprecatedPrefix = false; 837 // At beginning of line in the JavaDoc sense. 
838 if (!deprecatedFlag) { 839 String deprecated = "@deprecated"; 840 int i = 0; 841 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) { 842 comment_reader.scanCommentChar(); 843 i++; 844 if (i == deprecated.length()) { 845 deprecatedPrefix = true; 846 break; 847 } 848 } 849 } 850 851 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) { 852 if (Character.isWhitespace(comment_reader.ch)) { 853 deprecatedFlag = true; 854 } else if (comment_reader.ch == '*') { 855 comment_reader.scanCommentChar(); 856 if (comment_reader.ch == '/') { 857 deprecatedFlag = true; 858 return; 859 } 860 } 861 } 862 863 // Skip rest of line 864 while (comment_reader.bp < comment_reader.buflen) { 865 switch (comment_reader.ch) { 866 case '*': 867 comment_reader.scanCommentChar(); 868 if (comment_reader.ch == '/') { 869 return; 870 } 871 break; 872 case CR: // (Spec 3.4) 873 comment_reader.scanCommentChar(); 874 if (comment_reader.ch != LF) { 875 continue forEachLine; 876 } 877 /* fall through to LF case */ 878 case LF: // (Spec 3.4) 879 comment_reader.scanCommentChar(); 880 continue forEachLine; 881 default: 882 comment_reader.scanCommentChar(); 883 } 884 } // rest of line 885 } // forEachLine 886 return; 887 } finally { 888 scanned = true; 889 } 890 } 891 } 892} 893