JavadocTokenizer.java revision 2571:10fc81ac75b4
1/* 2 * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.tools.javac.parser; 27 28import com.sun.tools.javac.parser.Tokens.Comment; 29import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 30import com.sun.tools.javac.util.*; 31 32import java.nio.*; 33 34import static com.sun.tools.javac.util.LayoutCharacters.*; 35 36/** An extension to the base lexical analyzer that captures 37 * and processes the contents of doc comments. It does so by 38 * translating Unicode escape sequences and by stripping the 39 * leading whitespace and starts from each line of the comment. 40 * 41 * <p><b>This is NOT part of any supported API. 42 * If you write code that depends on this, you do so at your own risk. 43 * This code and its internal interfaces are subject to change or 44 * deletion without notice.</b> 45 */ 46public class JavadocTokenizer extends JavaTokenizer { 47 48 /** Create a scanner from the input buffer. buffer must implement 49 * array() and compact(), and remaining() must be less than limit(). 50 */ 51 protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) { 52 super(fac, buffer); 53 } 54 55 /** Create a scanner from the input array. The array must have at 56 * least a single character of extra space. 57 */ 58 protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) { 59 super(fac, input, inputLength); 60 } 61 62 @Override 63 protected Comment processComment(int pos, int endPos, CommentStyle style) { 64 char[] buf = reader.getRawCharacters(pos, endPos); 65 return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style); 66 } 67 68 /** 69 * This is a specialized version of UnicodeReader that keeps track of the 70 * column position within a given character stream (used for Javadoc processing), 71 * and which builds a table for mapping positions in the comment string to 72 * positions in the source file. 73 */ 74 static class DocReader extends UnicodeReader { 75 76 int col; 77 int startPos; 78 79 /** 80 * A buffer for building a table for mapping positions in {@link #sbuf} 81 * to positions in the source buffer. 82 * 83 * The array is organized as a series of pairs of integers: the first 84 * number in each pair specifies a position in the comment text, 85 * the second number in each pair specifies the corresponding position 86 * in the source buffer. The pairs are sorted in ascending order. 87 * 88 * Since the mapping function is generally continuous, with successive 89 * positions in the string corresponding to successive positions in the 90 * source buffer, the table only needs to record discontinuities in 91 * the mapping. The values of intermediate positions can be inferred. 92 * 93 * Discontinuities may occur in a number of places: when a newline 94 * is followed by whitespace and asterisks (which are ignored), 95 * when a tab is expanded into spaces, and when unicode escapes 96 * are used in the source buffer. 97 * 98 * Thus, to find the source position of any position, p, in the comment 99 * string, find the index, i, of the pair whose string offset 100 * ({@code pbuf[i] }) is closest to but not greater than p. Then, 101 * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }. 102 */ 103 int[] pbuf = new int[128]; 104 105 /** 106 * The index of the next empty slot in the pbuf buffer. 107 */ 108 int pp = 0; 109 110 /** The buffer index of the last double backslash sequence 111 */ 112 private int doubleBackslashBp = -1; 113 114 DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) { 115 super(fac, input, inputLength); 116 this.startPos = startPos; 117 } 118 119 @Override 120 protected void convertUnicode() { 121 if (ch == '\\' && unicodeConversionBp != bp) { 122 bp++; ch = buf[bp]; col++; 123 if (ch == 'u') { 124 do { 125 bp++; ch = buf[bp]; col++; 126 } while (ch == 'u'); 127 int limit = bp + 3; 128 if (limit < buflen) { 129 int d = digit(bp, 16); 130 int code = d; 131 while (bp < limit && d >= 0) { 132 bp++; ch = buf[bp]; col++; 133 d = digit(bp, 16); 134 code = (code << 4) + d; 135 } 136 if (d >= 0) { 137 ch = (char)code; 138 unicodeConversionBp = bp; 139 return; 140 } 141 } 142 // "illegal.Unicode.esc", reported by base scanner 143 } else { 144 bp--; 145 ch = '\\'; 146 col--; 147 } 148 } 149 } 150 151 @Override 152 protected void scanCommentChar() { 153 scanChar(); 154 if (ch == '\\') { 155 if (peekChar() == '\\' && !isUnicode()) { 156 bp++; col++; 157 doubleBackslashBp = bp; 158 } else { 159 convertUnicode(); 160 } 161 } 162 } 163 164 @Override 165 protected void scanChar() { 166 bp++; 167 ch = buf[bp]; 168 switch (ch) { 169 case '\r': // return 170 col = 0; 171 break; 172 case '\n': // newline 173 if (bp == 0 || buf[bp-1] != '\r') { 174 col = 0; 175 } 176 break; 177 case '\t': // tab 178 col = (col / TabInc * TabInc) + TabInc; 179 break; 180 case '\\': // possible Unicode 181 col++; 182 convertUnicode(); 183 break; 184 default: 185 col++; 186 break; 187 } 188 } 189 190 @Override 191 public void putChar(char ch, boolean scan) { 192 // At this point, bp is the position of the current character in buf, 193 // and sp is the position in sbuf where this character will be put. 194 // Record a new entry in pbuf if pbuf is empty or if sp and its 195 // corresponding source position are not equidistant from the 196 // corresponding values in the latest entry in the pbuf array. 197 // (i.e. there is a discontinuity in the map function.) 198 if ((pp == 0) 199 || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) { 200 if (pp + 1 >= pbuf.length) { 201 int[] new_pbuf = new int[pbuf.length * 2]; 202 System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length); 203 pbuf = new_pbuf; 204 } 205 pbuf[pp] = sp; 206 pbuf[pp + 1] = startPos + bp; 207 pp += 2; 208 } 209 super.putChar(ch, scan); 210 } 211 212 /** Whether the ch represents a sequence of two backslashes. */ 213 boolean isDoubleBackslash() { 214 return doubleBackslashBp == bp; 215 } 216 217 218 } 219 220 protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> { 221 222 /** 223 * Translated and stripped contents of doc comment 224 */ 225 private String docComment = null; 226 private int[] docPosns = null; 227 228 JavadocComment(DocReader reader, CommentStyle cs) { 229 super(reader, cs); 230 } 231 232 @Override 233 public String getText() { 234 if (!scanned && cs == CommentStyle.JAVADOC) { 235 scanDocComment(); 236 } 237 return docComment; 238 } 239 240 @Override 241 public int getSourcePos(int pos) { 242 // Binary search to find the entry for which the string index is 243 // less than pos. Since docPosns is a list of pairs of integers 244 // we must make sure the index is always even. 245 // If we find an exact match for pos, the other item in the pair 246 // gives the source pos; otherwise, compute the source position 247 // relative to the best match found in the array. 248 if (pos == Position.NOPOS) 249 return Position.NOPOS; 250 if (pos < 0 || pos > docComment.length()) 251 throw new StringIndexOutOfBoundsException(String.valueOf(pos)); 252 if (docPosns == null) 253 return Position.NOPOS; 254 int start = 0; 255 int end = docPosns.length; 256 while (start < end - 2) { 257 // find an even index midway between start and end 258 int index = ((start + end) / 4) * 2; 259 if (docPosns[index] < pos) 260 start = index; 261 else if (docPosns[index] == pos) 262 return docPosns[index + 1]; 263 else 264 end = index; 265 } 266 return docPosns[start + 1] + (pos - docPosns[start]); 267 } 268 269 @Override 270 @SuppressWarnings("fallthrough") 271 protected void scanDocComment() { 272 try { 273 boolean firstLine = true; 274 275 // Skip over first slash 276 comment_reader.scanCommentChar(); 277 // Skip over first star 278 comment_reader.scanCommentChar(); 279 280 // consume any number of stars 281 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { 282 comment_reader.scanCommentChar(); 283 } 284 // is the comment in the form /**/, /***/, /****/, etc. ? 285 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') { 286 docComment = ""; 287 return; 288 } 289 290 // skip a newline on the first line of the comment. 291 if (comment_reader.bp < comment_reader.buflen) { 292 if (comment_reader.ch == LF) { 293 comment_reader.scanCommentChar(); 294 firstLine = false; 295 } else if (comment_reader.ch == CR) { 296 comment_reader.scanCommentChar(); 297 if (comment_reader.ch == LF) { 298 comment_reader.scanCommentChar(); 299 firstLine = false; 300 } 301 } 302 } 303 304 outerLoop: 305 306 // The outerLoop processes the doc comment, looping once 307 // for each line. For each line, it first strips off 308 // whitespace, then it consumes any stars, then it 309 // puts the rest of the line into our buffer. 310 while (comment_reader.bp < comment_reader.buflen) { 311 int begin_bp = comment_reader.bp; 312 char begin_ch = comment_reader.ch; 313 // The wsLoop consumes whitespace from the beginning 314 // of each line. 315 wsLoop: 316 317 while (comment_reader.bp < comment_reader.buflen) { 318 switch(comment_reader.ch) { 319 case ' ': 320 comment_reader.scanCommentChar(); 321 break; 322 case '\t': 323 comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc; 324 comment_reader.scanCommentChar(); 325 break; 326 case FF: 327 comment_reader.col = 0; 328 comment_reader.scanCommentChar(); 329 break; 330 // Treat newline at beginning of line (blank line, no star) 331 // as comment text. Old Javadoc compatibility requires this. 332 /*---------------------------------* 333 case CR: // (Spec 3.4) 334 doc_reader.scanCommentChar(); 335 if (ch == LF) { 336 col = 0; 337 doc_reader.scanCommentChar(); 338 } 339 break; 340 case LF: // (Spec 3.4) 341 doc_reader.scanCommentChar(); 342 break; 343 *---------------------------------*/ 344 default: 345 // we've seen something that isn't whitespace; 346 // jump out. 347 break wsLoop; 348 } 349 } 350 351 // Are there stars here? If so, consume them all 352 // and check for the end of comment. 353 if (comment_reader.ch == '*') { 354 // skip all of the stars 355 do { 356 comment_reader.scanCommentChar(); 357 } while (comment_reader.ch == '*'); 358 359 // check for the closing slash. 360 if (comment_reader.ch == '/') { 361 // We're done with the doc comment 362 // scanChar() and breakout. 363 break outerLoop; 364 } 365 } else if (! firstLine) { 366 // The current line does not begin with a '*' so we will 367 // treat it as comment 368 comment_reader.bp = begin_bp; 369 comment_reader.ch = begin_ch; 370 } 371 // The textLoop processes the rest of the characters 372 // on the line, adding them to our buffer. 373 textLoop: 374 while (comment_reader.bp < comment_reader.buflen) { 375 switch (comment_reader.ch) { 376 case '*': 377 // Is this just a star? Or is this the 378 // end of a comment? 379 comment_reader.scanCommentChar(); 380 if (comment_reader.ch == '/') { 381 // This is the end of the comment, 382 // set ch and return our buffer. 383 break outerLoop; 384 } 385 // This is just an ordinary star. Add it to 386 // the buffer. 387 comment_reader.putChar('*', false); 388 break; 389 case '\\': 390 comment_reader.putChar('\\', false); 391 // If a double backslash was found, write two 392 if (comment_reader.isDoubleBackslash()) { 393 comment_reader.putChar('\\', false); 394 } 395 comment_reader.scanCommentChar(); 396 case ' ': 397 case '\t': 398 comment_reader.putChar(comment_reader.ch, false); 399 comment_reader.scanCommentChar(); 400 break; 401 case FF: 402 comment_reader.scanCommentChar(); 403 break textLoop; // treat as end of line 404 case CR: // (Spec 3.4) 405 comment_reader.scanCommentChar(); 406 if (comment_reader.ch != LF) { 407 // Canonicalize CR-only line terminator to LF 408 comment_reader.putChar((char)LF, false); 409 break textLoop; 410 } 411 /* fall through to LF case */ 412 case LF: // (Spec 3.4) 413 // We've seen a newline. Add it to our 414 // buffer and break out of this loop, 415 // starting fresh on a new line. 416 comment_reader.putChar(comment_reader.ch, false); 417 comment_reader.scanCommentChar(); 418 break textLoop; 419 default: 420 // Add the character to our buffer. 421 comment_reader.putChar(comment_reader.ch, false); 422 comment_reader.scanCommentChar(); 423 } 424 } // end textLoop 425 firstLine = false; 426 } // end outerLoop 427 428 if (comment_reader.sp > 0) { 429 int i = comment_reader.sp - 1; 430 trailLoop: 431 while (i > -1) { 432 switch (comment_reader.sbuf[i]) { 433 case '*': 434 i--; 435 break; 436 default: 437 break trailLoop; 438 } 439 } 440 comment_reader.sp = i + 1; 441 442 // Store the text of the doc comment 443 docComment = comment_reader.chars(); 444 docPosns = new int[comment_reader.pp]; 445 System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length); 446 } else { 447 docComment = ""; 448 } 449 } finally { 450 scanned = true; 451 comment_reader = null; 452 if (docComment != null && 453 docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) { 454 deprecatedFlag = true; 455 } 456 } 457 } 458 } 459 460 @Override 461 public Position.LineMap getLineMap() { 462 char[] buf = reader.getRawCharacters(); 463 return Position.makeLineMap(buf, buf.length, true); 464 } 465} 466