/* * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package com.sun.tools.javac.parser; import com.sun.tools.javac.parser.Tokens.Comment; import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; import com.sun.tools.javac.util.*; import java.nio.*; import java.util.regex.Pattern; import static com.sun.tools.javac.util.LayoutCharacters.*; /** An extension to the base lexical analyzer that captures * and processes the contents of doc comments. It does so by * translating Unicode escape sequences and by stripping the * leading whitespace and stars from each line of the comment. * *
<p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
public class JavadocTokenizer extends JavaTokenizer {
/**
 * Creates a Javadoc-aware tokenizer over a character buffer.
 * All work is delegated to the {@code JavaTokenizer} constructor; this
 * class adds no state of its own at construction time.
 *
 * <p>The buffer must implement array() and compact(), and remaining()
 * must be less than limit().
 *
 * @param fac    the scanner factory, passed through to the superclass
 * @param buffer the input characters, passed through to the superclass
 */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
super(fac, buffer);
}
/**
 * Creates a Javadoc-aware tokenizer over a character array.
 * All work is delegated to the {@code JavaTokenizer} constructor.
 *
 * <p>The array must have at least a single character of extra space
 * beyond the input.
 *
 * @param fac         the scanner factory, passed through to the superclass
 * @param input       the array holding the input characters
 * @param inputLength the input length, passed through to the superclass
 *                    (presumably the number of valid characters in
 *                    {@code input} - confirm against the superclass)
 */
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
super(fac, input, inputLength);
}
/**
 * Builds the comment object for the comment found between two positions.
 * The raw characters of that range are fetched from the reader and handed
 * to a fresh {@link DocReader}, which is then wrapped in a
 * {@link JavadocComment}.
 *
 * @param pos    start of the range requested from the reader; also given
 *               to the {@code DocReader} as its start position
 * @param endPos end of the range requested from the reader
 * @param style  the comment style, forwarded to the {@code JavadocComment}
 * @return a new {@code JavadocComment} over the raw comment characters
 */
@Override
protected Comment processComment(int pos, int endPos, CommentStyle style) {
    char[] rawChars = reader.getRawCharacters(pos, endPos);
    DocReader docReader = new DocReader(fac, rawChars, rawChars.length, pos);
    return new JavadocComment(docReader, style);
}
/**
 * A {@code UnicodeReader} specialised for Javadoc processing. On top of
 * the inherited reading logic it maintains the current column within the
 * character stream, and it accumulates a table that maps positions in the
 * comment string back to positions in the source file.
 */
static class DocReader extends UnicodeReader {

    /**
     * Current column. Reset to 0 at a line terminator, moved to the next
     * multiple of {@code TabInc} on a tab, and incremented for every other
     * character consumed.
     */
    int col;

    /**
     * Offset supplied at construction; it is added to the buffer index
     * when a source position is recorded in {@link #pbuf}.
     */
    int startPos;

    /**
     * Position-mapping table from offsets in {@link #sbuf} to positions
     * in the source buffer.
     *
     * The table is a flat sequence of integer pairs. The first element
     * of a pair is a position in the comment text and the second is the
     * matching position in the source buffer. Pairs appear in ascending
     * order.
     *
     * The mapping is mostly continuous (consecutive comment positions
     * map to consecutive source positions), so only the points where
     * that continuity breaks are stored; everything in between can be
     * derived.
     *
     * Breaks in continuity arise where a newline is followed by
     * whitespace and asterisks (which are ignored), where a tab is
     * expanded into spaces, and where the source buffer contains
     * unicode escapes.
     *
     * To translate a comment position p, locate the index i of the pair
     * whose comment offset ({@code pbuf[i] }) is closest to but not
     * greater than p; then
     * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
     */
    int[] pbuf = new int[128];

    /**
     * Index of the first unused slot in {@link #pbuf}.
     */
    int pp = 0;

    /**
     * Buffer index at which the most recent double backslash sequence
     * ended, or -1 if none has been seen.
     */
    private int doubleBackslashBp = -1;

    /**
     * Creates a reader over the given characters.
     *
     * @param fac         scanner factory, forwarded to the superclass
     * @param input       character array, forwarded to the superclass
     * @param inputLength input length, forwarded to the superclass
     * @param startPos    offset stored in {@link #startPos}
     */
    DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
        super(fac, input, inputLength);
        this.startPos = startPos;
    }

    /**
     * Attempts to decode a unicode escape starting at the current
     * backslash, advancing {@link #col} for every character consumed.
     * On success {@code ch} holds the decoded character and the position
     * is remembered in {@code unicodeConversionBp}.
     */
    @Override
    protected void convertUnicode() {
        // Only act on a backslash that is not itself the outcome of a
        // conversion that ended at this very position.
        if (ch != '\\' || unicodeConversionBp == bp) {
            return;
        }

        bp++; ch = buf[bp]; col++;

        if (ch != 'u') {
            // Not an escape after all: undo the one-character lookahead.
            bp--;
            ch = '\\';
            col--;
            return;
        }

        // Any number of 'u' characters may follow the backslash.
        do {
            bp++; ch = buf[bp]; col++;
        } while (ch == 'u');

        // bp now sits on the first hex digit; the fourth is three further on.
        int lastDigitBp = bp + 3;
        if (lastDigitBp < buflen) {
            int digitValue = digit(bp, 16);
            int codeUnit = digitValue;
            // Stops early as soon as digit() yields a negative value.
            while (bp < lastDigitBp && digitValue >= 0) {
                bp++; ch = buf[bp]; col++;
                digitValue = digit(bp, 16);
                codeUnit = (codeUnit << 4) + digitValue;
            }
            if (digitValue >= 0) {
                ch = (char)codeUnit;
                unicodeConversionBp = bp;
                return;
            }
        }
        // "illegal.Unicode.esc", reported by base scanner
    }

    /**
     * Reads the next character of a comment. A backslash directly
     * followed by another backslash (and not flagged by
     * {@code isUnicode()}) is consumed as a pair and its end position is
     * remembered; any other backslash is offered to
     * {@link #convertUnicode()}.
     */
    @Override
    protected void scanCommentChar() {
        scanChar();
        if (ch != '\\') {
            return;
        }
        boolean literalPair = peekChar() == '\\' && !isUnicode();
        if (literalPair) {
            // Step over the second backslash and note where the pair ended.
            bp++;
            col++;
            doubleBackslashBp = bp;
        } else {
            convertUnicode();
        }
    }

    /**
     * Advances to the next character in the buffer and updates
     * {@link #col} according to the kind of character read.
     */
    @Override
    protected void scanChar() {
        bp++;
        ch = buf[bp];
        if (ch == '\r') {
            // return
            col = 0;
        } else if (ch == '\n') {
            // newline: in a "\r\n" pair the '\r' has already reset the column
            if (bp == 0 || buf[bp - 1] != '\r') {
                col = 0;
            }
        } else if (ch == '\t') {
            // tab: jump to the next multiple of TabInc
            col = (col / TabInc + 1) * TabInc;
        } else {
            col++;
            if (ch == '\\') {
                // possible Unicode
                convertUnicode();
            }
        }
    }

    /**
     * Appends a character via the superclass, first recording a mapping
     * entry in {@link #pbuf} if the position mapping is discontinuous here.
     */
    @Override
    public void putChar(char ch, boolean scan) {
        // Here bp is the index of the current character in buf, and sp is
        // the index in sbuf at which this character is about to be stored.
        // A new pair is appended when the table is empty, or when sp and
        // its source position are not the same distance away from the
        // values held in the most recent pair - that is, when the mapping
        // function has a discontinuity at this point.
        int sourcePos = startPos + bp;
        boolean discontinuity;
        if (pp == 0) {
            discontinuity = true;
        } else {
            int commentDelta = sp - pbuf[pp - 2];
            int sourceDelta = sourcePos - pbuf[pp - 1];
            discontinuity = commentDelta != sourceDelta;
        }

        if (discontinuity) {
            if (pp + 1 >= pbuf.length) {
                // Double the table, keeping the existing entries.
                pbuf = java.util.Arrays.copyOf(pbuf, pbuf.length * 2);
            }
            pbuf[pp++] = sp;
            pbuf[pp++] = sourcePos;
        }
        super.putChar(ch, scan);
    }

    /** Whether the ch represents a sequence of two backslashes. */
    boolean isDoubleBackslash() {
        return bp == doubleBackslashBp;
    }
}
protected static class JavadocComment extends JavaTokenizer.BasicComment