javac/parser/JavadocTokenizer.java

61452Sdfr/*
61452Sdfr * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
61452Sdfr * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
61452Sdfr *
61452Sdfr * This code is free software; you can redistribute it and/or modify it
61452Sdfr * under the terms of the GNU General Public License version 2 only, as
61452Sdfr * published by the Free Software Foundation.  Oracle designates this
61452Sdfr * particular file as subject to the "Classpath" exception as provided
61452Sdfr * by Oracle in the LICENSE file that accompanied this code.
61452Sdfr *
61452Sdfr * This code is distributed in the hope that it will be useful, but WITHOUT
61452Sdfr * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
61452Sdfr * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
61452Sdfr * version 2 for more details (a copy is included in the LICENSE file that
61452Sdfr * accompanied this code).
61452Sdfr *
61452Sdfr * You should have received a copy of the GNU General Public License version
61452Sdfr * 2 along with this work; if not, write to the Free Software Foundation,
61452Sdfr * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
61452Sdfr *
61452Sdfr * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
61452Sdfr * or visit www.oracle.com if you need additional information or have any
61452Sdfr * questions.
61452Sdfr */
61452Sdfr
61452Sdfrpackage com.sun.tools.javac.parser;
116192Sobrien
116192Sobrienimport com.sun.tools.javac.parser.Tokens.Comment;
116192Sobrienimport com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
61452Sdfrimport com.sun.tools.javac.util.*;
61452Sdfr
61452Sdfrimport java.nio.*;
61452Sdfrimport java.util.regex.Pattern;
61452Sdfr
61452Sdfrimport static com.sun.tools.javac.util.LayoutCharacters.*;
129878Sphk
61452Sdfr/** An extension to the base lexical analyzer that captures
61452Sdfr *  and processes the contents of doc comments.  It does so by
76827Salfred *  translating Unicode escape sequences and by stripping the
79339Sjhb *  leading whitespace and starts from each line of the comment.
61452Sdfr *
119288Simp *  <p><b>This is NOT part of any supported API.
119288Simp *  If you write code that depends on this, you do so at your own risk.
61452Sdfr *  This code and its internal interfaces are subject to change or
61452Sdfr *  deletion without notice.</b>
61452Sdfr */
61452Sdfrpublic class JavadocTokenizer extends JavaTokenizer {
61452Sdfr
61452Sdfr    /** Create a scanner from the input buffer.  buffer must implement
61452Sdfr     *  array() and compact(), and remaining() must be less than limit().
61452Sdfr     */
61452Sdfr    protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
61452Sdfr        super(fac, buffer);
61501Sdfr    }
61501Sdfr
61452Sdfr    /** Create a scanner from the input array.  The array must have at
61452Sdfr     *  least a single character of extra space.
61452Sdfr     */
61452Sdfr    protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
61452Sdfr        super(fac, input, inputLength);
61501Sdfr    }
61501Sdfr
87479Scokane    @Override
87479Scokane    protected Comment processComment(int pos, int endPos, CommentStyle style) {
61501Sdfr        char[] buf = reader.getRawCharacters(pos, endPos);
61501Sdfr        return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
61501Sdfr    }
61501Sdfr
61452Sdfr    /**
133851Sobrien     * This is a specialized version of UnicodeReader that keeps track of the
133851Sobrien     * column position within a given character stream (used for Javadoc processing),
133851Sobrien     * and which builds a table for mapping positions in the comment string to
133851Sobrien     * positions in the source file.
133851Sobrien     */
133851Sobrien    static class DocReader extends UnicodeReader {
61452Sdfr
61452Sdfr         int col;
61501Sdfr         int startPos;
61501Sdfr
61501Sdfr         /**
61501Sdfr          * A buffer for building a table for mapping positions in {@link #sbuf}
61501Sdfr          * to positions in the source buffer.
61501Sdfr          *
87479Scokane          * The array is organized as a series of pairs of integers: the first
61501Sdfr          * number in each pair specifies a position in the comment text,
61501Sdfr          * the second number in each pair specifies the corresponding position
61501Sdfr          * in the source buffer. The pairs are sorted in ascending order.
61501Sdfr          *
61501Sdfr          * Since the mapping function is generally continuous, with successive
61501Sdfr          * positions in the string corresponding to successive positions in the
61501Sdfr          * source buffer, the table only needs to record discontinuities in
61501Sdfr          * the mapping. The values of intermediate positions can be inferred.
61501Sdfr          *
61501Sdfr          * Discontinuities may occur in a number of places: when a newline
61501Sdfr          * is followed by whitespace and asterisks (which are ignored),
61501Sdfr          * when a tab is expanded into spaces, and when unicode escapes
61501Sdfr          * are used in the source buffer.
87479Scokane          *
87479Scokane          * Thus, to find the source position of any position, p, in the comment
61501Sdfr          * string, find the index, i, of the pair whose string offset
61501Sdfr          * ({@code pbuf[i] }) is closest to but not greater than p. Then,
61501Sdfr          * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
61501Sdfr          */
61501Sdfr         int[] pbuf = new int[128];
61501Sdfr
61501Sdfr         /**
61501Sdfr          * The index of the next empty slot in the pbuf buffer.
61501Sdfr          */
61501Sdfr         int pp = 0;
61501Sdfr
61501Sdfr         /** The buffer index of the last double backslash sequence
61501Sdfr          */
61501Sdfr         private int doubleBackslashBp = -1;
61501Sdfr
61501Sdfr         DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
61501Sdfr             super(fac, input, inputLength);
61501Sdfr             this.startPos = startPos;
61501Sdfr         }
61501Sdfr
61501Sdfr         @Override
61501Sdfr         protected void convertUnicode() {
61501Sdfr             if (ch == '\\' && unicodeConversionBp != bp) {
61501Sdfr                 bp++; ch = buf[bp]; col++;
94790Scokane                 if (ch == 'u') {
94790Scokane                     do {
61501Sdfr                         bp++; ch = buf[bp]; col++;
87479Scokane                     } while (ch == 'u');
105145Smarcel                     int limit = bp + 3;
105145Smarcel                     if (limit < buflen) {
87479Scokane                         int d = digit(bp, 16);
87479Scokane                         int code = d;
87479Scokane                         while (bp < limit && d >= 0) {
87479Scokane                             bp++; ch = buf[bp]; col++;
87479Scokane                             d = digit(bp, 16);
87479Scokane                             code = (code << 4) + d;
87479Scokane                         }
94790Scokane                         if (d >= 0) {
87479Scokane                             ch = (char)code;
61501Sdfr                             unicodeConversionBp = bp;
61501Sdfr                             return;
61501Sdfr                         }
87479Scokane                     }
87479Scokane                     // "illegal.Unicode.esc", reported by base scanner
87479Scokane                 } else {
87479Scokane                     bp--;
87479Scokane                     ch = '\\';
61501Sdfr                     col--;
87479Scokane                 }
87479Scokane             }
87479Scokane         }
61501Sdfr
61501Sdfr         @Override
87479Scokane         protected void scanCommentChar() {
61501Sdfr             scanChar();
61501Sdfr             if (ch == '\\') {
61501Sdfr                 if (peekChar() == '\\' && !isUnicode()) {
61501Sdfr                     bp++; col++;
61501Sdfr                     doubleBackslashBp = bp;
61501Sdfr                 } else {
87479Scokane                     convertUnicode();
61501Sdfr                 }
61501Sdfr             }
61501Sdfr         }
61501Sdfr
61501Sdfr         @Override
61501Sdfr         protected void scanChar() {
61501Sdfr             bp++;
61501Sdfr             ch = buf[bp];
61501Sdfr             switch (ch) {
61501Sdfr             case '\r': // return
61501Sdfr                 col = 0;
61501Sdfr                 break;
61501Sdfr             case '\n': // newline
61501Sdfr                 if (bp == 0 || buf[bp-1] != '\r') {
61501Sdfr                     col = 0;
61501Sdfr                 }
61501Sdfr                 break;
61501Sdfr             case '\t': // tab
61452Sdfr                 col = (col / TabInc * TabInc) + TabInc;
61452Sdfr                 break;
61452Sdfr             case '\\': // possible Unicode
61452Sdfr                 col++;
61452Sdfr                 convertUnicode();
61452Sdfr                 break;
61452Sdfr             default:
61452Sdfr                 col++;
61452Sdfr                 break;
61452Sdfr             }
61452Sdfr         }
133851Sobrien
133851Sobrien         @Override
83699Scokane         public void putChar(char ch, boolean scan) {
83699Scokane             // At this point, bp is the position of the current character in buf,
87479Scokane             // and sp is the position in sbuf where this character will be put.
87479Scokane             // Record a new entry in pbuf if pbuf is empty or if sp and its
61452Sdfr             // corresponding source position are not equidistant from the
61452Sdfr             // corresponding values in the latest entry in the pbuf array.
61452Sdfr             // (i.e. there is a discontinuity in the map function.)
61452Sdfr             if ((pp == 0)
61452Sdfr                     || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
61452Sdfr                 if (pp + 1 >= pbuf.length) {
61452Sdfr                     int[] new_pbuf = new int[pbuf.length * 2];
61452Sdfr                     System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
61452Sdfr                     pbuf = new_pbuf;
61452Sdfr                 }
127815Snjl                 pbuf[pp] = sp;
127815Snjl                 pbuf[pp + 1] = startPos + bp;
61452Sdfr                 pp += 2;
61452Sdfr             }
61452Sdfr             super.putChar(ch, scan);
61452Sdfr         }
142398Simp
61452Sdfr         /** Whether the ch represents a sequence of two backslashes. */
61452Sdfr         boolean isDoubleBackslash() {
61452Sdfr             return doubleBackslashBp == bp;
61452Sdfr         }
61452Sdfr
61452Sdfr
61452Sdfr     }
61452Sdfr
61452Sdfr     protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
61501Sdfr
61452Sdfr        /**
61452Sdfr        * Translated and stripped contents of doc comment
61452Sdfr        */
61452Sdfr        private String docComment = null;
61452Sdfr        private int[] docPosns = null;
61452Sdfr
61452Sdfr        JavadocComment(DocReader reader, CommentStyle cs) {
127135Snjl            super(reader, cs);
127135Snjl        }
61452Sdfr
61452Sdfr        @Override
61452Sdfr        public String getText() {
61452Sdfr            if (!scanned && cs == CommentStyle.JAVADOC) {
61452Sdfr                scanDocComment();
61452Sdfr            }
61452Sdfr            return docComment;
61452Sdfr        }
61452Sdfr
61452Sdfr        @Override
61452Sdfr        public int getSourcePos(int pos) {
61501Sdfr            // Binary search to find the entry for which the string index is
61452Sdfr            // less than pos. Since docPosns is a list of pairs of integers
61452Sdfr            // we must make sure the index is always even.
61452Sdfr            // If we find an exact match for pos, the other item in the pair
61452Sdfr            // gives the source pos; otherwise, compute the source position
61452Sdfr            // relative to the best match found in the array.
61452Sdfr            if (pos == Position.NOPOS)
61452Sdfr                return Position.NOPOS;
61452Sdfr            if (pos < 0 || pos > docComment.length())
61452Sdfr                throw new StringIndexOutOfBoundsException(String.valueOf(pos));
61452Sdfr            if (docPosns == null)
61452Sdfr                return Position.NOPOS;
61452Sdfr            int start = 0;
61452Sdfr            int end = docPosns.length;
61501Sdfr            while (start < end - 2) {
61452Sdfr                // find an even index midway between start and end
61452Sdfr                int index = ((start  + end) / 4) * 2;
61501Sdfr                if (docPosns[index] < pos)
61501Sdfr                    start = index;
61501Sdfr                else if (docPosns[index] == pos)
61452Sdfr                    return docPosns[index + 1];
61501Sdfr                else
61501Sdfr                    end = index;
61501Sdfr            }
61501Sdfr            return docPosns[start + 1] + (pos - docPosns[start]);
61501Sdfr        }
61452Sdfr
61452Sdfr        @Override
61452Sdfr        @SuppressWarnings("fallthrough")
61452Sdfr        protected void scanDocComment() {
61452Sdfr             try {
61501Sdfr                 boolean firstLine = true;
61452Sdfr
61452Sdfr                 // Skip over first slash
61452Sdfr                 comment_reader.scanCommentChar();
61452Sdfr                 // Skip over first star
61452Sdfr                 comment_reader.scanCommentChar();
61452Sdfr
61503Sdfr                 // consume any number of stars
61452Sdfr                 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
61503Sdfr                     comment_reader.scanCommentChar();
61503Sdfr                 }
61503Sdfr                 // is the comment in the form /**/, /***/, /****/, etc. ?
61503Sdfr                 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
61452Sdfr                     docComment = "";
61452Sdfr                     return;
61452Sdfr                 }
61452Sdfr
61452Sdfr                 // skip a newline on the first line of the comment.
61452Sdfr                 if (comment_reader.bp < comment_reader.buflen) {
61452Sdfr                     if (comment_reader.ch == LF) {
61452Sdfr                         comment_reader.scanCommentChar();
61452Sdfr                         firstLine = false;
61452Sdfr                     } else if (comment_reader.ch == CR) {
61452Sdfr                         comment_reader.scanCommentChar();
61452Sdfr                         if (comment_reader.ch == LF) {
61452Sdfr                             comment_reader.scanCommentChar();
61501Sdfr                             firstLine = false;
61503Sdfr                         }
61503Sdfr                     }
61503Sdfr                 }
61503Sdfr
61452Sdfr             outerLoop:
61452Sdfr
61452Sdfr                 // The outerLoop processes the doc comment, looping once
61452Sdfr                 // for each line.  For each line, it first strips off
61452Sdfr                 // whitespace, then it consumes any stars, then it
61452Sdfr                 // puts the rest of the line into our buffer.
61452Sdfr                 while (comment_reader.bp < comment_reader.buflen) {
61452Sdfr                     int begin_bp = comment_reader.bp;
61452Sdfr                     char begin_ch = comment_reader.ch;
61452Sdfr                     // The wsLoop consumes whitespace from the beginning
61452Sdfr                     // of each line.
61452Sdfr                 wsLoop:
61452Sdfr
61452Sdfr                     while (comment_reader.bp < comment_reader.buflen) {
61452Sdfr                         switch(comment_reader.ch) {
61452Sdfr                         case ' ':
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             break;
61452Sdfr                         case '\t':
61452Sdfr                             comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             break;
61452Sdfr                         case FF:
61452Sdfr                             comment_reader.col = 0;
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             break;
61452Sdfr         // Treat newline at beginning of line (blank line, no star)
61452Sdfr         // as comment text.  Old Javadoc compatibility requires this.
61452Sdfr         /*---------------------------------*
61452Sdfr                         case CR: // (Spec 3.4)
61452Sdfr                             doc_reader.scanCommentChar();
87479Scokane                             if (ch == LF) {
87479Scokane                                 col = 0;
87479Scokane                                 doc_reader.scanCommentChar();
87479Scokane                             }
61452Sdfr                             break;
87479Scokane                         case LF: // (Spec 3.4)
87479Scokane                             doc_reader.scanCommentChar();
61452Sdfr                             break;
61452Sdfr         *---------------------------------*/
61452Sdfr                         default:
61452Sdfr                             // we've seen something that isn't whitespace;
61452Sdfr                             // jump out.
61452Sdfr                             break wsLoop;
61452Sdfr                         }
61452Sdfr                     }
61452Sdfr
61452Sdfr                     // Are there stars here?  If so, consume them all
61452Sdfr                     // and check for the end of comment.
61452Sdfr                     if (comment_reader.ch == '*') {
61452Sdfr                         // skip all of the stars
87479Scokane                         do {
87479Scokane                             comment_reader.scanCommentChar();
87479Scokane                         } while (comment_reader.ch == '*');
61452Sdfr
61452Sdfr                         // check for the closing slash.
61452Sdfr                         if (comment_reader.ch == '/') {
61452Sdfr                             // We're done with the doc comment
61452Sdfr                             // scanChar() and breakout.
61452Sdfr                             break outerLoop;
61452Sdfr                         }
61452Sdfr                     } else if (! firstLine) {
61452Sdfr                         // The current line does not begin with a '*' so we will
61452Sdfr                         // treat it as comment
61452Sdfr                         comment_reader.bp = begin_bp;
61452Sdfr                         comment_reader.ch = begin_ch;
61452Sdfr                     }
61452Sdfr                     // The textLoop processes the rest of the characters
61452Sdfr                     // on the line, adding them to our buffer.
61452Sdfr                 textLoop:
61452Sdfr                     while (comment_reader.bp < comment_reader.buflen) {
61452Sdfr                         switch (comment_reader.ch) {
61452Sdfr                         case '*':
61452Sdfr                             // Is this just a star?  Or is this the
61452Sdfr                             // end of a comment?
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             if (comment_reader.ch == '/') {
61452Sdfr                                 // This is the end of the comment,
61452Sdfr                                 // set ch and return our buffer.
61452Sdfr                                 break outerLoop;
61452Sdfr                             }
61452Sdfr                             // This is just an ordinary star.  Add it to
61452Sdfr                             // the buffer.
61452Sdfr                             comment_reader.putChar('*', false);
61452Sdfr                             break;
61452Sdfr                         case '\\':
61452Sdfr                             comment_reader.putChar('\\', false);
61452Sdfr                             // If a double backslash was found, write two
61452Sdfr                             if (comment_reader.isDoubleBackslash()) {
61452Sdfr                                 comment_reader.putChar('\\', false);
61452Sdfr                             }
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                         case ' ':
61452Sdfr                         case '\t':
61452Sdfr                             comment_reader.putChar(comment_reader.ch, false);
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             break;
61452Sdfr                         case FF:
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             break textLoop; // treat as end of line
61452Sdfr                         case CR: // (Spec 3.4)
61452Sdfr                             comment_reader.scanCommentChar();
61452Sdfr                             if (comment_reader.ch != LF) {
61452Sdfr                                 // Canonicalize CR-only line terminator to LF
61452Sdfr                                 comment_reader.putChar((char)LF, false);
61452Sdfr                                 break textLoop;
61452Sdfr                             }
61452Sdfr                             /* fall through to LF case */
61452Sdfr                         case LF: // (Spec 3.4)
61452Sdfr                             // We've seen a newline.  Add it to our
61452Sdfr                             // buffer and break out of this loop,
61452Sdfr                             // starting fresh on a new line.
61452Sdfr                             comment_reader.putChar(comment_reader.ch, false);
61452Sdfr                             comment_reader.scanCommentChar();
113506Smdodd                             break textLoop;
113506Smdodd                         default:
                             // Add the character to our buffer.
                             comment_reader.putChar(comment_reader.ch, false);
                             comment_reader.scanCommentChar();
                         }
                     } // end textLoop
                     firstLine = false;
                 } // end outerLoop

                 if (comment_reader.sp > 0) {
                     int i = comment_reader.sp - 1;
                 trailLoop:
                     while (i > -1) {
                         switch (comment_reader.sbuf[i]) {
                         case '*':
                             i--;
                             break;
                         default:
                             break trailLoop;
                         }
                     }
                     comment_reader.sp = i + 1;

                     // Store the text of the doc comment
                    docComment = comment_reader.chars();
                    docPosns = new int[comment_reader.pp];
                    System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
                } else {
                    docComment = "";
                }
            } finally {
                scanned = true;
                comment_reader = null;
                if (docComment != null &&
                        DEPRECATED_PATTERN.matcher(docComment).matches()) {
                    deprecatedFlag = true;
                }
            }
        }
        //where:
            private static final Pattern DEPRECATED_PATTERN =
                    Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");

    }

    @Override
    public Position.LineMap getLineMap() {
        char[] buf = reader.getRawCharacters();
        return Position.makeLineMap(buf, buf.length, true);
    }
}