DocCommentParser.java revision 2571:10fc81ac75b4
1/*
2 * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.parser;
27
28import java.text.BreakIterator;
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.HashSet;
32import java.util.Locale;
33import java.util.Map;
34import java.util.Set;
35
36import com.sun.source.doctree.AttributeTree.ValueKind;
37import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
38import com.sun.tools.javac.parser.Tokens.Comment;
39import com.sun.tools.javac.parser.Tokens.TokenKind;
40import com.sun.tools.javac.tree.DCTree;
41import com.sun.tools.javac.tree.DCTree.DCAttribute;
42import com.sun.tools.javac.tree.DCTree.DCDocComment;
43import com.sun.tools.javac.tree.DCTree.DCEndElement;
44import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
45import com.sun.tools.javac.tree.DCTree.DCErroneous;
46import com.sun.tools.javac.tree.DCTree.DCIdentifier;
47import com.sun.tools.javac.tree.DCTree.DCReference;
48import com.sun.tools.javac.tree.DCTree.DCStartElement;
49import com.sun.tools.javac.tree.DCTree.DCText;
50import com.sun.tools.javac.tree.DocTreeMaker;
51import com.sun.tools.javac.tree.JCTree;
52import com.sun.tools.javac.util.DiagnosticSource;
53import com.sun.tools.javac.util.List;
54import com.sun.tools.javac.util.ListBuffer;
55import com.sun.tools.javac.util.Log;
56import com.sun.tools.javac.util.Name;
57import com.sun.tools.javac.util.Names;
58import com.sun.tools.javac.util.Options;
59import com.sun.tools.javac.util.Position;
60import com.sun.tools.javac.util.StringUtils;
61import static com.sun.tools.javac.util.LayoutCharacters.*;
62
63/**
64 *
65 *  <p><b>This is NOT part of any supported API.
66 *  If you write code that depends on this, you do so at your own risk.
67 *  This code and its internal interfaces are subject to change or
68 *  deletion without notice.</b>
69 */
70public class DocCommentParser {
71    static class ParseException extends Exception {
72        private static final long serialVersionUID = 0;
73        ParseException(String key) {
74            super(key);
75        }
76    }
77
    /** Factory this parser was created from; supplies names, tree maker, log, options and locale. */
    final ParserFactory fac;
    /** Diagnostic source handed to the erroneous nodes created by this parser. */
    final DiagnosticSource diagSource;
    /** The comment whose text is parsed by {@code parse()}. */
    final Comment comment;
    /** Tree maker used to create every doc tree node; taken from {@code fac.docTreeMaker}. */
    final DocTreeMaker m;
    /** Name table; taken from {@code fac.names}. */
    final Names names;

    /** Locale-specific sentence breaker; stays null unless the constructor decides to create one. */
    BreakIterator sentenceBreaker;

    /** The input buffer, index of most recent character read,
     *  index of one past last character in buffer.
     */
    protected char[] buf;
    protected int bp;
    protected int buflen;

    /** The current character.
     */
    protected char ch;

    /** Index in {@code buf} where the pending (not yet emitted) text run starts, or -1 if none. */
    int textStart = -1;
    /** Index in {@code buf} of the last non-whitespace character recorded by the scanning methods, or -1. */
    int lastNonWhite = -1;
    /** Set when a line terminator is read; cleared by the scanning methods when they consume other content. */
    boolean newline = true;

    /** Parsers for the known tags, looked up by tag name. */
    Map<Name, TagParser> tagParsers;
102
103    DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
104        this.fac = fac;
105        this.diagSource = diagSource;
106        this.comment = comment;
107        names = fac.names;
108        m = fac.docTreeMaker;
109
110        Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
111
112        Options options = fac.options;
113        boolean useBreakIterator = options.isSet("breakIterator");
114        if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
115            sentenceBreaker = BreakIterator.getSentenceInstance(locale);
116
117        initTagParsers();
118    }
119
120    DCDocComment parse() {
121        String c = comment.getText();
122        buf = new char[c.length() + 1];
123        c.getChars(0, c.length(), buf, 0);
124        buf[buf.length - 1] = EOI;
125        buflen = buf.length - 1;
126        bp = -1;
127        nextChar();
128
129        List<DCTree> body = blockContent();
130        List<DCTree> tags = blockTags();
131
132        // split body into first sentence and body
133        ListBuffer<DCTree> fs = new ListBuffer<>();
134        loop:
135        for (; body.nonEmpty(); body = body.tail) {
136            DCTree t = body.head;
137            switch (t.getKind()) {
138                case TEXT:
139                    String s = ((DCText) t).getBody();
140                    int i = getSentenceBreak(s);
141                    if (i > 0) {
142                        int i0 = i;
143                        while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
144                            i0--;
145                        fs.add(m.at(t.pos).Text(s.substring(0, i0)));
146                        int i1 = i;
147                        while (i1 < s.length() && isWhitespace(s.charAt(i1)))
148                            i1++;
149                        body = body.tail;
150                        if (i1 < s.length())
151                            body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
152                        break loop;
153                    } else if (body.tail.nonEmpty()) {
154                        if (isSentenceBreak(body.tail.head)) {
155                            int i0 = s.length() - 1;
156                            while (i0 > 0 && isWhitespace(s.charAt(i0)))
157                                i0--;
158                            fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
159                            body = body.tail;
160                            break loop;
161                        }
162                    }
163                    break;
164
165                case START_ELEMENT:
166                case END_ELEMENT:
167                    if (isSentenceBreak(t))
168                        break loop;
169                    break;
170            }
171            fs.add(t);
172        }
173
174        @SuppressWarnings("unchecked")
175        DCTree first = getFirst(fs.toList(), body, tags);
176        int pos = (first == null) ? Position.NOPOS : first.pos;
177
178        DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
179        return dc;
180    }
181
182    void nextChar() {
183        ch = buf[bp < buflen ? ++bp : buflen];
184        switch (ch) {
185            case '\f': case '\n': case '\r':
186                newline = true;
187        }
188    }
189
190    /**
191     * Read block content, consisting of text, html and inline tags.
192     * Terminated by the end of input, or the beginning of the next block tag:
193     * i.e. @ as the first non-whitespace character on a line.
194     */
195    @SuppressWarnings("fallthrough")
196    protected List<DCTree> blockContent() {
197        ListBuffer<DCTree> trees = new ListBuffer<>();
198        textStart = -1;
199
200        loop:
201        while (bp < buflen) {
202            switch (ch) {
203                case '\n': case '\r': case '\f':
204                    newline = true;
205                    // fallthrough
206
207                case ' ': case '\t':
208                    nextChar();
209                    break;
210
211                case '&':
212                    entity(trees);
213                    break;
214
215                case '<':
216                    newline = false;
217                    addPendingText(trees, bp - 1);
218                    trees.add(html());
219                    if (textStart == -1) {
220                        textStart = bp;
221                        lastNonWhite = -1;
222                    }
223                    break;
224
225                case '>':
226                    newline = false;
227                    addPendingText(trees, bp - 1);
228                    trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
229                    nextChar();
230                    if (textStart == -1) {
231                        textStart = bp;
232                        lastNonWhite = -1;
233                    }
234                    break;
235
236                case '{':
237                    inlineTag(trees);
238                    break;
239
240                case '@':
241                    if (newline) {
242                        addPendingText(trees, lastNonWhite);
243                        break loop;
244                    }
245                    // fallthrough
246
247                default:
248                    newline = false;
249                    if (textStart == -1)
250                        textStart = bp;
251                    lastNonWhite = bp;
252                    nextChar();
253            }
254        }
255
256        if (lastNonWhite != -1)
257            addPendingText(trees, lastNonWhite);
258
259        return trees.toList();
260    }
261
262    /**
263     * Read a series of block tags, including their content.
264     * Standard tags parse their content appropriately.
265     * Non-standard tags are represented by {@link UnknownBlockTag}.
266     */
267    protected List<DCTree> blockTags() {
268        ListBuffer<DCTree> tags = new ListBuffer<>();
269        while (ch == '@')
270            tags.add(blockTag());
271        return tags.toList();
272    }
273
274    /**
275     * Read a single block tag, including its content.
276     * Standard tags parse their content appropriately.
277     * Non-standard tags are represented by {@link UnknownBlockTag}.
278     */
279    protected DCTree blockTag() {
280        int p = bp;
281        try {
282            nextChar();
283            if (isIdentifierStart(ch)) {
284                Name name = readTagName();
285                TagParser tp = tagParsers.get(name);
286                if (tp == null) {
287                    List<DCTree> content = blockContent();
288                    return m.at(p).UnknownBlockTag(name, content);
289                } else {
290                    switch (tp.getKind()) {
291                        case BLOCK:
292                            return tp.parse(p);
293                        case INLINE:
294                            return erroneous("dc.bad.inline.tag", p);
295                    }
296                }
297            }
298            blockContent();
299
300            return erroneous("dc.no.tag.name", p);
301        } catch (ParseException e) {
302            blockContent();
303            return erroneous(e.getMessage(), p);
304        }
305    }
306
307    protected void inlineTag(ListBuffer<DCTree> list) {
308        newline = false;
309        nextChar();
310        if (ch == '@') {
311            addPendingText(list, bp - 2);
312            list.add(inlineTag());
313            textStart = bp;
314            lastNonWhite = -1;
315        } else {
316            if (textStart == -1)
317                textStart = bp - 1;
318            lastNonWhite = bp;
319        }
320    }
321
322    /**
323     * Read a single inline tag, including its content.
324     * Standard tags parse their content appropriately.
325     * Non-standard tags are represented by {@link UnknownBlockTag}.
326     * Malformed tags may be returned as {@link Erroneous}.
327     */
328    protected DCTree inlineTag() {
329        int p = bp - 1;
330        try {
331            nextChar();
332            if (isIdentifierStart(ch)) {
333                Name name = readTagName();
334                skipWhitespace();
335
336                TagParser tp = tagParsers.get(name);
337                if (tp == null) {
338                    DCTree text = inlineText();
339                    if (text != null) {
340                        nextChar();
341                        return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp);
342                    }
343                } else if (tp.getKind() == TagParser.Kind.INLINE) {
344                    DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
345                    if (tree != null) {
346                        return tree.setEndPos(bp);
347                    }
348                } else {
349                    inlineText(); // skip content
350                    nextChar();
351                }
352            }
353            return erroneous("dc.no.tag.name", p);
354        } catch (ParseException e) {
355            return erroneous(e.getMessage(), p);
356        }
357    }
358
359    /**
360     * Read plain text content of an inline tag.
361     * Matching pairs of { } are skipped; the text is terminated by the first
362     * unmatched }. It is an error if the beginning of the next tag is detected.
363     */
364    protected DCTree inlineText() throws ParseException {
365        skipWhitespace();
366        int pos = bp;
367        int depth = 1;
368
369        loop:
370        while (bp < buflen) {
371            switch (ch) {
372                case '\n': case '\r': case '\f':
373                    newline = true;
374                    break;
375
376                case ' ': case '\t':
377                    break;
378
379                case '{':
380                    newline = false;
381                    lastNonWhite = bp;
382                    depth++;
383                    break;
384
385                case '}':
386                    if (--depth == 0) {
387                        return m.at(pos).Text(newString(pos, bp));
388                    }
389                    newline = false;
390                    lastNonWhite = bp;
391                    break;
392
393                case '@':
394                    if (newline)
395                        break loop;
396                    newline = false;
397                    lastNonWhite = bp;
398                    break;
399
400                default:
401                    newline = false;
402                    lastNonWhite = bp;
403                    break;
404            }
405            nextChar();
406        }
407        throw new ParseException("dc.unterminated.inline.tag");
408    }
409
410    /**
411     * Read Java class name, possibly followed by member
412     * Matching pairs of < > are skipped. The text is terminated by the first
413     * unmatched }. It is an error if the beginning of the next tag is detected.
414     */
415    // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
416    // TODO: improve quality of parse to forbid bad constructions.
417    @SuppressWarnings("fallthrough")
418    protected DCReference reference(boolean allowMember) throws ParseException {
419        int pos = bp;
420        int depth = 0;
421
422        // scan to find the end of the signature, by looking for the first
423        // whitespace not enclosed in () or <>, or the end of the tag
424        loop:
425        while (bp < buflen) {
426            switch (ch) {
427                case '\n': case '\r': case '\f':
428                    newline = true;
429                    // fallthrough
430
431                case ' ': case '\t':
432                    if (depth == 0)
433                        break loop;
434                    break;
435
436                case '(':
437                case '<':
438                    newline = false;
439                    depth++;
440                    break;
441
442                case ')':
443                case '>':
444                    newline = false;
445                    --depth;
446                    break;
447
448                case '}':
449                    if (bp == pos)
450                        return null;
451                    newline = false;
452                    break loop;
453
454                case '@':
455                    if (newline)
456                        break loop;
457                    // fallthrough
458
459                default:
460                    newline = false;
461
462            }
463            nextChar();
464        }
465
466        if (depth != 0)
467            throw new ParseException("dc.unterminated.signature");
468
469        String sig = newString(pos, bp);
470
471        // Break sig apart into qualifiedExpr member paramTypes.
472        JCTree qualExpr;
473        Name member;
474        List<JCTree> paramTypes;
475
476        Log.DeferredDiagnosticHandler deferredDiagnosticHandler
477                = new Log.DeferredDiagnosticHandler(fac.log);
478
479        try {
480            int hash = sig.indexOf("#");
481            int lparen = sig.indexOf("(", hash + 1);
482            if (hash == -1) {
483                if (lparen == -1) {
484                    qualExpr = parseType(sig);
485                    member = null;
486                } else {
487                    qualExpr = null;
488                    member = parseMember(sig.substring(0, lparen));
489                }
490            } else {
491                qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
492                if (lparen == -1)
493                    member = parseMember(sig.substring(hash + 1));
494                else
495                    member = parseMember(sig.substring(hash + 1, lparen));
496            }
497
498            if (lparen < 0) {
499                paramTypes = null;
500            } else {
501                int rparen = sig.indexOf(")", lparen);
502                if (rparen != sig.length() - 1)
503                    throw new ParseException("dc.ref.bad.parens");
504                paramTypes = parseParams(sig.substring(lparen + 1, rparen));
505            }
506
507            if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
508                throw new ParseException("dc.ref.syntax.error");
509
510        } finally {
511            fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
512        }
513
514        return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp);
515    }
516
517    JCTree parseType(String s) throws ParseException {
518        JavacParser p = fac.newParser(s, false, false, false);
519        JCTree tree = p.parseType();
520        if (p.token().kind != TokenKind.EOF)
521            throw new ParseException("dc.ref.unexpected.input");
522        return tree;
523    }
524
525    Name parseMember(String s) throws ParseException {
526        JavacParser p = fac.newParser(s, false, false, false);
527        Name name = p.ident();
528        if (p.token().kind != TokenKind.EOF)
529            throw new ParseException("dc.ref.unexpected.input");
530        return name;
531    }
532
533    List<JCTree> parseParams(String s) throws ParseException {
534        if (s.trim().isEmpty())
535            return List.nil();
536
537        JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
538        ListBuffer<JCTree> paramTypes = new ListBuffer<>();
539        paramTypes.add(p.parseType());
540
541        if (p.token().kind == TokenKind.IDENTIFIER)
542            p.nextToken();
543
544        while (p.token().kind == TokenKind.COMMA) {
545            p.nextToken();
546            paramTypes.add(p.parseType());
547
548            if (p.token().kind == TokenKind.IDENTIFIER)
549                p.nextToken();
550        }
551
552        if (p.token().kind != TokenKind.EOF)
553            throw new ParseException("dc.ref.unexpected.input");
554
555        return paramTypes.toList();
556    }
557
558    /**
559     * Read Java identifier
560     * Matching pairs of { } are skipped; the text is terminated by the first
561     * unmatched }. It is an error if the beginning of the next tag is detected.
562     */
563    @SuppressWarnings("fallthrough")
564    protected DCIdentifier identifier() throws ParseException {
565        skipWhitespace();
566        int pos = bp;
567
568        if (isJavaIdentifierStart(ch)) {
569            Name name = readJavaIdentifier();
570            return m.at(pos).Identifier(name);
571        }
572
573        throw new ParseException("dc.identifier.expected");
574    }
575
576    /**
577     * Read a quoted string.
578     * It is an error if the beginning of the next tag is detected.
579     */
580    @SuppressWarnings("fallthrough")
581    protected DCText quotedString() {
582        int pos = bp;
583        nextChar();
584
585        loop:
586        while (bp < buflen) {
587            switch (ch) {
588                case '\n': case '\r': case '\f':
589                    newline = true;
590                    break;
591
592                case ' ': case '\t':
593                    break;
594
595                case '"':
596                    nextChar();
597                    // trim trailing white-space?
598                    return m.at(pos).Text(newString(pos, bp));
599
600                case '@':
601                    if (newline)
602                        break loop;
603
604            }
605            nextChar();
606        }
607        return null;
608    }
609
610    /**
611     * Read general text content of an inline tag, including HTML entities and elements.
612     * Matching pairs of { } are skipped; the text is terminated by the first
613     * unmatched }. It is an error if the beginning of the next tag is detected.
614     */
615    @SuppressWarnings("fallthrough")
616    protected List<DCTree> inlineContent() {
617        ListBuffer<DCTree> trees = new ListBuffer<>();
618
619        skipWhitespace();
620        int pos = bp;
621        int depth = 1;
622        textStart = -1;
623
624        loop:
625        while (bp < buflen) {
626
627            switch (ch) {
628                case '\n': case '\r': case '\f':
629                    newline = true;
630                    // fall through
631
632                case ' ': case '\t':
633                    nextChar();
634                    break;
635
636                case '&':
637                    entity(trees);
638                    break;
639
640                case '<':
641                    newline = false;
642                    addPendingText(trees, bp - 1);
643                    trees.add(html());
644                    break;
645
646                case '{':
647                    newline = false;
648                    depth++;
649                    nextChar();
650                    break;
651
652                case '}':
653                    newline = false;
654                    if (--depth == 0) {
655                        addPendingText(trees, bp - 1);
656                        nextChar();
657                        return trees.toList();
658                    }
659                    nextChar();
660                    break;
661
662                case '@':
663                    if (newline)
664                        break loop;
665                    // fallthrough
666
667                default:
668                    if (textStart == -1)
669                        textStart = bp;
670                    nextChar();
671                    break;
672            }
673        }
674
675        return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
676    }
677
678    protected void entity(ListBuffer<DCTree> list) {
679        newline = false;
680        addPendingText(list, bp - 1);
681        list.add(entity());
682        if (textStart == -1) {
683            textStart = bp;
684            lastNonWhite = -1;
685        }
686    }
687
688    /**
689     * Read an HTML entity.
690     * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
691     */
692    protected DCTree entity() {
693        int p = bp;
694        nextChar();
695        Name name = null;
696        if (ch == '#') {
697            int namep = bp;
698            nextChar();
699            if (isDecimalDigit(ch)) {
700                nextChar();
701                while (isDecimalDigit(ch))
702                    nextChar();
703                name = names.fromChars(buf, namep, bp - namep);
704            } else if (ch == 'x' || ch == 'X') {
705                nextChar();
706                if (isHexDigit(ch)) {
707                    nextChar();
708                    while (isHexDigit(ch))
709                        nextChar();
710                    name = names.fromChars(buf, namep, bp - namep);
711                }
712            }
713        } else if (isIdentifierStart(ch)) {
714            name = readIdentifier();
715        }
716
717        if (name == null)
718            return erroneous("dc.bad.entity", p);
719        else {
720            if (ch != ';')
721                return erroneous("dc.missing.semicolon", p);
722            nextChar();
723            return m.at(p).Entity(name);
724        }
725    }
726
727    /**
728     * Read the start or end of an HTML tag, or an HTML comment
729     * {@literal <identifier attrs> } or {@literal </identifier> }
730     */
731    protected DCTree html() {
732        int p = bp;
733        nextChar();
734        if (isIdentifierStart(ch)) {
735            Name name = readIdentifier();
736            List<DCTree> attrs = htmlAttrs();
737            if (attrs != null) {
738                boolean selfClosing = false;
739                if (ch == '/') {
740                    nextChar();
741                    selfClosing = true;
742                }
743                if (ch == '>') {
744                    nextChar();
745                    return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp);
746                }
747            }
748        } else if (ch == '/') {
749            nextChar();
750            if (isIdentifierStart(ch)) {
751                Name name = readIdentifier();
752                skipWhitespace();
753                if (ch == '>') {
754                    nextChar();
755                    return m.at(p).EndElement(name);
756                }
757            }
758        } else if (ch == '!') {
759            nextChar();
760            if (ch == '-') {
761                nextChar();
762                if (ch == '-') {
763                    nextChar();
764                    while (bp < buflen) {
765                        int dash = 0;
766                        while (ch == '-') {
767                            dash++;
768                            nextChar();
769                        }
770                        // strictly speaking, a comment should not contain "--"
771                        // so dash > 2 is an error, dash == 2 implies ch == '>'
772                        if (dash >= 2 && ch == '>') {
773                            nextChar();
774                            return m.at(p).Comment(newString(p, bp));
775                        }
776
777                        nextChar();
778                    }
779                }
780            }
781        }
782
783        bp = p + 1;
784        ch = buf[bp];
785        return erroneous("dc.malformed.html", p);
786    }
787
788    /**
789     * Read a series of HTML attributes, terminated by {@literal > }.
790     * Each attribute is of the form {@literal identifier[=value] }.
791     * "value" may be unquoted, single-quoted, or double-quoted.
792     */
793    protected List<DCTree> htmlAttrs() {
794        ListBuffer<DCTree> attrs = new ListBuffer<>();
795        skipWhitespace();
796
797        loop:
798        while (isIdentifierStart(ch)) {
799            int namePos = bp;
800            Name name = readIdentifier();
801            skipWhitespace();
802            List<DCTree> value = null;
803            ValueKind vkind = ValueKind.EMPTY;
804            if (ch == '=') {
805                ListBuffer<DCTree> v = new ListBuffer<>();
806                nextChar();
807                skipWhitespace();
808                if (ch == '\'' || ch == '"') {
809                    vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
810                    char quote = ch;
811                    nextChar();
812                    textStart = bp;
813                    while (bp < buflen && ch != quote) {
814                        if (newline && ch == '@') {
815                            attrs.add(erroneous("dc.unterminated.string", namePos));
816                            // No point trying to read more.
817                            // In fact, all attrs get discarded by the caller
818                            // and superseded by a malformed.html node because
819                            // the html tag itself is not terminated correctly.
820                            break loop;
821                        }
822                        attrValueChar(v);
823                    }
824                    addPendingText(v, bp - 1);
825                    nextChar();
826                } else {
827                    vkind = ValueKind.UNQUOTED;
828                    textStart = bp;
829                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
830                        attrValueChar(v);
831                    }
832                    addPendingText(v, bp - 1);
833                }
834                skipWhitespace();
835                value = v.toList();
836            }
837            DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
838            attrs.add(attr);
839        }
840
841        return attrs.toList();
842    }
843
844    protected void attrValueChar(ListBuffer<DCTree> list) {
845        switch (ch) {
846            case '&':
847                entity(list);
848                break;
849
850            case '{':
851                inlineTag(list);
852                break;
853
854            default:
855                nextChar();
856        }
857    }
858
859    protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
860        if (textStart != -1) {
861            if (textStart <= textEnd) {
862                list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
863            }
864            textStart = -1;
865        }
866    }
867
868    protected DCErroneous erroneous(String code, int pos) {
869        int i = bp - 1;
870        loop:
871        while (i > pos) {
872            switch (buf[i]) {
873                case '\f': case '\n': case '\r':
874                    newline = true;
875                    break;
876                case '\t': case ' ':
877                    break;
878                default:
879                    break loop;
880            }
881            i--;
882        }
883        textStart = -1;
884        return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
885    }
886
887    @SuppressWarnings("unchecked")
888    <T> T getFirst(List<T>... lists) {
889        for (List<T> list: lists) {
890            if (list.nonEmpty())
891                return list.head;
892        }
893        return null;
894    }
895
896    protected boolean isIdentifierStart(char ch) {
897        return Character.isUnicodeIdentifierStart(ch);
898    }
899
900    protected Name readIdentifier() {
901        int start = bp;
902        nextChar();
903        while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
904            nextChar();
905        return names.fromChars(buf, start, bp - start);
906    }
907
908    protected Name readTagName() {
909        int start = bp;
910        nextChar();
911        while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.'))
912            nextChar();
913        return names.fromChars(buf, start, bp - start);
914    }
915
916    protected boolean isJavaIdentifierStart(char ch) {
917        return Character.isJavaIdentifierStart(ch);
918    }
919
920    protected Name readJavaIdentifier() {
921        int start = bp;
922        nextChar();
923        while (bp < buflen && Character.isJavaIdentifierPart(ch))
924            nextChar();
925        return names.fromChars(buf, start, bp - start);
926    }
927
928    protected boolean isDecimalDigit(char ch) {
929        return ('0' <= ch && ch <= '9');
930    }
931
932    protected boolean isHexDigit(char ch) {
933        return ('0' <= ch && ch <= '9')
934                || ('a' <= ch && ch <= 'f')
935                || ('A' <= ch && ch <= 'F');
936    }
937
938    protected boolean isUnquotedAttrValueTerminator(char ch) {
939        switch (ch) {
940            case '\f': case '\n': case '\r': case '\t':
941            case ' ':
942            case '"': case '\'': case '`':
943            case '=': case '<': case '>':
944                return true;
945            default:
946                return false;
947        }
948    }
949
950    protected boolean isWhitespace(char ch) {
951        return Character.isWhitespace(ch);
952    }
953
954    protected void skipWhitespace() {
955        while (isWhitespace(ch))
956            nextChar();
957    }
958
959    protected int getSentenceBreak(String s) {
960        if (sentenceBreaker != null) {
961            sentenceBreaker.setText(s);
962            int i = sentenceBreaker.next();
963            return (i == s.length()) ? -1 : i;
964        }
965
966        // scan for period followed by whitespace
967        boolean period = false;
968        for (int i = 0; i < s.length(); i++) {
969            switch (s.charAt(i)) {
970                case '.':
971                    period = true;
972                    break;
973
974                case ' ':
975                case '\f':
976                case '\n':
977                case '\r':
978                case '\t':
979                    if (period)
980                        return i;
981                    break;
982
983                default:
984                    period = false;
985                    break;
986            }
987        }
988        return -1;
989    }
990
991
    // Lower-case HTML element names that isSentenceBreak(Name) treats as
    // sentence breaks: the six heading levels, "p" and "pre".
    Set<String> htmlBlockTags = new HashSet<>(Arrays.asList(
                    "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
994
995    protected boolean isSentenceBreak(Name n) {
996        return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString()));
997    }
998
999    protected boolean isSentenceBreak(DCTree t) {
1000        switch (t.getKind()) {
1001            case START_ELEMENT:
1002                return isSentenceBreak(((DCStartElement) t).getName());
1003
1004            case END_ELEMENT:
1005                return isSentenceBreak(((DCEndElement) t).getName());
1006        }
1007        return false;
1008    }
1009
1010    /**
1011     * @param start position of first character of string
1012     * @param end position of character beyond last character to be included
1013     */
1014    String newString(int start, int end) {
1015        return new String(buf, start, end - start);
1016    }
1017
1018    static abstract class TagParser {
1019        enum Kind { INLINE, BLOCK }
1020
1021        Kind kind;
1022        DCTree.Kind treeKind;
1023
1024        TagParser(Kind k, DCTree.Kind tk) {
1025            kind = k;
1026            treeKind = tk;
1027        }
1028
1029        Kind getKind() {
1030            return kind;
1031        }
1032
1033        DCTree.Kind getTreeKind() {
1034            return treeKind;
1035        }
1036
1037        abstract DCTree parse(int pos) throws ParseException;
1038    }
1039
    /**
     * Builds the table of tag parsers. One anonymous {@code TagParser} is
     * created for each supported block or inline tag, and each is stored in
     * {@code tagParsers} under the name obtained from the tag name of its
     * tree kind.
     *
     * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
     */
    private void initTagParsers() {
        TagParser[] parsers = {
            // @author name-text
            new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
                public DCTree parse(int pos) {
                    List<DCTree> name = blockContent();
                    return m.at(pos).Author(name);
                }
            },

            // {@code text}
            new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
                public DCTree parse(int pos) throws ParseException {
                    DCTree text = inlineText();
                    // NOTE(review): presumably steps over the closing '}' left by inlineText() - confirm
                    nextChar();
                    return m.at(pos).Code((DCText) text);
                }
            },

            // @deprecated deprecated-text
            new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
                public DCTree parse(int pos) {
                    List<DCTree> reason = blockContent();
                    return m.at(pos).Deprecated(reason);
                }
            },

            // {@docRoot}
            new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
                public DCTree parse(int pos) throws ParseException {
                    // The tag takes no content: the next character must be '}'.
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).DocRoot();
                    }
                    inlineText(); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @exception class-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(false);
                    List<DCTree> description = blockContent();
                    return m.at(pos).Exception(ref, description);
                }
            },

            // {@inheritDoc}
            new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
                public DCTree parse(int pos) throws ParseException {
                    // The tag takes no content: the next character must be '}'.
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).InheritDoc();
                    }
                    inlineText(); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // {@link package.class#member label}
            new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    List<DCTree> label = inlineContent();
                    return m.at(pos).Link(ref, label);
                }
            },

            // {@linkplain package.class#member label}
            new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    List<DCTree> label = inlineContent();
                    return m.at(pos).LinkPlain(ref, label);
                }
            },

            // {@literal text}
            new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
                public DCTree parse(int pos) throws ParseException {
                    DCTree text = inlineText();
                    // NOTE(review): presumably steps over the closing '}' left by inlineText() - confirm
                    nextChar();
                    return m.at(pos).Literal((DCText) text);
                }
            },

            // @param parameter-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();

                    // A leading '<' marks a type parameter, written as <name>.
                    boolean typaram = false;
                    if (ch == '<') {
                        typaram = true;
                        nextChar();
                    }

                    DCIdentifier id = identifier();

                    // A type parameter name must be closed by '>'.
                    if (typaram) {
                        if (ch != '>')
                            throw new ParseException("dc.gt.expected");
                        nextChar();
                    }

                    skipWhitespace();
                    List<DCTree> desc = blockContent();
                    return m.at(pos).Param(typaram, id, desc);
                }
            },

            // @return description
            new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).Return(description);
                }
            },

            // @see reference | quoted-string | HTML
            new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    // Dispatch on the first non-whitespace character. Every
                    // path that does not return or throw inside the switch
                    // ends at the "dc.unexpected.content" exception below.
                    switch (ch) {
                        case '"':
                            // Quoted string: accepted only when nothing but
                            // whitespace separates it from the next '@' or
                            // from EOI at the last buffer index.
                            DCText string = quotedString();
                            if (string != null) {
                                skipWhitespace();
                                if (ch == '@'
                                        || ch == EOI && bp == buf.length - 1) {
                                    return m.at(pos).See(List.<DCTree>of(string));
                                }
                            }
                            break;

                        case '<':
                            // HTML form: the rest of the block is the content.
                            List<DCTree> html = blockContent();
                            if (html != null)
                                return m.at(pos).See(html);
                            break;

                        case '@':
                            // An '@' with the newline flag set is reported as
                            // a tag without content.
                            if (newline)
                                throw new ParseException("dc.no.content");
                            break;

                        case EOI:
                            // EOI at the last buffer index: no content either.
                            if (bp == buf.length - 1)
                                throw new ParseException("dc.no.content");
                            break;

                        default:
                            // Reference form: a reference followed by an
                            // optional description; the reference becomes the
                            // first element of the resulting list.
                            if (isJavaIdentifierStart(ch) || ch == '#') {
                                DCReference ref = reference(true);
                                List<DCTree> description = blockContent();
                                return m.at(pos).See(description.prepend(ref));
                            }
                    }
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @serialData data-description
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).SerialData(description);
                }
            },

            // @serialField field-name field-type description
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCIdentifier name = identifier();
                    skipWhitespace();
                    DCReference type = reference(false);
                    // The description stays null unless whitespace follows
                    // the type reference.
                    List<DCTree> description = null;
                    if (isWhitespace(ch)) {
                        skipWhitespace();
                        description = blockContent();
                    }
                    return m.at(pos).SerialField(name, type, description);
                }
            },

            // @serial field-description | include | exclude
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).Serial(description);
                }
            },

            // @since since-text
            new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).Since(description);
                }
            },

            // @throws class-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(false);
                    List<DCTree> description = blockContent();
                    return m.at(pos).Throws(ref, description);
                }
            },

            // {@value package.class#field}
            new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    skipWhitespace();
                    // Only whitespace may separate the reference from '}'.
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).Value(ref);
                    }
                    // Advance one character before reporting the error.
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @version version-text
            new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).Version(description);
                }
            },
        };

        // Index the parsers by the name of the tag each one handles.
        tagParsers = new HashMap<>();
        for (TagParser p: parsers)
            tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

    }
1287}
1288