DocCommentParser.java revision 4278:a6cee0419f93
1/*
2 * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.parser;
27
28import java.text.BreakIterator;
29import java.util.HashMap;
30import java.util.Map;
31
32import com.sun.source.doctree.AttributeTree.ValueKind;
33import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
34import com.sun.tools.javac.parser.Tokens.Comment;
35import com.sun.tools.javac.parser.Tokens.TokenKind;
36import com.sun.tools.javac.tree.DCTree;
37import com.sun.tools.javac.tree.DCTree.DCAttribute;
38import com.sun.tools.javac.tree.DCTree.DCDocComment;
39import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
40import com.sun.tools.javac.tree.DCTree.DCErroneous;
41import com.sun.tools.javac.tree.DCTree.DCIdentifier;
42import com.sun.tools.javac.tree.DCTree.DCReference;
43import com.sun.tools.javac.tree.DCTree.DCText;
44import com.sun.tools.javac.tree.DocTreeMaker;
45import com.sun.tools.javac.tree.JCTree;
46import com.sun.tools.javac.util.DiagnosticSource;
47import com.sun.tools.javac.util.List;
48import com.sun.tools.javac.util.ListBuffer;
49import com.sun.tools.javac.util.Log;
50import com.sun.tools.javac.util.Name;
51import com.sun.tools.javac.util.Names;
52import com.sun.tools.javac.util.Position;
53
54import static com.sun.tools.javac.util.LayoutCharacters.*;
55
56/**
57 *
58 *  <p><b>This is NOT part of any supported API.
59 *  If you write code that depends on this, you do so at your own risk.
60 *  This code and its internal interfaces are subject to change or
61 *  deletion without notice.</b>
62 */
63public class DocCommentParser {
64    static class ParseException extends Exception {
65        private static final long serialVersionUID = 0;
66        ParseException(String key) {
67            super(key);
68        }
69    }
70
71    final ParserFactory fac;
72    final DiagnosticSource diagSource;
73    final Comment comment;
74    final DocTreeMaker m;
75    final Names names;
76
77    BreakIterator sentenceBreaker;
78
79    /** The input buffer, index of most recent character read,
80     *  index of one past last character in buffer.
81     */
82    protected char[] buf;
83    protected int bp;
84    protected int buflen;
85
86    /** The current character.
87     */
88    protected char ch;
89
90    int textStart = -1;
91    int lastNonWhite = -1;
92    boolean newline = true;
93
94    Map<Name, TagParser> tagParsers;
95
96    public DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
97        this.fac = fac;
98        this.diagSource = diagSource;
99        this.comment = comment;
100        names = fac.names;
101        m = fac.docTreeMaker;
102        initTagParsers();
103    }
104
105    public DocCommentParser(ParserFactory fac) {
106        this(fac, null, null);
107    }
108
109    public DCDocComment parse() {
110        String c = comment.getText();
111        buf = new char[c.length() + 1];
112        c.getChars(0, c.length(), buf, 0);
113        buf[buf.length - 1] = EOI;
114        buflen = buf.length - 1;
115        bp = -1;
116        nextChar();
117
118        List<DCTree> body = blockContent();
119        List<DCTree> tags = blockTags();
120        int pos = !body.isEmpty()
121                ? body.head.pos
122                : !tags.isEmpty() ? tags.head.pos : Position.NOPOS;
123
124        DCDocComment dc = m.at(pos).newDocCommentTree(comment, body, tags);
125        return dc;
126    }
127
128    void nextChar() {
129        ch = buf[bp < buflen ? ++bp : buflen];
130        switch (ch) {
131            case '\f': case '\n': case '\r':
132                newline = true;
133        }
134    }
135
136    /**
137     * Read block content, consisting of text, html and inline tags.
138     * Terminated by the end of input, or the beginning of the next block tag:
139     * i.e. @ as the first non-whitespace character on a line.
140     */
141    @SuppressWarnings("fallthrough")
142    protected List<DCTree> blockContent() {
143        ListBuffer<DCTree> trees = new ListBuffer<>();
144        textStart = -1;
145
146        loop:
147        while (bp < buflen) {
148            switch (ch) {
149                case '\n': case '\r': case '\f':
150                    newline = true;
151                    // fallthrough
152
153                case ' ': case '\t':
154                    nextChar();
155                    break;
156
157                case '&':
158                    entity(trees);
159                    break;
160
161                case '<':
162                    newline = false;
163                    addPendingText(trees, bp - 1);
164                    trees.add(html());
165                    if (textStart == -1) {
166                        textStart = bp;
167                        lastNonWhite = -1;
168                    }
169                    break;
170
171                case '>':
172                    newline = false;
173                    addPendingText(trees, bp - 1);
174                    trees.add(m.at(bp).newErroneousTree(newString(bp, bp + 1), diagSource, "dc.bad.gt"));
175                    nextChar();
176                    if (textStart == -1) {
177                        textStart = bp;
178                        lastNonWhite = -1;
179                    }
180                    break;
181
182                case '{':
183                    inlineTag(trees);
184                    break;
185
186                case '@':
187                    if (newline) {
188                        addPendingText(trees, lastNonWhite);
189                        break loop;
190                    }
191                    // fallthrough
192
193                default:
194                    newline = false;
195                    if (textStart == -1)
196                        textStart = bp;
197                    lastNonWhite = bp;
198                    nextChar();
199            }
200        }
201
202        if (lastNonWhite != -1)
203            addPendingText(trees, lastNonWhite);
204
205        return trees.toList();
206    }
207
208    /**
209     * Read a series of block tags, including their content.
210     * Standard tags parse their content appropriately.
211     * Non-standard tags are represented by {@link UnknownBlockTag}.
212     */
213    protected List<DCTree> blockTags() {
214        ListBuffer<DCTree> tags = new ListBuffer<>();
215        while (ch == '@')
216            tags.add(blockTag());
217        return tags.toList();
218    }
219
220    /**
221     * Read a single block tag, including its content.
222     * Standard tags parse their content appropriately.
223     * Non-standard tags are represented by {@link UnknownBlockTag}.
224     */
225    protected DCTree blockTag() {
226        int p = bp;
227        try {
228            nextChar();
229            if (isIdentifierStart(ch)) {
230                Name name = readTagName();
231                TagParser tp = tagParsers.get(name);
232                if (tp == null) {
233                    List<DCTree> content = blockContent();
234                    return m.at(p).newUnknownBlockTagTree(name, content);
235                } else {
236                    switch (tp.getKind()) {
237                        case BLOCK:
238                            return tp.parse(p);
239                        case INLINE:
240                            return erroneous("dc.bad.inline.tag", p);
241                    }
242                }
243            }
244            blockContent();
245
246            return erroneous("dc.no.tag.name", p);
247        } catch (ParseException e) {
248            blockContent();
249            return erroneous(e.getMessage(), p);
250        }
251    }
252
253    protected void inlineTag(ListBuffer<DCTree> list) {
254        newline = false;
255        nextChar();
256        if (ch == '@') {
257            addPendingText(list, bp - 2);
258            list.add(inlineTag());
259            textStart = bp;
260            lastNonWhite = -1;
261        } else {
262            if (textStart == -1)
263                textStart = bp - 1;
264            lastNonWhite = bp;
265        }
266    }
267
268    /**
269     * Read a single inline tag, including its content.
270     * Standard tags parse their content appropriately.
271     * Non-standard tags are represented by {@link UnknownBlockTag}.
272     * Malformed tags may be returned as {@link Erroneous}.
273     */
274    protected DCTree inlineTag() {
275        int p = bp - 1;
276        try {
277            nextChar();
278            if (isIdentifierStart(ch)) {
279                Name name = readTagName();
280                TagParser tp = tagParsers.get(name);
281
282                if (tp == null) {
283                    skipWhitespace();
284                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
285                    if (text != null) {
286                        nextChar();
287                        return m.at(p).newUnknownInlineTagTree(name, List.of(text)).setEndPos(bp);
288                    }
289                } else {
290                    if (!tp.retainWhiteSpace) {
291                        skipWhitespace();
292                    }
293                    if (tp.getKind() == TagParser.Kind.INLINE) {
294                        DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
295                        if (tree != null) {
296                            return tree.setEndPos(bp);
297                        }
298                    } else { // handle block tags (ex: @see) in inline content
299                        inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content
300                        nextChar();
301                    }
302                }
303            }
304            return erroneous("dc.no.tag.name", p);
305        } catch (ParseException e) {
306            return erroneous(e.getMessage(), p);
307        }
308    }
309
310    private static enum WhitespaceRetentionPolicy {
311        RETAIN_ALL,
312        REMOVE_FIRST_SPACE,
313        REMOVE_ALL
314    }
315
316    /**
317     * Read plain text content of an inline tag.
318     * Matching pairs of { } are skipped; the text is terminated by the first
319     * unmatched }. It is an error if the beginning of the next tag is detected.
320     */
321    private DCTree inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
322        switch (whitespacePolicy) {
323            case REMOVE_ALL:
324                skipWhitespace();
325                break;
326            case REMOVE_FIRST_SPACE:
327                if (ch == ' ')
328                    nextChar();
329                break;
330            case RETAIN_ALL:
331            default:
332                // do nothing
333                break;
334
335        }
336        int pos = bp;
337        int depth = 1;
338
339        loop:
340        while (bp < buflen) {
341            switch (ch) {
342                case '\n': case '\r': case '\f':
343                    newline = true;
344                    break;
345
346                case ' ': case '\t':
347                    break;
348
349                case '{':
350                    newline = false;
351                    lastNonWhite = bp;
352                    depth++;
353                    break;
354
355                case '}':
356                    if (--depth == 0) {
357                        return m.at(pos).newTextTree(newString(pos, bp));
358                    }
359                    newline = false;
360                    lastNonWhite = bp;
361                    break;
362
363                case '@':
364                    if (newline)
365                        break loop;
366                    newline = false;
367                    lastNonWhite = bp;
368                    break;
369
370                default:
371                    newline = false;
372                    lastNonWhite = bp;
373                    break;
374            }
375            nextChar();
376        }
377        throw new ParseException("dc.unterminated.inline.tag");
378    }
379
380    /**
381     * Read Java class name, possibly followed by member
382     * Matching pairs of {@literal < >} are skipped. The text is terminated by the first
383     * unmatched }. It is an error if the beginning of the next tag is detected.
384     */
385    // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
386    // TODO: improve quality of parse to forbid bad constructions.
387    // TODO: update to use ReferenceParser
388    @SuppressWarnings("fallthrough")
389    protected DCReference reference(boolean allowMember) throws ParseException {
390        int pos = bp;
391        int depth = 0;
392
393        // scan to find the end of the signature, by looking for the first
394        // whitespace not enclosed in () or <>, or the end of the tag
395        loop:
396        while (bp < buflen) {
397            switch (ch) {
398                case '\n': case '\r': case '\f':
399                    newline = true;
400                    // fallthrough
401
402                case ' ': case '\t':
403                    if (depth == 0)
404                        break loop;
405                    break;
406
407                case '(':
408                case '<':
409                    newline = false;
410                    depth++;
411                    break;
412
413                case ')':
414                case '>':
415                    newline = false;
416                    --depth;
417                    break;
418
419                case '}':
420                    if (bp == pos)
421                        return null;
422                    newline = false;
423                    break loop;
424
425                case '@':
426                    if (newline)
427                        break loop;
428                    // fallthrough
429
430                default:
431                    newline = false;
432
433            }
434            nextChar();
435        }
436
437        if (depth != 0)
438            throw new ParseException("dc.unterminated.signature");
439
440        String sig = newString(pos, bp);
441
442        // Break sig apart into qualifiedExpr member paramTypes.
443        JCTree qualExpr;
444        Name member;
445        List<JCTree> paramTypes;
446
447        Log.DeferredDiagnosticHandler deferredDiagnosticHandler
448                = new Log.DeferredDiagnosticHandler(fac.log);
449
450        try {
451            int hash = sig.indexOf("#");
452            int lparen = sig.indexOf("(", hash + 1);
453            if (hash == -1) {
454                if (lparen == -1) {
455                    qualExpr = parseType(sig);
456                    member = null;
457                } else {
458                    qualExpr = null;
459                    member = parseMember(sig.substring(0, lparen));
460                }
461            } else {
462                qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
463                if (lparen == -1)
464                    member = parseMember(sig.substring(hash + 1));
465                else
466                    member = parseMember(sig.substring(hash + 1, lparen));
467            }
468
469            if (lparen < 0) {
470                paramTypes = null;
471            } else {
472                int rparen = sig.indexOf(")", lparen);
473                if (rparen != sig.length() - 1)
474                    throw new ParseException("dc.ref.bad.parens");
475                paramTypes = parseParams(sig.substring(lparen + 1, rparen));
476            }
477
478            if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
479                throw new ParseException("dc.ref.syntax.error");
480
481        } finally {
482            fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
483        }
484
485        return m.at(pos).newReferenceTree(sig, qualExpr, member, paramTypes).setEndPos(bp);
486    }
487
488    JCTree parseType(String s) throws ParseException {
489        JavacParser p = fac.newParser(s, false, false, false);
490        JCTree tree = p.parseType();
491        if (p.token().kind != TokenKind.EOF)
492            throw new ParseException("dc.ref.unexpected.input");
493        return tree;
494    }
495
496    Name parseMember(String s) throws ParseException {
497        JavacParser p = fac.newParser(s, false, false, false);
498        Name name = p.ident();
499        if (p.token().kind != TokenKind.EOF)
500            throw new ParseException("dc.ref.unexpected.input");
501        return name;
502    }
503
504    List<JCTree> parseParams(String s) throws ParseException {
505        if (s.trim().isEmpty())
506            return List.nil();
507
508        JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
509        ListBuffer<JCTree> paramTypes = new ListBuffer<>();
510        paramTypes.add(p.parseType());
511
512        if (p.token().kind == TokenKind.IDENTIFIER)
513            p.nextToken();
514
515        while (p.token().kind == TokenKind.COMMA) {
516            p.nextToken();
517            paramTypes.add(p.parseType());
518
519            if (p.token().kind == TokenKind.IDENTIFIER)
520                p.nextToken();
521        }
522
523        if (p.token().kind != TokenKind.EOF)
524            throw new ParseException("dc.ref.unexpected.input");
525
526        return paramTypes.toList();
527    }
528
529    /**
530     * Read Java identifier
531     * Matching pairs of { } are skipped; the text is terminated by the first
532     * unmatched }. It is an error if the beginning of the next tag is detected.
533     */
534    @SuppressWarnings("fallthrough")
535    protected DCIdentifier identifier() throws ParseException {
536        skipWhitespace();
537        int pos = bp;
538
539        if (isJavaIdentifierStart(ch)) {
540            Name name = readJavaIdentifier();
541            return m.at(pos).newIdentifierTree(name);
542        }
543
544        throw new ParseException("dc.identifier.expected");
545    }
546
547    /**
548     * Read a quoted string.
549     * It is an error if the beginning of the next tag is detected.
550     */
551    @SuppressWarnings("fallthrough")
552    protected DCText quotedString() {
553        int pos = bp;
554        nextChar();
555
556        loop:
557        while (bp < buflen) {
558            switch (ch) {
559                case '\n': case '\r': case '\f':
560                    newline = true;
561                    break;
562
563                case ' ': case '\t':
564                    break;
565
566                case '"':
567                    nextChar();
568                    // trim trailing white-space?
569                    return m.at(pos).newTextTree(newString(pos, bp));
570
571                case '@':
572                    if (newline)
573                        break loop;
574
575            }
576            nextChar();
577        }
578        return null;
579    }
580
581    /**
582     * Read a term ie. one word.
583     * It is an error if the beginning of the next tag is detected.
584     */
585    @SuppressWarnings("fallthrough")
586    protected DCText inlineWord() {
587        int pos = bp;
588        int depth = 0;
589        loop:
590        while (bp < buflen) {
591            switch (ch) {
592                case '\n':
593                    newline = true;
594                    // fallthrough
595
596                case '\r': case '\f': case ' ': case '\t':
597                    return m.at(pos).newTextTree(newString(pos, bp));
598
599                case '@':
600                    if (newline)
601                        break loop;
602
603                case '{':
604                    depth++;
605                    break;
606
607                case '}':
608                    if (depth == 0 || --depth == 0)
609                        return m.at(pos).newTextTree(newString(pos, bp));
610                    break;
611            }
612            newline = false;
613            nextChar();
614        }
615        return null;
616    }
617
618    /**
619     * Read general text content of an inline tag, including HTML entities and elements.
620     * Matching pairs of { } are skipped; the text is terminated by the first
621     * unmatched }. It is an error if the beginning of the next tag is detected.
622     */
623    @SuppressWarnings("fallthrough")
624    private List<DCTree> inlineContent() {
625        ListBuffer<DCTree> trees = new ListBuffer<>();
626
627        skipWhitespace();
628        int pos = bp;
629        int depth = 1;
630        textStart = -1;
631
632        loop:
633        while (bp < buflen) {
634
635            switch (ch) {
636                case '\n': case '\r': case '\f':
637                    newline = true;
638                    // fall through
639
640                case ' ': case '\t':
641                    nextChar();
642                    break;
643
644                case '&':
645                    entity(trees);
646                    break;
647
648                case '<':
649                    newline = false;
650                    addPendingText(trees, bp - 1);
651                    trees.add(html());
652                    break;
653
654                case '{':
655                    if (textStart == -1)
656                        textStart = bp;
657                    newline = false;
658                    depth++;
659                    nextChar();
660                    break;
661
662                case '}':
663                    newline = false;
664                    if (--depth == 0) {
665                        addPendingText(trees, bp - 1);
666                        nextChar();
667                        return trees.toList();
668                    }
669                    nextChar();
670                    break;
671
672                case '@':
673                    if (newline)
674                        break loop;
675                    // fallthrough
676
677                default:
678                    if (textStart == -1)
679                        textStart = bp;
680                    nextChar();
681                    break;
682            }
683        }
684
685        return List.of(erroneous("dc.unterminated.inline.tag", pos));
686    }
687
688    protected void entity(ListBuffer<DCTree> list) {
689        newline = false;
690        addPendingText(list, bp - 1);
691        list.add(entity());
692        if (textStart == -1) {
693            textStart = bp;
694            lastNonWhite = -1;
695        }
696    }
697
698    /**
699     * Read an HTML entity.
700     * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
701     */
702    protected DCTree entity() {
703        int p = bp;
704        nextChar();
705        Name name = null;
706        if (ch == '#') {
707            int namep = bp;
708            nextChar();
709            if (isDecimalDigit(ch)) {
710                nextChar();
711                while (isDecimalDigit(ch))
712                    nextChar();
713                name = names.fromChars(buf, namep, bp - namep);
714            } else if (ch == 'x' || ch == 'X') {
715                nextChar();
716                if (isHexDigit(ch)) {
717                    nextChar();
718                    while (isHexDigit(ch))
719                        nextChar();
720                    name = names.fromChars(buf, namep, bp - namep);
721                }
722            }
723        } else if (isIdentifierStart(ch)) {
724            name = readIdentifier();
725        }
726
727        if (name == null)
728            return erroneous("dc.bad.entity", p);
729        else {
730            if (ch != ';')
731                return erroneous("dc.missing.semicolon", p);
732            nextChar();
733            return m.at(p).newEntityTree(name);
734        }
735    }
736
737    /**
738     * Read the start or end of an HTML tag, or an HTML comment
739     * {@literal <identifier attrs> } or {@literal </identifier> }
740     */
741    protected DCTree html() {
742        int p = bp;
743        nextChar();
744        if (isIdentifierStart(ch)) {
745            Name name = readIdentifier();
746            List<DCTree> attrs = htmlAttrs();
747            if (attrs != null) {
748                boolean selfClosing = false;
749                if (ch == '/') {
750                    nextChar();
751                    selfClosing = true;
752                }
753                if (ch == '>') {
754                    nextChar();
755                    DCTree dctree = m.at(p).newStartElementTree(name, attrs, selfClosing).setEndPos(bp);
756                    return dctree;
757                }
758            }
759        } else if (ch == '/') {
760            nextChar();
761            if (isIdentifierStart(ch)) {
762                Name name = readIdentifier();
763                skipWhitespace();
764                if (ch == '>') {
765                    nextChar();
766                    return m.at(p).newEndElementTree(name);
767                }
768            }
769        } else if (ch == '!') {
770            nextChar();
771            if (ch == '-') {
772                nextChar();
773                if (ch == '-') {
774                    nextChar();
775                    while (bp < buflen) {
776                        int dash = 0;
777                        while (ch == '-') {
778                            dash++;
779                            nextChar();
780                        }
781                        // Strictly speaking, a comment should not contain "--"
782                        // so dash > 2 is an error, dash == 2 implies ch == '>'
783                        // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
784                        // for more details.
785                        if (dash >= 2 && ch == '>') {
786                            nextChar();
787                            return m.at(p).newCommentTree(newString(p, bp));
788                        }
789
790                        nextChar();
791                    }
792                }
793            }
794        }
795
796        bp = p + 1;
797        ch = buf[bp];
798        return erroneous("dc.malformed.html", p);
799    }
800
801    /**
802     * Read a series of HTML attributes, terminated by {@literal > }.
803     * Each attribute is of the form {@literal identifier[=value] }.
804     * "value" may be unquoted, single-quoted, or double-quoted.
805     */
806    protected List<DCTree> htmlAttrs() {
807        ListBuffer<DCTree> attrs = new ListBuffer<>();
808        skipWhitespace();
809
810        loop:
811        while (isIdentifierStart(ch)) {
812            int namePos = bp;
813            Name name = readAttributeName();
814            skipWhitespace();
815            List<DCTree> value = null;
816            ValueKind vkind = ValueKind.EMPTY;
817            if (ch == '=') {
818                ListBuffer<DCTree> v = new ListBuffer<>();
819                nextChar();
820                skipWhitespace();
821                if (ch == '\'' || ch == '"') {
822                    vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
823                    char quote = ch;
824                    nextChar();
825                    textStart = bp;
826                    while (bp < buflen && ch != quote) {
827                        if (newline && ch == '@') {
828                            attrs.add(erroneous("dc.unterminated.string", namePos));
829                            // No point trying to read more.
830                            // In fact, all attrs get discarded by the caller
831                            // and superseded by a malformed.html node because
832                            // the html tag itself is not terminated correctly.
833                            break loop;
834                        }
835                        attrValueChar(v);
836                    }
837                    addPendingText(v, bp - 1);
838                    nextChar();
839                } else {
840                    vkind = ValueKind.UNQUOTED;
841                    textStart = bp;
842                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
843                        attrValueChar(v);
844                    }
845                    addPendingText(v, bp - 1);
846                }
847                skipWhitespace();
848                value = v.toList();
849            }
850            DCAttribute attr = m.at(namePos).newAttributeTree(name, vkind, value);
851            attrs.add(attr);
852        }
853
854        return attrs.toList();
855    }
856
857    protected void attrValueChar(ListBuffer<DCTree> list) {
858        switch (ch) {
859            case '&':
860                entity(list);
861                break;
862
863            case '{':
864                inlineTag(list);
865                break;
866
867            default:
868                nextChar();
869        }
870    }
871
872    protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
873        if (textStart != -1) {
874            if (textStart <= textEnd) {
875                list.add(m.at(textStart).newTextTree(newString(textStart, textEnd + 1)));
876            }
877            textStart = -1;
878        }
879    }
880
881    protected DCErroneous erroneous(String code, int pos) {
882        int i = bp - 1;
883        loop:
884        while (i > pos) {
885            switch (buf[i]) {
886                case '\f': case '\n': case '\r':
887                    newline = true;
888                    break;
889                case '\t': case ' ':
890                    break;
891                default:
892                    break loop;
893            }
894            i--;
895        }
896        textStart = -1;
897        return m.at(pos).newErroneousTree(newString(pos, i + 1), diagSource, code);
898    }
899
900    protected boolean isIdentifierStart(char ch) {
901        return Character.isUnicodeIdentifierStart(ch);
902    }
903
904    protected Name readIdentifier() {
905        int start = bp;
906        nextChar();
907        while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
908            nextChar();
909        return names.fromChars(buf, start, bp - start);
910    }
911
912    protected Name readAttributeName() {
913        int start = bp;
914        nextChar();
915        while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
916            nextChar();
917        return names.fromChars(buf, start, bp - start);
918    }
919
920    protected Name readTagName() {
921        int start = bp;
922        nextChar();
923        while (bp < buflen
924                && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
925                || ch == '-' || ch == ':')) {
926            nextChar();
927        }
928        return names.fromChars(buf, start, bp - start);
929    }
930
931    protected boolean isJavaIdentifierStart(char ch) {
932        return Character.isJavaIdentifierStart(ch);
933    }
934
935    protected Name readJavaIdentifier() {
936        int start = bp;
937        nextChar();
938        while (bp < buflen && Character.isJavaIdentifierPart(ch))
939            nextChar();
940        return names.fromChars(buf, start, bp - start);
941    }
942
943    protected boolean isDecimalDigit(char ch) {
944        return ('0' <= ch && ch <= '9');
945    }
946
947    protected boolean isHexDigit(char ch) {
948        return ('0' <= ch && ch <= '9')
949                || ('a' <= ch && ch <= 'f')
950                || ('A' <= ch && ch <= 'F');
951    }
952
953    protected boolean isUnquotedAttrValueTerminator(char ch) {
954        switch (ch) {
955            case '\f': case '\n': case '\r': case '\t':
956            case ' ':
957            case '"': case '\'': case '`':
958            case '=': case '<': case '>':
959                return true;
960            default:
961                return false;
962        }
963    }
964
965    protected boolean isWhitespace(char ch) {
966        return Character.isWhitespace(ch);
967    }
968
969    protected void skipWhitespace() {
970        while (isWhitespace(ch)) {
971            nextChar();
972        }
973    }
974
975    /**
976     * @param start position of first character of string
977     * @param end position of character beyond last character to be included
978     */
979    String newString(int start, int end) {
980        return new String(buf, start, end - start);
981    }
982
983    static abstract class TagParser {
984        enum Kind { INLINE, BLOCK }
985
986        final Kind kind;
987        final DCTree.Kind treeKind;
988        final boolean retainWhiteSpace;
989
990
991        TagParser(Kind k, DCTree.Kind tk) {
992            kind = k;
993            treeKind = tk;
994            retainWhiteSpace = false;
995        }
996
997        TagParser(Kind k, DCTree.Kind tk, boolean retainWhiteSpace) {
998            kind = k;
999            treeKind = tk;
1000            this.retainWhiteSpace = retainWhiteSpace;
1001        }
1002
1003        Kind getKind() {
1004            return kind;
1005        }
1006
1007        DCTree.Kind getTreeKind() {
1008            return treeKind;
1009        }
1010
1011        abstract DCTree parse(int pos) throws ParseException;
1012    }
1013
1014    /**
1015     * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/tools/unix/javadoc.html#CHDJGIJB">Javadoc Tags</a>
1016     */
1017    private void initTagParsers() {
1018        TagParser[] parsers = {
1019            // @author name-text
1020            new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
1021                public DCTree parse(int pos) {
1022                    List<DCTree> name = blockContent();
1023                    return m.at(pos).newAuthorTree(name);
1024                }
1025            },
1026
1027            // {@code text}
1028            new TagParser(Kind.INLINE, DCTree.Kind.CODE, true) {
1029                public DCTree parse(int pos) throws ParseException {
1030                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
1031                    nextChar();
1032                    return m.at(pos).newCodeTree((DCText) text);
1033                }
1034            },
1035
1036            // @deprecated deprecated-text
1037            new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
1038                public DCTree parse(int pos) {
1039                    List<DCTree> reason = blockContent();
1040                    return m.at(pos).newDeprecatedTree(reason);
1041                }
1042            },
1043
1044            // {@docRoot}
1045            new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
1046                public DCTree parse(int pos) throws ParseException {
1047                    if (ch == '}') {
1048                        nextChar();
1049                        return m.at(pos).newDocRootTree();
1050                    }
1051                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
1052                    nextChar();
1053                    throw new ParseException("dc.unexpected.content");
1054                }
1055            },
1056
1057            // @exception class-name description
1058            new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
1059                public DCTree parse(int pos) throws ParseException {
1060                    skipWhitespace();
1061                    DCReference ref = reference(false);
1062                    List<DCTree> description = blockContent();
1063                    return m.at(pos).newExceptionTree(ref, description);
1064                }
1065            },
1066
1067            // @hidden hidden-text
1068            new TagParser(Kind.BLOCK, DCTree.Kind.HIDDEN) {
1069                public DCTree parse(int pos) {
1070                    List<DCTree> reason = blockContent();
1071                    return m.at(pos).newHiddenTree(reason);
1072                }
1073            },
1074
1075            // @index search-term options-description
1076            new TagParser(Kind.INLINE, DCTree.Kind.INDEX) {
1077                public DCTree parse(int pos) throws ParseException {
1078                    skipWhitespace();
1079                    if (ch == '}') {
1080                        throw new ParseException("dc.no.content");
1081                    }
1082                    DCTree term = ch == '"' ? quotedString() : inlineWord();
1083                    if (term == null) {
1084                        throw new ParseException("dc.no.content");
1085                    }
1086                    skipWhitespace();
1087                    List<DCTree> description = List.nil();
1088                    if (ch != '}') {
1089                        description = inlineContent();
1090                    } else {
1091                        nextChar();
1092                    }
1093                    return m.at(pos).newIndexTree(term, description);
1094                }
1095            },
1096
1097            // {@inheritDoc}
1098            new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
1099                public DCTree parse(int pos) throws ParseException {
1100                    if (ch == '}') {
1101                        nextChar();
1102                        return m.at(pos).newInheritDocTree();
1103                    }
1104                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
1105                    nextChar();
1106                    throw new ParseException("dc.unexpected.content");
1107                }
1108            },
1109
1110            // {@link package.class#member label}
1111            new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
1112                public DCTree parse(int pos) throws ParseException {
1113                    DCReference ref = reference(true);
1114                    List<DCTree> label = inlineContent();
1115                    return m.at(pos).newLinkTree(ref, label);
1116                }
1117            },
1118
1119            // {@linkplain package.class#member label}
1120            new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
1121                public DCTree parse(int pos) throws ParseException {
1122                    DCReference ref = reference(true);
1123                    List<DCTree> label = inlineContent();
1124                    return m.at(pos).newLinkPlainTree(ref, label);
1125                }
1126            },
1127
1128            // {@literal text}
1129            new TagParser(Kind.INLINE, DCTree.Kind.LITERAL, true) {
1130                public DCTree parse(int pos) throws ParseException {
1131                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
1132                    nextChar();
1133                    return m.at(pos).newLiteralTree((DCText) text);
1134                }
1135            },
1136
1137            // @param parameter-name description
1138            new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
1139                public DCTree parse(int pos) throws ParseException {
1140                    skipWhitespace();
1141
1142                    boolean typaram = false;
1143                    if (ch == '<') {
1144                        typaram = true;
1145                        nextChar();
1146                    }
1147
1148                    DCIdentifier id = identifier();
1149
1150                    if (typaram) {
1151                        if (ch != '>')
1152                            throw new ParseException("dc.gt.expected");
1153                        nextChar();
1154                    }
1155
1156                    skipWhitespace();
1157                    List<DCTree> desc = blockContent();
1158                    return m.at(pos).newParamTree(typaram, id, desc);
1159                }
1160            },
1161
1162            // @provides service-name description
1163            new TagParser(Kind.BLOCK, DCTree.Kind.PROVIDES) {
1164                public DCTree parse(int pos) throws ParseException {
1165                    skipWhitespace();
1166                    DCReference ref = reference(true);
1167                    List<DCTree> description = blockContent();
1168                    return m.at(pos).newProvidesTree(ref, description);
1169                }
1170            },
1171
1172            // @return description
1173            new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
1174                public DCTree parse(int pos) {
1175                    List<DCTree> description = blockContent();
1176                    return m.at(pos).newReturnTree(description);
1177                }
1178            },
1179
1180            // @see reference | quoted-string | HTML
1181            new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
1182                public DCTree parse(int pos) throws ParseException {
1183                    skipWhitespace();
1184                    switch (ch) {
1185                        case '"':
1186                            DCText string = quotedString();
1187                            if (string != null) {
1188                                skipWhitespace();
1189                                if (ch == '@'
1190                                        || ch == EOI && bp == buf.length - 1) {
1191                                    return m.at(pos).newSeeTree(List.<DCTree>of(string));
1192                                }
1193                            }
1194                            break;
1195
1196                        case '<':
1197                            List<DCTree> html = blockContent();
1198                            if (html != null)
1199                                return m.at(pos).newSeeTree(html);
1200                            break;
1201
1202                        case '@':
1203                            if (newline)
1204                                throw new ParseException("dc.no.content");
1205                            break;
1206
1207                        case EOI:
1208                            if (bp == buf.length - 1)
1209                                throw new ParseException("dc.no.content");
1210                            break;
1211
1212                        default:
1213                            if (isJavaIdentifierStart(ch) || ch == '#') {
1214                                DCReference ref = reference(true);
1215                                List<DCTree> description = blockContent();
1216                                return m.at(pos).newSeeTree(description.prepend(ref));
1217                            }
1218                    }
1219                    throw new ParseException("dc.unexpected.content");
1220                }
1221            },
1222
1223            // @serialData data-description
1224            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
1225                public DCTree parse(int pos) {
1226                    List<DCTree> description = blockContent();
1227                    return m.at(pos).newSerialDataTree(description);
1228                }
1229            },
1230
1231            // @serialField field-name field-type description
1232            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
1233                public DCTree parse(int pos) throws ParseException {
1234                    skipWhitespace();
1235                    DCIdentifier name = identifier();
1236                    skipWhitespace();
1237                    DCReference type = reference(false);
1238                    List<DCTree> description = null;
1239                    if (isWhitespace(ch)) {
1240                        skipWhitespace();
1241                        description = blockContent();
1242                    }
1243                    return m.at(pos).newSerialFieldTree(name, type, description);
1244                }
1245            },
1246
1247            // @serial field-description | include | exclude
1248            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
1249                public DCTree parse(int pos) {
1250                    List<DCTree> description = blockContent();
1251                    return m.at(pos).newSerialTree(description);
1252                }
1253            },
1254
1255            // @since since-text
1256            new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
1257                public DCTree parse(int pos) {
1258                    List<DCTree> description = blockContent();
1259                    return m.at(pos).newSinceTree(description);
1260                }
1261            },
1262
1263            // @summary summary-text
1264            new TagParser(Kind.INLINE, DCTree.Kind.SUMMARY) {
1265                public DCTree parse(int pos) throws ParseException {
1266                    List<DCTree> summary = inlineContent();
1267                    return m.at(pos).newSummaryTree(summary);
1268                }
1269            },
1270
1271            // @throws class-name description
1272            new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
1273                public DCTree parse(int pos) throws ParseException {
1274                    skipWhitespace();
1275                    DCReference ref = reference(false);
1276                    List<DCTree> description = blockContent();
1277                    return m.at(pos).newThrowsTree(ref, description);
1278                }
1279            },
1280
1281            // @uses service-name description
1282            new TagParser(Kind.BLOCK, DCTree.Kind.USES) {
1283                public DCTree parse(int pos) throws ParseException {
1284                    skipWhitespace();
1285                    DCReference ref = reference(true);
1286                    List<DCTree> description = blockContent();
1287                    return m.at(pos).newUsesTree(ref, description);
1288                }
1289            },
1290
1291            // {@value package.class#field}
1292            new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
1293                public DCTree parse(int pos) throws ParseException {
1294                    DCReference ref = reference(true);
1295                    skipWhitespace();
1296                    if (ch == '}') {
1297                        nextChar();
1298                        return m.at(pos).newValueTree(ref);
1299                    }
1300                    nextChar();
1301                    throw new ParseException("dc.unexpected.content");
1302                }
1303            },
1304
1305            // @version version-text
1306            new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
1307                public DCTree parse(int pos) {
1308                    List<DCTree> description = blockContent();
1309                    return m.at(pos).newVersionTree(description);
1310                }
1311            },
1312        };
1313
1314        tagParsers = new HashMap<>();
1315        for (TagParser p: parsers)
1316            tagParsers.put(names.fromString(p.getTreeKind().tagName), p);
1317
1318    }
1319}
1320