DocCommentParser.java revision 3831:209b0eab0e1f
1/*
2 * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.parser;
27
28import java.text.BreakIterator;
29import java.util.HashMap;
30import java.util.Map;
31
32import com.sun.source.doctree.AttributeTree.ValueKind;
33import com.sun.source.doctree.DocTree;
34import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
35import com.sun.tools.javac.parser.Tokens.Comment;
36import com.sun.tools.javac.parser.Tokens.TokenKind;
37import com.sun.tools.javac.tree.DCTree;
38import com.sun.tools.javac.tree.DCTree.DCAttribute;
39import com.sun.tools.javac.tree.DCTree.DCDocComment;
40import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
41import com.sun.tools.javac.tree.DCTree.DCErroneous;
42import com.sun.tools.javac.tree.DCTree.DCIdentifier;
43import com.sun.tools.javac.tree.DCTree.DCReference;
44import com.sun.tools.javac.tree.DCTree.DCText;
45import com.sun.tools.javac.tree.DocTreeMaker;
46import com.sun.tools.javac.tree.JCTree;
47import com.sun.tools.javac.util.DiagnosticSource;
48import com.sun.tools.javac.util.List;
49import com.sun.tools.javac.util.ListBuffer;
50import com.sun.tools.javac.util.Log;
51import com.sun.tools.javac.util.Name;
52import com.sun.tools.javac.util.Names;
53import com.sun.tools.javac.util.Position;
54
55import static com.sun.tools.javac.util.LayoutCharacters.*;
56
57/**
58 *
59 *  <p><b>This is NOT part of any supported API.
60 *  If you write code that depends on this, you do so at your own risk.
61 *  This code and its internal interfaces are subject to change or
62 *  deletion without notice.</b>
63 */
64public class DocCommentParser {
65    static class ParseException extends Exception {
66        private static final long serialVersionUID = 0;
67        ParseException(String key) {
68            super(key);
69        }
70    }
71
    /** Factory providing the name table, doc tree maker, log and nested Java parsers. */
    final ParserFactory fac;
    /** Diagnostic source handed to the erroneous trees created by this parser. */
    final DiagnosticSource diagSource;
    /** The comment whose text is read by {@code parse()}. */
    final Comment comment;
    /** Tree maker, taken from {@code fac.docTreeMaker}. */
    final DocTreeMaker m;
    /** Name table, taken from {@code fac.names}. */
    final Names names;

    // NOTE(review): not referenced in the visible part of this file — confirm where it is used.
    BreakIterator sentenceBreaker;

    /** The input buffer, index of most recent character read,
     *  index of one past last character in buffer.
     */
    protected char[] buf;
    protected int bp;
    protected int buflen;

    /** The current character.
     */
    protected char ch;

    /** Index in {@code buf} where the pending run of plain text starts, or -1 if none is pending. */
    int textStart = -1;
    /** Index in {@code buf} of the last non-whitespace character of the pending text, or -1. */
    int lastNonWhite = -1;
    /**
     * True while no non-whitespace character has been seen since the last line
     * terminator; an '@' read in this state is treated as the start of a block tag.
     */
    boolean newline = true;

    /** Tag parsers keyed by tag name; presumably filled in by {@code initTagParsers()}. */
    Map<Name, TagParser> tagParsers;
96
97    public DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
98        this.fac = fac;
99        this.diagSource = diagSource;
100        this.comment = comment;
101        names = fac.names;
102        m = fac.docTreeMaker;
103        initTagParsers();
104    }
105
    /**
     * Creates a parser with no diagnostic source and no comment.
     * Note that {@code parse()} dereferences {@code comment}, so it cannot be
     * used on an instance created this way.
     *
     * @param fac factory supplying the name table and the doc tree maker
     */
    public DocCommentParser(ParserFactory fac) {
        this(fac, null, null);
    }
109
110    public DCDocComment parse() {
111        String c = comment.getText();
112        buf = new char[c.length() + 1];
113        c.getChars(0, c.length(), buf, 0);
114        buf[buf.length - 1] = EOI;
115        buflen = buf.length - 1;
116        bp = -1;
117        nextChar();
118
119        List<DCTree> body = blockContent();
120        List<DCTree> tags = blockTags();
121        int pos = !body.isEmpty()
122                ? body.head.pos
123                : !tags.isEmpty() ? tags.head.pos : Position.NOPOS;
124
125        DCDocComment dc = m.at(pos).newDocCommentTree(comment, body, tags);
126        return dc;
127    }
128
129    void nextChar() {
130        ch = buf[bp < buflen ? ++bp : buflen];
131        switch (ch) {
132            case '\f': case '\n': case '\r':
133                newline = true;
134        }
135    }
136
    /**
     * Read block content, consisting of text, html and inline tags.
     * Terminated by the end of input, or the beginning of the next block tag:
     * i.e. @ as the first non-whitespace character on a line.
     *
     * <p>Plain text is accumulated rather than emitted per character:
     * {@code textStart} marks where the pending run began and
     * {@code lastNonWhite} its last non-whitespace character. The run is
     * flushed with {@code addPendingText} before any non-text tree is added.
     *
     * @return the trees read, in source order
     */
    @SuppressWarnings("fallthrough")
    protected List<DCTree> blockContent() {
        ListBuffer<DCTree> trees = new ListBuffer<>();
        textStart = -1;

        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fallthrough

                case ' ': case '\t':
                    // whitespace never starts a text run and never updates lastNonWhite
                    nextChar();
                    break;

                case '&':
                    entity(trees);
                    break;

                case '<':
                    newline = false;
                    // flush everything up to the character before '<'
                    addPendingText(trees, bp - 1);
                    trees.add(html());
                    // start a new text run immediately after the html tree
                    if (textStart == -1) {
                        textStart = bp;
                        lastNonWhite = -1;
                    }
                    break;

                case '>':
                    // a '>' that is not part of an html tag is reported as an error
                    newline = false;
                    addPendingText(trees, bp - 1);
                    trees.add(m.at(bp).newErroneousTree(newString(bp, bp + 1), diagSource, "dc.bad.gt"));
                    nextChar();
                    if (textStart == -1) {
                        textStart = bp;
                        lastNonWhite = -1;
                    }
                    break;

                case '{':
                    inlineTag(trees);
                    break;

                case '@':
                    if (newline) {
                        // start of the next block tag: flush text without trailing whitespace
                        addPendingText(trees, lastNonWhite);
                        break loop;
                    }
                    // fallthrough

                default:
                    newline = false;
                    if (textStart == -1)
                        textStart = bp;
                    lastNonWhite = bp;
                    nextChar();
            }
        }

        // flush any remaining text, trimmed to its last non-whitespace character
        if (lastNonWhite != -1)
            addPendingText(trees, lastNonWhite);

        return trees.toList();
    }
208
209    /**
210     * Read a series of block tags, including their content.
211     * Standard tags parse their content appropriately.
212     * Non-standard tags are represented by {@link UnknownBlockTag}.
213     */
214    protected List<DCTree> blockTags() {
215        ListBuffer<DCTree> tags = new ListBuffer<>();
216        while (ch == '@')
217            tags.add(blockTag());
218        return tags.toList();
219    }
220
221    /**
222     * Read a single block tag, including its content.
223     * Standard tags parse their content appropriately.
224     * Non-standard tags are represented by {@link UnknownBlockTag}.
225     */
226    protected DCTree blockTag() {
227        int p = bp;
228        try {
229            nextChar();
230            if (isIdentifierStart(ch)) {
231                Name name = readTagName();
232                TagParser tp = tagParsers.get(name);
233                if (tp == null) {
234                    List<DCTree> content = blockContent();
235                    return m.at(p).newUnknownBlockTagTree(name, content);
236                } else {
237                    switch (tp.getKind()) {
238                        case BLOCK:
239                            return tp.parse(p);
240                        case INLINE:
241                            return erroneous("dc.bad.inline.tag", p);
242                    }
243                }
244            }
245            blockContent();
246
247            return erroneous("dc.no.tag.name", p);
248        } catch (ParseException e) {
249            blockContent();
250            return erroneous(e.getMessage(), p);
251        }
252    }
253
254    protected void inlineTag(ListBuffer<DCTree> list) {
255        newline = false;
256        nextChar();
257        if (ch == '@') {
258            addPendingText(list, bp - 2);
259            list.add(inlineTag());
260            textStart = bp;
261            lastNonWhite = -1;
262        } else {
263            if (textStart == -1)
264                textStart = bp - 1;
265            lastNonWhite = bp;
266        }
267    }
268
269    /**
270     * Read a single inline tag, including its content.
271     * Standard tags parse their content appropriately.
272     * Non-standard tags are represented by {@link UnknownBlockTag}.
273     * Malformed tags may be returned as {@link Erroneous}.
274     */
275    protected DCTree inlineTag() {
276        int p = bp - 1;
277        try {
278            nextChar();
279            if (isIdentifierStart(ch)) {
280                Name name = readTagName();
281                TagParser tp = tagParsers.get(name);
282
283                if (tp == null) {
284                    skipWhitespace();
285                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
286                    if (text != null) {
287                        nextChar();
288                        return m.at(p).newUnknownInlineTagTree(name, List.of(text)).setEndPos(bp);
289                    }
290                } else {
291                    if (!tp.retainWhiteSpace) {
292                        skipWhitespace();
293                    }
294                    if (tp.getKind() == TagParser.Kind.INLINE) {
295                        DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
296                        if (tree != null) {
297                            return tree.setEndPos(bp);
298                        }
299                    } else { // handle block tags (ex: @see) in inline content
300                        inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content
301                        nextChar();
302                    }
303                }
304            }
305            return erroneous("dc.no.tag.name", p);
306        } catch (ParseException e) {
307            return erroneous(e.getMessage(), p);
308        }
309    }
310
311    private static enum WhitespaceRetentionPolicy {
312        RETAIN_ALL,
313        REMOVE_FIRST_SPACE,
314        REMOVE_ALL
315    }
316
317    /**
318     * Read plain text content of an inline tag.
319     * Matching pairs of { } are skipped; the text is terminated by the first
320     * unmatched }. It is an error if the beginning of the next tag is detected.
321     */
322    private DCTree inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
323        switch (whitespacePolicy) {
324            case REMOVE_ALL:
325                skipWhitespace();
326                break;
327            case REMOVE_FIRST_SPACE:
328                if (ch == ' ')
329                    nextChar();
330                break;
331            case RETAIN_ALL:
332            default:
333                // do nothing
334                break;
335
336        }
337        int pos = bp;
338        int depth = 1;
339
340        loop:
341        while (bp < buflen) {
342            switch (ch) {
343                case '\n': case '\r': case '\f':
344                    newline = true;
345                    break;
346
347                case ' ': case '\t':
348                    break;
349
350                case '{':
351                    newline = false;
352                    lastNonWhite = bp;
353                    depth++;
354                    break;
355
356                case '}':
357                    if (--depth == 0) {
358                        return m.at(pos).newTextTree(newString(pos, bp));
359                    }
360                    newline = false;
361                    lastNonWhite = bp;
362                    break;
363
364                case '@':
365                    if (newline)
366                        break loop;
367                    newline = false;
368                    lastNonWhite = bp;
369                    break;
370
371                default:
372                    newline = false;
373                    lastNonWhite = bp;
374                    break;
375            }
376            nextChar();
377        }
378        throw new ParseException("dc.unterminated.inline.tag");
379    }
380
    /**
     * Read a reference to a Java class, optionally followed by {@code #member}
     * and a parenthesized list of parameter types.
     * The signature ends at the first whitespace that is not enclosed in
     * {@literal ( ) or < >}, at a }, or at an @ that begins a line.
     * The signature text is then split at '#' and '(' and each part is parsed
     * with a nested Java parser.
     *
     * @param allowMember not consulted by this implementation (see TODO below)
     * @return the reference tree, or {@code null} if a } is found before any
     *         character of the signature
     * @throws ParseException if brackets are unbalanced when scanning stops,
     *         if the closing parenthesis is not the last character, if a part
     *         is followed by unexpected input, or if the nested parser
     *         reported any diagnostics
     */
    // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
    // TODO: improve quality of parse to forbid bad constructions.
    // TODO: update to use ReferenceParser
    @SuppressWarnings("fallthrough")
    protected DCReference reference(boolean allowMember) throws ParseException {
        int pos = bp;
        int depth = 0;

        // scan to find the end of the signature, by looking for the first
        // whitespace not enclosed in () or <>, or the end of the tag
        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fallthrough

                case ' ': case '\t':
                    if (depth == 0)
                        break loop;
                    break;

                case '(':
                case '<':
                    newline = false;
                    depth++;
                    break;

                case ')':
                case '>':
                    newline = false;
                    --depth;
                    break;

                case '}':
                    // a closing brace ends the signature regardless of depth;
                    // an empty signature is reported as null
                    if (bp == pos)
                        return null;
                    newline = false;
                    break loop;

                case '@':
                    if (newline)
                        break loop;
                    // fallthrough

                default:
                    newline = false;

            }
            nextChar();
        }

        if (depth != 0)
            throw new ParseException("dc.unterminated.signature");

        String sig = newString(pos, bp);

        // Break sig apart into qualifiedExpr member paramTypes.
        JCTree qualExpr;
        Name member;
        List<JCTree> paramTypes;

        // collect diagnostics from the nested parsers so they can be checked below;
        // the handler is popped in the finally block
        Log.DeferredDiagnosticHandler deferredDiagnosticHandler
                = new Log.DeferredDiagnosticHandler(fac.log);

        try {
            int hash = sig.indexOf("#");
            int lparen = sig.indexOf("(", hash + 1);
            if (hash == -1) {
                if (lparen == -1) {
                    // no '#' and no '(': the whole signature is a type
                    qualExpr = parseType(sig);
                    member = null;
                } else {
                    // no '#' but a '(': the text before '(' is a member name
                    qualExpr = null;
                    member = parseMember(sig.substring(0, lparen));
                }
            } else {
                // text before '#' (if any) is the type, text after it the member
                qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
                if (lparen == -1)
                    member = parseMember(sig.substring(hash + 1));
                else
                    member = parseMember(sig.substring(hash + 1, lparen));
            }

            if (lparen < 0) {
                paramTypes = null;
            } else {
                // the first ')' must be the last character of the signature
                int rparen = sig.indexOf(")", lparen);
                if (rparen != sig.length() - 1)
                    throw new ParseException("dc.ref.bad.parens");
                paramTypes = parseParams(sig.substring(lparen + 1, rparen));
            }

            if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
                throw new ParseException("dc.ref.syntax.error");

        } finally {
            fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
        }

        return m.at(pos).newReferenceTree(sig, qualExpr, member, paramTypes).setEndPos(bp);
    }
488
489    JCTree parseType(String s) throws ParseException {
490        JavacParser p = fac.newParser(s, false, false, false);
491        JCTree tree = p.parseType();
492        if (p.token().kind != TokenKind.EOF)
493            throw new ParseException("dc.ref.unexpected.input");
494        return tree;
495    }
496
497    Name parseMember(String s) throws ParseException {
498        JavacParser p = fac.newParser(s, false, false, false);
499        Name name = p.ident();
500        if (p.token().kind != TokenKind.EOF)
501            throw new ParseException("dc.ref.unexpected.input");
502        return name;
503    }
504
505    List<JCTree> parseParams(String s) throws ParseException {
506        if (s.trim().isEmpty())
507            return List.nil();
508
509        JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
510        ListBuffer<JCTree> paramTypes = new ListBuffer<>();
511        paramTypes.add(p.parseType());
512
513        if (p.token().kind == TokenKind.IDENTIFIER)
514            p.nextToken();
515
516        while (p.token().kind == TokenKind.COMMA) {
517            p.nextToken();
518            paramTypes.add(p.parseType());
519
520            if (p.token().kind == TokenKind.IDENTIFIER)
521                p.nextToken();
522        }
523
524        if (p.token().kind != TokenKind.EOF)
525            throw new ParseException("dc.ref.unexpected.input");
526
527        return paramTypes.toList();
528    }
529
530    /**
531     * Read Java identifier
532     * Matching pairs of { } are skipped; the text is terminated by the first
533     * unmatched }. It is an error if the beginning of the next tag is detected.
534     */
535    @SuppressWarnings("fallthrough")
536    protected DCIdentifier identifier() throws ParseException {
537        skipWhitespace();
538        int pos = bp;
539
540        if (isJavaIdentifierStart(ch)) {
541            Name name = readJavaIdentifier();
542            return m.at(pos).newIdentifierTree(name);
543        }
544
545        throw new ParseException("dc.identifier.expected");
546    }
547
548    /**
549     * Read a quoted string.
550     * It is an error if the beginning of the next tag is detected.
551     */
552    @SuppressWarnings("fallthrough")
553    protected DCText quotedString() {
554        int pos = bp;
555        nextChar();
556
557        loop:
558        while (bp < buflen) {
559            switch (ch) {
560                case '\n': case '\r': case '\f':
561                    newline = true;
562                    break;
563
564                case ' ': case '\t':
565                    break;
566
567                case '"':
568                    nextChar();
569                    // trim trailing white-space?
570                    return m.at(pos).newTextTree(newString(pos, bp));
571
572                case '@':
573                    if (newline)
574                        break loop;
575
576            }
577            nextChar();
578        }
579        return null;
580    }
581
582    /**
583     * Read a term ie. one word.
584     * It is an error if the beginning of the next tag is detected.
585     */
586    @SuppressWarnings("fallthrough")
587    protected DCText inlineWord() {
588        int pos = bp;
589        int depth = 0;
590        loop:
591        while (bp < buflen) {
592            switch (ch) {
593                case '\n':
594                    newline = true;
595                    // fallthrough
596
597                case '\r': case '\f': case ' ': case '\t':
598                    return m.at(pos).newTextTree(newString(pos, bp));
599
600                case '@':
601                    if (newline)
602                        break loop;
603
604                case '{':
605                    depth++;
606                    break;
607
608                case '}':
609                    if (depth == 0 || --depth == 0)
610                        return m.at(pos).newTextTree(newString(pos, bp));
611                    break;
612            }
613            newline = false;
614            nextChar();
615        }
616        return null;
617    }
618
    /**
     * Read general text content of an inline tag, including HTML entities and elements.
     * Matching pairs of { } are skipped; the text is terminated by the first
     * unmatched }. It is an error if the beginning of the next tag is detected.
     *
     * <p>Leading whitespace is skipped. On success the closing } has been consumed.
     *
     * @return the trees read; or a single erroneous tree with key
     *         {@code dc.unterminated.inline.tag} if the input ends, or an @
     *         begins a line, before the closing }
     */
    @SuppressWarnings("fallthrough")
    private List<DCTree> inlineContent() {
        ListBuffer<DCTree> trees = new ListBuffer<>();

        skipWhitespace();
        int pos = bp;
        int depth = 1;      // the opening brace of the tag itself
        textStart = -1;

        loop:
        while (bp < buflen) {

            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fall through

                case ' ': case '\t':
                    nextChar();
                    break;

                case '&':
                    entity(trees);
                    break;

                case '<':
                    newline = false;
                    // flush text up to the character before '<', then read the html tree
                    addPendingText(trees, bp - 1);
                    trees.add(html());
                    break;

                case '{':
                    // a nested brace is part of the text
                    if (textStart == -1)
                        textStart = bp;
                    newline = false;
                    depth++;
                    nextChar();
                    break;

                case '}':
                    newline = false;
                    if (--depth == 0) {
                        // closing brace of the tag: flush text and consume the brace
                        addPendingText(trees, bp - 1);
                        nextChar();
                        return trees.toList();
                    }
                    nextChar();
                    break;

                case '@':
                    if (newline)
                        break loop;
                    // fallthrough

                default:
                    if (textStart == -1)
                        textStart = bp;
                    nextChar();
                    break;
            }
        }

        return List.of(erroneous("dc.unterminated.inline.tag", pos));
    }
688
689    protected void entity(ListBuffer<DCTree> list) {
690        newline = false;
691        addPendingText(list, bp - 1);
692        list.add(entity());
693        if (textStart == -1) {
694            textStart = bp;
695            lastNonWhite = -1;
696        }
697    }
698
699    /**
700     * Read an HTML entity.
701     * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
702     */
703    protected DCTree entity() {
704        int p = bp;
705        nextChar();
706        Name name = null;
707        if (ch == '#') {
708            int namep = bp;
709            nextChar();
710            if (isDecimalDigit(ch)) {
711                nextChar();
712                while (isDecimalDigit(ch))
713                    nextChar();
714                name = names.fromChars(buf, namep, bp - namep);
715            } else if (ch == 'x' || ch == 'X') {
716                nextChar();
717                if (isHexDigit(ch)) {
718                    nextChar();
719                    while (isHexDigit(ch))
720                        nextChar();
721                    name = names.fromChars(buf, namep, bp - namep);
722                }
723            }
724        } else if (isIdentifierStart(ch)) {
725            name = readIdentifier();
726        }
727
728        if (name == null)
729            return erroneous("dc.bad.entity", p);
730        else {
731            if (ch != ';')
732                return erroneous("dc.missing.semicolon", p);
733            nextChar();
734            return m.at(p).newEntityTree(name);
735        }
736    }
737
    /**
     * Read the start or end of an HTML tag, or an HTML comment
     * {@literal <identifier attrs> } or {@literal </identifier> }
     * or {@literal <!-- ... --> }. On entry the current character is '<'.
     *
     * @return a start element, end element or comment tree; or, if none of
     *         these forms can be read, an erroneous tree with key
     *         {@code dc.malformed.html}, in which case the input position is
     *         reset to the character after the '<'
     */
    protected DCTree html() {
        int p = bp;
        nextChar();
        if (isIdentifierStart(ch)) {
            // start element: <name attrs> or <name attrs/>
            Name name = readIdentifier();
            List<DCTree> attrs = htmlAttrs();
            if (attrs != null) {
                boolean selfClosing = false;
                if (ch == '/') {
                    nextChar();
                    selfClosing = true;
                }
                if (ch == '>') {
                    nextChar();
                    DCTree dctree = m.at(p).newStartElementTree(name, attrs, selfClosing).setEndPos(bp);
                    return dctree;
                }
            }
        } else if (ch == '/') {
            // end element: </name>, with optional whitespace before '>'
            nextChar();
            if (isIdentifierStart(ch)) {
                Name name = readIdentifier();
                skipWhitespace();
                if (ch == '>') {
                    nextChar();
                    return m.at(p).newEndElementTree(name);
                }
            }
        } else if (ch == '!') {
            // comment: <!-- ... -->
            nextChar();
            if (ch == '-') {
                nextChar();
                if (ch == '-') {
                    nextChar();
                    while (bp < buflen) {
                        // count a run of consecutive dashes
                        int dash = 0;
                        while (ch == '-') {
                            dash++;
                            nextChar();
                        }
                        // Strictly speaking, a comment should not contain "--"
                        // so dash > 2 is an error, dash == 2 implies ch == '>'
                        // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
                        // for more details.
                        if (dash >= 2 && ch == '>') {
                            nextChar();
                            return m.at(p).newCommentTree(newString(p, bp));
                        }

                        nextChar();
                    }
                }
            }
        }

        // nothing matched: back up to just after the '<' and report the error
        bp = p + 1;
        ch = buf[bp];
        return erroneous("dc.malformed.html", p);
    }
801
    /**
     * Read a series of HTML attributes, terminated by {@literal > }.
     * Each attribute is of the form {@literal identifier[=value] }.
     * "value" may be unquoted, single-quoted, or double-quoted.
     * A value may contain entities and inline tags as well as text.
     *
     * @return the attributes read, in source order; if a quoted value runs
     *         into the start of a block tag, reading stops and the list ends
     *         with an erroneous tree with key {@code dc.unterminated.string}
     */
    protected List<DCTree> htmlAttrs() {
        ListBuffer<DCTree> attrs = new ListBuffer<>();
        skipWhitespace();

        loop:
        while (isIdentifierStart(ch)) {
            int namePos = bp;
            Name name = readAttributeName();
            skipWhitespace();
            // value stays null and vkind stays EMPTY when no '=' follows the name
            List<DCTree> value = null;
            ValueKind vkind = ValueKind.EMPTY;
            if (ch == '=') {
                ListBuffer<DCTree> v = new ListBuffer<>();
                nextChar();
                skipWhitespace();
                if (ch == '\'' || ch == '"') {
                    vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
                    char quote = ch;
                    nextChar();
                    textStart = bp;
                    while (bp < buflen && ch != quote) {
                        if (newline && ch == '@') {
                            attrs.add(erroneous("dc.unterminated.string", namePos));
                            // No point trying to read more.
                            // In fact, all attrs get discarded by the caller
                            // and superseded by a malformed.html node because
                            // the html tag itself is not terminated correctly.
                            break loop;
                        }
                        attrValueChar(v);
                    }
                    // flush text up to the character before the closing quote, then skip the quote
                    addPendingText(v, bp - 1);
                    nextChar();
                } else {
                    vkind = ValueKind.UNQUOTED;
                    textStart = bp;
                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
                        attrValueChar(v);
                    }
                    addPendingText(v, bp - 1);
                }
                skipWhitespace();
                value = v.toList();
            }
            DCAttribute attr = m.at(namePos).newAttributeTree(name, vkind, value);
            attrs.add(attr);
        }

        return attrs.toList();
    }
857
858    protected void attrValueChar(ListBuffer<DCTree> list) {
859        switch (ch) {
860            case '&':
861                entity(list);
862                break;
863
864            case '{':
865                inlineTag(list);
866                break;
867
868            default:
869                nextChar();
870        }
871    }
872
873    protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
874        if (textStart != -1) {
875            if (textStart <= textEnd) {
876                list.add(m.at(textStart).newTextTree(newString(textStart, textEnd + 1)));
877            }
878            textStart = -1;
879        }
880    }
881
882    protected DCErroneous erroneous(String code, int pos) {
883        int i = bp - 1;
884        loop:
885        while (i > pos) {
886            switch (buf[i]) {
887                case '\f': case '\n': case '\r':
888                    newline = true;
889                    break;
890                case '\t': case ' ':
891                    break;
892                default:
893                    break loop;
894            }
895            i--;
896        }
897        textStart = -1;
898        return m.at(pos).newErroneousTree(newString(pos, i + 1), diagSource, code);
899    }
900
    /**
     * Tests whether a character may start a tag name, entity name or HTML
     * element/attribute name, using {@link Character#isUnicodeIdentifierStart(char)}.
     *
     * @param ch the character to test
     * @return true if the character is a Unicode identifier start
     */
    protected boolean isIdentifierStart(char ch) {
        return Character.isUnicodeIdentifierStart(ch);
    }
904
905    protected Name readIdentifier() {
906        int start = bp;
907        nextChar();
908        while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
909            nextChar();
910        return names.fromChars(buf, start, bp - start);
911    }
912
913    protected Name readAttributeName() {
914        int start = bp;
915        nextChar();
916        while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
917            nextChar();
918        return names.fromChars(buf, start, bp - start);
919    }
920
921    protected Name readTagName() {
922        int start = bp;
923        nextChar();
924        while (bp < buflen
925                && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
926                || ch == '-' || ch == ':')) {
927            nextChar();
928        }
929        return names.fromChars(buf, start, bp - start);
930    }
931
932    protected boolean isJavaIdentifierStart(char ch) {
933        return Character.isJavaIdentifierStart(ch);
934    }
935
936    protected Name readJavaIdentifier() {
937        int start = bp;
938        nextChar();
939        while (bp < buflen && Character.isJavaIdentifierPart(ch))
940            nextChar();
941        return names.fromChars(buf, start, bp - start);
942    }
943
944    protected boolean isDecimalDigit(char ch) {
945        return ('0' <= ch && ch <= '9');
946    }
947
948    protected boolean isHexDigit(char ch) {
949        return ('0' <= ch && ch <= '9')
950                || ('a' <= ch && ch <= 'f')
951                || ('A' <= ch && ch <= 'F');
952    }
953
954    protected boolean isUnquotedAttrValueTerminator(char ch) {
955        switch (ch) {
956            case '\f': case '\n': case '\r': case '\t':
957            case ' ':
958            case '"': case '\'': case '`':
959            case '=': case '<': case '>':
960                return true;
961            default:
962                return false;
963        }
964    }
965
966    protected boolean isWhitespace(char ch) {
967        return Character.isWhitespace(ch);
968    }
969
970    protected void skipWhitespace() {
971        while (isWhitespace(ch)) {
972            nextChar();
973        }
974    }
975
976    /**
977     * @param start position of first character of string
978     * @param end position of character beyond last character to be included
979     */
980    String newString(int start, int end) {
981        return new String(buf, start, end - start);
982    }
983
984    static abstract class TagParser {
985        enum Kind { INLINE, BLOCK }
986
987        final Kind kind;
988        final DCTree.Kind treeKind;
989        final boolean retainWhiteSpace;
990
991
992        TagParser(Kind k, DCTree.Kind tk) {
993            kind = k;
994            treeKind = tk;
995            retainWhiteSpace = false;
996        }
997
998        TagParser(Kind k, DCTree.Kind tk, boolean retainWhiteSpace) {
999            kind = k;
1000            treeKind = tk;
1001            this.retainWhiteSpace = retainWhiteSpace;
1002        }
1003
1004        Kind getKind() {
1005            return kind;
1006        }
1007
1008        DCTree.Kind getTreeKind() {
1009            return treeKind;
1010        }
1011
1012        abstract DCTree parse(int pos) throws ParseException;
1013    }
1014
1015    /**
1016     * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
1017     */
1018    private void initTagParsers() {
1019        TagParser[] parsers = {
1020            // @author name-text
1021            new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
1022                public DCTree parse(int pos) {
1023                    List<DCTree> name = blockContent();
1024                    return m.at(pos).newAuthorTree(name);
1025                }
1026            },
1027
1028            // {@code text}
1029            new TagParser(Kind.INLINE, DCTree.Kind.CODE, true) {
1030                public DCTree parse(int pos) throws ParseException {
1031                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
1032                    nextChar();
1033                    return m.at(pos).newCodeTree((DCText) text);
1034                }
1035            },
1036
1037            // @deprecated deprecated-text
1038            new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
1039                public DCTree parse(int pos) {
1040                    List<DCTree> reason = blockContent();
1041                    return m.at(pos).newDeprecatedTree(reason);
1042                }
1043            },
1044
1045            // {@docRoot}
1046            new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
1047                public DCTree parse(int pos) throws ParseException {
1048                    if (ch == '}') {
1049                        nextChar();
1050                        return m.at(pos).newDocRootTree();
1051                    }
1052                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
1053                    nextChar();
1054                    throw new ParseException("dc.unexpected.content");
1055                }
1056            },
1057
1058            // @exception class-name description
1059            new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
1060                public DCTree parse(int pos) throws ParseException {
1061                    skipWhitespace();
1062                    DCReference ref = reference(false);
1063                    List<DCTree> description = blockContent();
1064                    return m.at(pos).newExceptionTree(ref, description);
1065                }
1066            },
1067
1068            // @hidden hidden-text
1069            new TagParser(Kind.BLOCK, DCTree.Kind.HIDDEN) {
1070                public DCTree parse(int pos) {
1071                    List<DCTree> reason = blockContent();
1072                    return m.at(pos).newHiddenTree(reason);
1073                }
1074            },
1075
1076            // @index search-term options-description
1077            new TagParser(Kind.INLINE, DCTree.Kind.INDEX) {
1078                public DCTree parse(int pos) throws ParseException {
1079                    skipWhitespace();
1080                    if (ch == '}') {
1081                        throw new ParseException("dc.no.content");
1082                    }
1083                    DCTree term = ch == '"' ? quotedString() : inlineWord();
1084                    if (term == null) {
1085                        throw new ParseException("dc.no.content");
1086                    }
1087                    skipWhitespace();
1088                    List<DCTree> description = List.nil();
1089                    if (ch != '}') {
1090                        description = inlineContent();
1091                    } else {
1092                        nextChar();
1093                    }
1094                    return m.at(pos).newIndexTree(term, description);
1095                }
1096            },
1097
1098            // {@inheritDoc}
1099            new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
1100                public DCTree parse(int pos) throws ParseException {
1101                    if (ch == '}') {
1102                        nextChar();
1103                        return m.at(pos).newInheritDocTree();
1104                    }
1105                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
1106                    nextChar();
1107                    throw new ParseException("dc.unexpected.content");
1108                }
1109            },
1110
1111            // {@link package.class#member label}
1112            new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
1113                public DCTree parse(int pos) throws ParseException {
1114                    DCReference ref = reference(true);
1115                    List<DCTree> label = inlineContent();
1116                    return m.at(pos).newLinkTree(ref, label);
1117                }
1118            },
1119
1120            // {@linkplain package.class#member label}
1121            new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
1122                public DCTree parse(int pos) throws ParseException {
1123                    DCReference ref = reference(true);
1124                    List<DCTree> label = inlineContent();
1125                    return m.at(pos).newLinkPlainTree(ref, label);
1126                }
1127            },
1128
1129            // {@literal text}
1130            new TagParser(Kind.INLINE, DCTree.Kind.LITERAL, true) {
1131                public DCTree parse(int pos) throws ParseException {
1132                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
1133                    nextChar();
1134                    return m.at(pos).newLiteralTree((DCText) text);
1135                }
1136            },
1137
1138            // @param parameter-name description
1139            new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
1140                public DCTree parse(int pos) throws ParseException {
1141                    skipWhitespace();
1142
1143                    boolean typaram = false;
1144                    if (ch == '<') {
1145                        typaram = true;
1146                        nextChar();
1147                    }
1148
1149                    DCIdentifier id = identifier();
1150
1151                    if (typaram) {
1152                        if (ch != '>')
1153                            throw new ParseException("dc.gt.expected");
1154                        nextChar();
1155                    }
1156
1157                    skipWhitespace();
1158                    List<DCTree> desc = blockContent();
1159                    return m.at(pos).newParamTree(typaram, id, desc);
1160                }
1161            },
1162
1163            // @provides service-name description
1164            new TagParser(Kind.BLOCK, DCTree.Kind.PROVIDES) {
1165                public DCTree parse(int pos) throws ParseException {
1166                    skipWhitespace();
1167                    DCReference ref = reference(true);
1168                    List<DCTree> description = blockContent();
1169                    return m.at(pos).newProvidesTree(ref, description);
1170                }
1171            },
1172
1173            // @return description
1174            new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
1175                public DCTree parse(int pos) {
1176                    List<DCTree> description = blockContent();
1177                    return m.at(pos).newReturnTree(description);
1178                }
1179            },
1180
1181            // @see reference | quoted-string | HTML
1182            new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
1183                public DCTree parse(int pos) throws ParseException {
1184                    skipWhitespace();
1185                    switch (ch) {
1186                        case '"':
1187                            DCText string = quotedString();
1188                            if (string != null) {
1189                                skipWhitespace();
1190                                if (ch == '@'
1191                                        || ch == EOI && bp == buf.length - 1) {
1192                                    return m.at(pos).newSeeTree(List.<DCTree>of(string));
1193                                }
1194                            }
1195                            break;
1196
1197                        case '<':
1198                            List<DCTree> html = blockContent();
1199                            if (html != null)
1200                                return m.at(pos).newSeeTree(html);
1201                            break;
1202
1203                        case '@':
1204                            if (newline)
1205                                throw new ParseException("dc.no.content");
1206                            break;
1207
1208                        case EOI:
1209                            if (bp == buf.length - 1)
1210                                throw new ParseException("dc.no.content");
1211                            break;
1212
1213                        default:
1214                            if (isJavaIdentifierStart(ch) || ch == '#') {
1215                                DCReference ref = reference(true);
1216                                List<DCTree> description = blockContent();
1217                                return m.at(pos).newSeeTree(description.prepend(ref));
1218                            }
1219                    }
1220                    throw new ParseException("dc.unexpected.content");
1221                }
1222            },
1223
1224            // @serialData data-description
1225            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
1226                public DCTree parse(int pos) {
1227                    List<DCTree> description = blockContent();
1228                    return m.at(pos).newSerialDataTree(description);
1229                }
1230            },
1231
1232            // @serialField field-name field-type description
1233            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
1234                public DCTree parse(int pos) throws ParseException {
1235                    skipWhitespace();
1236                    DCIdentifier name = identifier();
1237                    skipWhitespace();
1238                    DCReference type = reference(false);
1239                    List<DCTree> description = null;
1240                    if (isWhitespace(ch)) {
1241                        skipWhitespace();
1242                        description = blockContent();
1243                    }
1244                    return m.at(pos).newSerialFieldTree(name, type, description);
1245                }
1246            },
1247
1248            // @serial field-description | include | exclude
1249            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
1250                public DCTree parse(int pos) {
1251                    List<DCTree> description = blockContent();
1252                    return m.at(pos).newSerialTree(description);
1253                }
1254            },
1255
1256            // @since since-text
1257            new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
1258                public DCTree parse(int pos) {
1259                    List<DCTree> description = blockContent();
1260                    return m.at(pos).newSinceTree(description);
1261                }
1262            },
1263
1264            // @throws class-name description
1265            new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
1266                public DCTree parse(int pos) throws ParseException {
1267                    skipWhitespace();
1268                    DCReference ref = reference(false);
1269                    List<DCTree> description = blockContent();
1270                    return m.at(pos).newThrowsTree(ref, description);
1271                }
1272            },
1273
1274            // @uses service-name description
1275            new TagParser(Kind.BLOCK, DCTree.Kind.USES) {
1276                public DCTree parse(int pos) throws ParseException {
1277                    skipWhitespace();
1278                    DCReference ref = reference(true);
1279                    List<DCTree> description = blockContent();
1280                    return m.at(pos).newUsesTree(ref, description);
1281                }
1282            },
1283
1284            // {@value package.class#field}
1285            new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
1286                public DCTree parse(int pos) throws ParseException {
1287                    DCReference ref = reference(true);
1288                    skipWhitespace();
1289                    if (ch == '}') {
1290                        nextChar();
1291                        return m.at(pos).newValueTree(ref);
1292                    }
1293                    nextChar();
1294                    throw new ParseException("dc.unexpected.content");
1295                }
1296            },
1297
1298            // @version version-text
1299            new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
1300                public DCTree parse(int pos) {
1301                    List<DCTree> description = blockContent();
1302                    return m.at(pos).newVersionTree(description);
1303                }
1304            },
1305        };
1306
1307        tagParsers = new HashMap<>();
1308        for (TagParser p: parsers)
1309            tagParsers.put(names.fromString(p.getTreeKind().tagName), p);
1310
1311    }
1312}
1313