JavaScriptScanner.java revision 3896:8e4dbcb99277
1/*
2 * Copyright (c) 2012,2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javadoc.main;
27
28import java.util.Arrays;
29import java.util.HashMap;
30import java.util.HashSet;
31import java.util.Locale;
32import java.util.Map;
33import java.util.Set;
34
35import com.sun.tools.javadoc.main.JavaScriptScanner.TagParser.Kind;
36
37import static com.sun.tools.javac.util.LayoutCharacters.EOI;
38
39/**
40 * Parser to detect use of JavaScript in documentation comments.
41 */
42@Deprecated
43public class JavaScriptScanner {
44    public static interface Reporter {
45        void report();
46    }
47
48    static class ParseException extends Exception {
49        private static final long serialVersionUID = 0;
50        ParseException(String key) {
51            super(key);
52        }
53    }
54
55    private Reporter reporter; // callback handed to parse(); checkHtmlTag/checkHtmlAttr call its report()
56
57    /** The input buffer, index of most recent character read,
58     *  index of one past last character in buffer.
59     */
60    protected char[] buf;
61    protected int bp;
62    protected int buflen;
63
64    /** The current character.
65     */
66    protected char ch;
67
    // Line-start tracking: nextChar() sets this when it reads '\f', '\n' or '\r',
    // and the scanning methods clear it when they consume other non-blank
    // characters. An '@' seen while it is true is treated as the start of a
    // block tag.
68    private boolean newline = true;
69
    // Tag handlers, looked up by tag name in blockTag() and inlineTag().
70    Map<String, TagParser> tagParsers;
    // Lower-case attribute names consulted by checkHtmlAttr(): a name in
    // eventAttrs is always reported; a name in uriAttrs is reported only when
    // its value starts with "javascript:".
    // NOTE(review): presumably filled by initEventAttrs()/initURIAttrs(), whose
    // bodies are not visible here -- confirm.
71    Set<String> eventAttrs;
72    Set<String> uriAttrs;
73
74    public JavaScriptScanner() {
        // Set up the lookup structures used while scanning.
        // NOTE(review): presumably these populate tagParsers, eventAttrs and
        // uriAttrs respectively; the init methods are not fully visible here.
75        initTagParsers();
76        initEventAttrs();
77        initURIAttrs();
78    }
79
80    public void parse(String comment, Reporter r) {
81        reporter = r;
82        String c = comment;
83        buf = new char[c.length() + 1];
84        c.getChars(0, c.length(), buf, 0);
85        buf[buf.length - 1] = EOI;
86        buflen = buf.length - 1;
87        bp = -1;
88        newline = true;
89        nextChar();
90
91        blockContent();
92        blockTags();
93    }
94
95    private void checkHtmlTag(String tag) {
96        if (tag.equalsIgnoreCase("script")) {
97            reporter.report();
98        }
99    }
100
101    private void checkHtmlAttr(String name, String value) {
102        String n = name.toLowerCase(Locale.ENGLISH);
103        if (eventAttrs.contains(n)
104                || uriAttrs.contains(n)
105                    && value != null && value.toLowerCase(Locale.ENGLISH).trim().startsWith("javascript:")) {
106            reporter.report();
107        }
108    }
109
110    void nextChar() {
111        ch = buf[bp < buflen ? ++bp : buflen];
112        switch (ch) {
113            case '\f': case '\n': case '\r':
114                newline = true;
115        }
116    }
117
118    /**
119     * Read block content, consisting of text, html and inline tags.
120     * Terminated by the end of input, or the beginning of the next block tag:
121     * i.e. @ as the first non-whitespace character on a line.
122     */
123    @SuppressWarnings("fallthrough")
124    protected void blockContent() {
125
126        loop:
127        while (bp < buflen) {
128            switch (ch) {
129                case '\n': case '\r': case '\f':
130                    newline = true;
131                    // fallthrough
132
133                case ' ': case '\t':
134                    nextChar();
135                    break;
136
137                case '&':
138                    entity(null);
139                    break;
140
141                case '<':
142                    html();
143                    break;
144
145                case '>':
146                    newline = false;
147                    nextChar();
148                    break;
149
150                case '{':
151                    inlineTag(null);
152                    break;
153
154                case '@':
155                    if (newline) {
156                        break loop;
157                    }
158                    // fallthrough
159
160                default:
161                    newline = false;
162                    nextChar();
163            }
164        }
165    }
166
167    /**
168     * Read a series of block tags, including their content.
169     * Standard tags parse their content appropriately.
170     * Non-standard tags are represented by {@link UnknownBlockTag}.
171     */
172    protected void blockTags() {
173        while (ch == '@')
174            blockTag();
175    }
176
177    /**
178     * Read a single block tag, including its content.
179     * Standard tags parse their content appropriately.
180     * Non-standard tags are represented by {@link UnknownBlockTag}.
181     */
182    protected void blockTag() {
183        int p = bp;
184        try {
185            nextChar();
186            if (isIdentifierStart(ch)) {
187                String name = readTagName();
188                TagParser tp = tagParsers.get(name);
189                if (tp == null) {
190                    blockContent();
191                } else {
192                    switch (tp.getKind()) {
193                        case BLOCK:
194                            tp.parse(p);
195                            return;
196                        case INLINE:
197                            return;
198                    }
199                }
200            }
201            blockContent();
202        } catch (ParseException e) {
203            blockContent();
204        }
205    }
206
207    protected void inlineTag(Void list) {
208        newline = false;
209        nextChar();
210        if (ch == '@') {
211            inlineTag();
212        }
213    }
214
215    /**
216     * Read a single inline tag, including its content.
217     * Standard tags parse their content appropriately.
218     * Non-standard tags are represented by {@link UnknownBlockTag}.
219     * Malformed tags may be returned as {@link Erroneous}.
220     */
221    protected void inlineTag() {
222        int p = bp - 1;
223        try {
224            nextChar();
225            if (isIdentifierStart(ch)) {
226                String name = readTagName();
227                TagParser tp = tagParsers.get(name);
228
229                if (tp == null) {
230                    skipWhitespace();
231                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
232                    nextChar();
233                } else {
234                    skipWhitespace();
235                    if (tp.getKind() == TagParser.Kind.INLINE) {
236                        tp.parse(p);
237                    } else { // handle block tags (ex: @see) in inline content
238                        inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content
239                        nextChar();
240                    }
241                }
242            }
243        } catch (ParseException e) {
244        }
245    }
246
247    private static enum WhitespaceRetentionPolicy {
248        RETAIN_ALL,
249        REMOVE_FIRST_SPACE,
250        REMOVE_ALL
251    }
252
253    /**
254     * Read plain text content of an inline tag.
255     * Matching pairs of { } are skipped; the text is terminated by the first
256     * unmatched }. It is an error if the beginning of the next tag is detected.
257     */
258    private void inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
259        switch (whitespacePolicy) {
260            case REMOVE_ALL:
261                skipWhitespace();
262                break;
263            case REMOVE_FIRST_SPACE:
264                if (ch == ' ')
265                    nextChar();
266                break;
267            case RETAIN_ALL:
268            default:
269                // do nothing
270                break;
271
272        }
273        int pos = bp;
274        int depth = 1;
275
276        loop:
277        while (bp < buflen) {
278            switch (ch) {
279                case '\n': case '\r': case '\f':
280                    newline = true;
281                    break;
282
283                case ' ': case '\t':
284                    break;
285
286                case '{':
287                    newline = false;
288                    depth++;
289                    break;
290
291                case '}':
292                    if (--depth == 0) {
293                        return;
294                    }
295                    newline = false;
296                    break;
297
298                case '@':
299                    if (newline)
300                        break loop;
301                    newline = false;
302                    break;
303
304                default:
305                    newline = false;
306                    break;
307            }
308            nextChar();
309        }
310        throw new ParseException("dc.unterminated.inline.tag");
311    }
312
313    /**
314     * Read Java class name, possibly followed by member
315     * Matching pairs of {@literal < >} are skipped. The text is terminated by the first
316     * unmatched }. It is an error if the beginning of the next tag is detected.
317     */
318    // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
319    // TODO: improve quality of parse to forbid bad constructions.
320    // TODO: update to use ReferenceParser
321    @SuppressWarnings("fallthrough")
322    protected void reference(boolean allowMember) throws ParseException {
323        int pos = bp;
324        int depth = 0;
325
326        // scan to find the end of the signature, by looking for the first
327        // whitespace not enclosed in () or <>, or the end of the tag
328        loop:
329        while (bp < buflen) {
330            switch (ch) {
331                case '\n': case '\r': case '\f':
332                    newline = true;
333                    // fallthrough
334
335                case ' ': case '\t':
336                    if (depth == 0)
337                        break loop;
338                    break;
339
340                case '(':
341                case '<':
342                    newline = false;
343                    depth++;
344                    break;
345
346                case ')':
347                case '>':
348                    newline = false;
349                    --depth;
350                    break;
351
352                case '}':
353                    if (bp == pos)
354                        return;
355                    newline = false;
356                    break loop;
357
358                case '@':
359                    if (newline)
360                        break loop;
361                    // fallthrough
362
363                default:
364                    newline = false;
365
366            }
367            nextChar();
368        }
369
370        if (depth != 0)
371            throw new ParseException("dc.unterminated.signature");
372    }
373
374    /**
375     * Read Java identifier
376     * Matching pairs of { } are skipped; the text is terminated by the first
377     * unmatched }. It is an error if the beginning of the next tag is detected.
378     */
379    @SuppressWarnings("fallthrough")
380    protected void identifier() throws ParseException {
381        skipWhitespace();
382        int pos = bp;
383
384        if (isJavaIdentifierStart(ch)) {
385            readJavaIdentifier();
386            return;
387        }
388
389        throw new ParseException("dc.identifier.expected");
390    }
391
392    /**
393     * Read a quoted string.
394     * It is an error if the beginning of the next tag is detected.
395     */
396    @SuppressWarnings("fallthrough")
397    protected void quotedString() {
398        int pos = bp;
399        nextChar();
400
401        loop:
402        while (bp < buflen) {
403            switch (ch) {
404                case '\n': case '\r': case '\f':
405                    newline = true;
406                    break;
407
408                case ' ': case '\t':
409                    break;
410
411                case '"':
412                    nextChar();
413                    // trim trailing white-space?
414                    return;
415
416                case '@':
417                    if (newline)
418                        break loop;
419
420            }
421            nextChar();
422        }
423    }
424
425    /**
426     * Read a term ie. one word.
427     * It is an error if the beginning of the next tag is detected.
428     */
429    @SuppressWarnings("fallthrough")
430    protected void inlineWord() {
431        int pos = bp;
432        int depth = 0;
433        loop:
434        while (bp < buflen) {
435            switch (ch) {
436                case '\n':
437                    newline = true;
438                    // fallthrough
439
440                case '\r': case '\f': case ' ': case '\t':
441                    return;
442
443                case '@':
444                    if (newline)
445                        break loop;
446
447                case '{':
448                    depth++;
449                    break;
450
451                case '}':
452                    if (depth == 0 || --depth == 0)
453                        return;
454                    break;
455            }
456            newline = false;
457            nextChar();
458        }
459    }
460
461    /**
462     * Read general text content of an inline tag, including HTML entities and elements.
463     * Matching pairs of { } are skipped; the text is terminated by the first
464     * unmatched }. It is an error if the beginning of the next tag is detected.
465     */
466    @SuppressWarnings("fallthrough")
467    private void inlineContent() {
468
469        skipWhitespace();
470        int pos = bp;
471        int depth = 1;
472
473        loop:
474        while (bp < buflen) {
475
476            switch (ch) {
477                case '\n': case '\r': case '\f':
478                    newline = true;
479                    // fall through
480
481                case ' ': case '\t':
482                    nextChar();
483                    break;
484
485                case '&':
486                    entity(null);
487                    break;
488
489                case '<':
490                    newline = false;
491                    html();
492                    break;
493
494                case '{':
495                    newline = false;
496                    depth++;
497                    nextChar();
498                    break;
499
500                case '}':
501                    newline = false;
502                    if (--depth == 0) {
503                        nextChar();
504                        return;
505                    }
506                    nextChar();
507                    break;
508
509                case '@':
510                    if (newline)
511                        break loop;
512                    // fallthrough
513
514                default:
515                    nextChar();
516                    break;
517            }
518        }
519
520    }
521
522    protected void entity(Void list) {
523        newline = false;
524        entity();
525    }
526
527    /**
528     * Read an HTML entity.
529     * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
530     */
531    protected void entity() {
532        nextChar();
533        String name = null;
534        if (ch == '#') {
535            int namep = bp;
536            nextChar();
537            if (isDecimalDigit(ch)) {
538                nextChar();
539                while (isDecimalDigit(ch))
540                    nextChar();
541                name = new String(buf, namep, bp - namep);
542            } else if (ch == 'x' || ch == 'X') {
543                nextChar();
544                if (isHexDigit(ch)) {
545                    nextChar();
546                    while (isHexDigit(ch))
547                        nextChar();
548                    name = new String(buf, namep, bp - namep);
549                }
550            }
551        } else if (isIdentifierStart(ch)) {
552            name = readIdentifier();
553        }
554
555        if (name != null) {
556            if (ch != ';')
557                return;
558            nextChar();
559        }
560    }
561
562    /**
563     * Read the start or end of an HTML tag, or an HTML comment
564     * {@literal <identifier attrs> } or {@literal </identifier> }
565     */
566    protected void html() {
567        int p = bp;
568        nextChar();
569        if (isIdentifierStart(ch)) {
570            String name = readIdentifier();
571            checkHtmlTag(name);
572            htmlAttrs();
573            if (ch == '/') {
574                nextChar();
575            }
576            if (ch == '>') {
577                nextChar();
578                return;
579            }
580        } else if (ch == '/') {
581            nextChar();
582            if (isIdentifierStart(ch)) {
583                readIdentifier();
584                skipWhitespace();
585                if (ch == '>') {
586                    nextChar();
587                    return;
588                }
589            }
590        } else if (ch == '!') {
591            nextChar();
592            if (ch == '-') {
593                nextChar();
594                if (ch == '-') {
595                    nextChar();
596                    while (bp < buflen) {
597                        int dash = 0;
598                        while (ch == '-') {
599                            dash++;
600                            nextChar();
601                        }
602                        // Strictly speaking, a comment should not contain "--"
603                        // so dash > 2 is an error, dash == 2 implies ch == '>'
604                        // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
605                        // for more details.
606                        if (dash >= 2 && ch == '>') {
607                            nextChar();
608                            return;
609                        }
610
611                        nextChar();
612                    }
613                }
614            }
615        }
616
617        bp = p + 1;
618        ch = buf[bp];
619    }
620
621    /**
622     * Read a series of HTML attributes, terminated by {@literal > }.
623     * Each attribute is of the form {@literal identifier[=value] }.
624     * "value" may be unquoted, single-quoted, or double-quoted.
625     */
626    protected void htmlAttrs() {
627        skipWhitespace();
628
629        loop:
630        while (isIdentifierStart(ch)) {
631            int namePos = bp;
632            String name = readAttributeName();
633            skipWhitespace();
634            StringBuilder value = new StringBuilder();
635            if (ch == '=') {
636                nextChar();
637                skipWhitespace();
638                if (ch == '\'' || ch == '"') {
639                    char quote = ch;
640                    nextChar();
641                    while (bp < buflen && ch != quote) {
642                        if (newline && ch == '@') {
643                            // No point trying to read more.
644                            // In fact, all attrs get discarded by the caller
645                            // and superseded by a malformed.html node because
646                            // the html tag itself is not terminated correctly.
647                            break loop;
648                        }
649                        value.append(ch);
650                        nextChar();
651                    }
652                    nextChar();
653                } else {
654                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
655                        value.append(ch);
656                        nextChar();
657                    }
658                }
659                skipWhitespace();
660            }
661            checkHtmlAttr(name, value.toString());
662        }
663    }
664
665    protected void attrValueChar(Void list) {
666        switch (ch) {
667            case '&':
668                entity(list);
669                break;
670
671            case '{':
672                inlineTag(list);
673                break;
674
675            default:
676                nextChar();
677        }
678    }
679
680    protected boolean isIdentifierStart(char ch) {
681        return Character.isUnicodeIdentifierStart(ch);
682    }
683
684    protected String readIdentifier() {
685        int start = bp;
686        nextChar();
687        while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
688            nextChar();
689        return new String(buf, start, bp - start);
690    }
691
692    protected String readAttributeName() {
693        int start = bp;
694        nextChar();
695        while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
696            nextChar();
697        return new String(buf, start, bp - start);
698    }
699
700    protected String readTagName() {
701        int start = bp;
702        nextChar();
703        while (bp < buflen
704                && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
705                || ch == '-' || ch == ':')) {
706            nextChar();
707        }
708        return new String(buf, start, bp - start);
709    }
710
711    protected boolean isJavaIdentifierStart(char ch) {
712        return Character.isJavaIdentifierStart(ch);
713    }
714
715    protected String readJavaIdentifier() {
716        int start = bp;
717        nextChar();
718        while (bp < buflen && Character.isJavaIdentifierPart(ch))
719            nextChar();
720        return new String(buf, start, bp - start);
721    }
722
723    protected boolean isDecimalDigit(char ch) {
724        return ('0' <= ch && ch <= '9');
725    }
726
727    protected boolean isHexDigit(char ch) {
728        return ('0' <= ch && ch <= '9')
729                || ('a' <= ch && ch <= 'f')
730                || ('A' <= ch && ch <= 'F');
731    }
732
733    protected boolean isUnquotedAttrValueTerminator(char ch) {
734        switch (ch) {
735            case '\f': case '\n': case '\r': case '\t':
736            case ' ':
737            case '"': case '\'': case '`':
738            case '=': case '<': case '>':
739                return true;
740            default:
741                return false;
742        }
743    }
744
745    protected boolean isWhitespace(char ch) {
746        return Character.isWhitespace(ch);
747    }
748
749    protected void skipWhitespace() {
750        while (isWhitespace(ch)) {
751            nextChar();
752        }
753    }
754
755    /**
756     * @param start position of first character of string
757     * @param end position of character beyond last character to be included
758     */
759    String newString(int start, int end) {
760        return new String(buf, start, end - start);
761    }
762
763    static abstract class TagParser {
764        enum Kind { INLINE, BLOCK }
765
766        final Kind kind;
767        final String name;
768
769
770        TagParser(Kind k, String tk) {
771            kind = k;
772            name = tk;
773        }
774
775        TagParser(Kind k, String tk, boolean retainWhiteSpace) {
776            this(k, tk);
777        }
778
779        Kind getKind() {
780            return kind;
781        }
782
783        String getName() {
784            return name;
785        }
786
787        abstract void parse(int pos) throws ParseException;
788    }
789
790    /**
791     * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
792     */
793    @SuppressWarnings("deprecation")
794    private void initTagParsers() {
795        TagParser[] parsers = {
796            // @author name-text
797            new TagParser(Kind.BLOCK, "author") {
798                @Override
799                public void parse(int pos) {
800                    blockContent();
801                }
802            },
803
804            // {@code text}
805            new TagParser(Kind.INLINE, "code", true) {
806                @Override
807                public void parse(int pos) throws ParseException {
808                    inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
809                    nextChar();
810                }
811            },
812
813            // @deprecated deprecated-text
814            new TagParser(Kind.BLOCK, "deprecated") {
815                @Override
816                public void parse(int pos) {
817                    blockContent();
818                }
819            },
820
821            // {@docRoot}
822            new TagParser(Kind.INLINE, "docRoot") {
823                @Override
824                public void parse(int pos) throws ParseException {
825                    if (ch == '}') {
826                        nextChar();
827                        return;
828                    }
829                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
830                    nextChar();
831                    throw new ParseException("dc.unexpected.content");
832                }
833            },
834
835            // @exception class-name description
836            new TagParser(Kind.BLOCK, "exception") {
837                @Override
838                public void parse(int pos) throws ParseException {
839                    skipWhitespace();
840                    reference(false);
841                    blockContent();
842                }
843            },
844
845            // @hidden hidden-text
846            new TagParser(Kind.BLOCK, "hidden") {
847                @Override
848                public void parse(int pos) {
849                    blockContent();
850                }
851            },
852
853            // @index search-term options-description
854            new TagParser(Kind.INLINE, "index") {
855                @Override
856                public void parse(int pos) throws ParseException {
857                    skipWhitespace();
858                    if (ch == '}') {
859                        throw new ParseException("dc.no.content");
860                    }
861                    if (ch == '"') quotedString(); else inlineWord();
862                    skipWhitespace();
863                    if (ch != '}') {
864                        inlineContent();
865                    } else {
866                        nextChar();
867                    }
868                }
869            },
870
871            // {@inheritDoc}
872            new TagParser(Kind.INLINE, "inheritDoc") {
873                @Override
874                public void parse(int pos) throws ParseException {
875                    if (ch == '}') {
876                        nextChar();
877                        return;
878                    }
879                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
880                    nextChar();
881                    throw new ParseException("dc.unexpected.content");
882                }
883            },
884
885            // {@link package.class#member label}
886            new TagParser(Kind.INLINE, "link") {
887                @Override
888                public void parse(int pos) throws ParseException {
889                    reference(true);
890                    inlineContent();
891                }
892            },
893
894            // {@linkplain package.class#member label}
895            new TagParser(Kind.INLINE, "linkplain") {
896                @Override
897                public void parse(int pos) throws ParseException {
898                    reference(true);
899                    inlineContent();
900                }
901            },
902
903            // {@literal text}
904            new TagParser(Kind.INLINE, "literal", true) {
905                @Override
906                public void parse(int pos) throws ParseException {
907                    inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
908                    nextChar();
909                }
910            },
911
912            // @param parameter-name description
913            new TagParser(Kind.BLOCK, "param") {
914                @Override
915                public void parse(int pos) throws ParseException {
916                    skipWhitespace();
917
918                    boolean typaram = false;
919                    if (ch == '<') {
920                        typaram = true;
921                        nextChar();
922                    }
923
924                    identifier();
925
926                    if (typaram) {
927                        if (ch != '>')
928                            throw new ParseException("dc.gt.expected");
929                        nextChar();
930                    }
931
932                    skipWhitespace();
933                    blockContent();
934                }
935            },
936
937            // @return description
938            new TagParser(Kind.BLOCK, "return") {
939                @Override
940                public void parse(int pos) {
941                    blockContent();
942                }
943            },
944
945            // @see reference | quoted-string | HTML
946            new TagParser(Kind.BLOCK, "see") {
947                @Override
948                public void parse(int pos) throws ParseException {
949                    skipWhitespace();
950                    switch (ch) {
951                        case '"':
952                            quotedString();
953                            skipWhitespace();
954                            if (ch == '@'
955                                    || ch == EOI && bp == buf.length - 1) {
956                                return;
957                            }
958                            break;
959
960                        case '<':
961                            blockContent();
962                            return;
963
964                        case '@':
965                            if (newline)
966                                throw new ParseException("dc.no.content");
967                            break;
968
969                        case EOI:
970                            if (bp == buf.length - 1)
971                                throw new ParseException("dc.no.content");
972                            break;
973
974                        default:
975                            if (isJavaIdentifierStart(ch) || ch == '#') {
976                                reference(true);
977                                blockContent();
978                            }
979                    }
980                    throw new ParseException("dc.unexpected.content");
981                }
982            },
983
984            // @serialData data-description
985            new TagParser(Kind.BLOCK, "@serialData") {
986                @Override
987                public void parse(int pos) {
988                    blockContent();
989                }
990            },
991
992            // @serialField field-name field-type description
993            new TagParser(Kind.BLOCK, "serialField") {
994                @Override
995                public void parse(int pos) throws ParseException {
996                    skipWhitespace();
997                    identifier();
998                    skipWhitespace();
999                    reference(false);
1000                    if (isWhitespace(ch)) {
1001                        skipWhitespace();
1002                        blockContent();
1003                    }
1004                }
1005            },
1006
1007            // @serial field-description | include | exclude
1008            new TagParser(Kind.BLOCK, "serial") {
1009                @Override
1010                public void parse(int pos) {
1011                    blockContent();
1012                }
1013            },
1014
1015            // @since since-text
1016            new TagParser(Kind.BLOCK, "since") {
1017                @Override
1018                public void parse(int pos) {
1019                    blockContent();
1020                }
1021            },
1022
1023            // @throws class-name description
1024            new TagParser(Kind.BLOCK, "throws") {
1025                @Override
1026                public void parse(int pos) throws ParseException {
1027                    skipWhitespace();
1028                    reference(false);
1029                    blockContent();
1030                }
1031            },
1032
1033            // {@value package.class#field}
1034            new TagParser(Kind.INLINE, "value") {
1035                @Override
1036                public void parse(int pos) throws ParseException {
1037                    reference(true);
1038                    skipWhitespace();
1039                    if (ch == '}') {
1040                        nextChar();
1041                        return;
1042                    }
1043                    nextChar();
1044                    throw new ParseException("dc.unexpected.content");
1045                }
1046            },
1047
1048            // @version version-text
1049            new TagParser(Kind.BLOCK, "version") {
1050                @Override
1051                public void parse(int pos) {
1052                    blockContent();
1053                }
1054            },
1055        };
1056
1057        tagParsers = new HashMap<>();
1058        for (TagParser p: parsers)
1059            tagParsers.put(p.getName(), p);
1060
1061    }
1062
1063    private void initEventAttrs() {
1064        eventAttrs = new HashSet<>(Arrays.asList(
1065            // See https://www.w3.org/TR/html-markup/global-attributes.html#common.attrs.event-handler
1066            "onabort",  "onblur",  "oncanplay",  "oncanplaythrough",
1067            "onchange",  "onclick",  "oncontextmenu",  "ondblclick",
1068            "ondrag",  "ondragend",  "ondragenter",  "ondragleave",
1069            "ondragover",  "ondragstart",  "ondrop",  "ondurationchange",
1070            "onemptied",  "onended",  "onerror",  "onfocus",  "oninput",
1071            "oninvalid",  "onkeydown",  "onkeypress",  "onkeyup",
1072            "onload",  "onloadeddata",  "onloadedmetadata",  "onloadstart",
1073            "onmousedown",  "onmousemove",  "onmouseout",  "onmouseover",
1074            "onmouseup",  "onmousewheel",  "onpause",  "onplay",
1075            "onplaying",  "onprogress",  "onratechange",  "onreadystatechange",
1076            "onreset",  "onscroll",  "onseeked",  "onseeking",
1077            "onselect",  "onshow",  "onstalled",  "onsubmit",  "onsuspend",
1078            "ontimeupdate",  "onvolumechange",  "onwaiting",
1079
1080            // See https://www.w3.org/TR/html4/sgml/dtd.html
1081            // Most of the attributes that take a %Script are also defined as event handlers
1082            // in HTML 5. The one exception is onunload.
1083            // "onchange",  "onclick",   "ondblclick",  "onfocus",
1084            // "onkeydown",  "onkeypress",  "onkeyup",  "onload",
1085            // "onmousedown",  "onmousemove",  "onmouseout",  "onmouseover",
1086            // "onmouseup",  "onreset",  "onselect",  "onsubmit",
1087            "onunload"
1088        ));
1089    }
1090
1091    private void initURIAttrs() {
1092        uriAttrs = new HashSet<>(Arrays.asList(
1093            // See https://www.w3.org/TR/html4/sgml/dtd.html
1094            //     https://www.w3.org/TR/html5/
1095            // These are all the attributes that take a %URI or a valid URL potentially surrounded
1096            // by spaces
1097            "action",  "cite",  "classid",  "codebase",  "data",
1098            "datasrc",  "for",  "href",  "longdesc",  "profile",
1099            "src",  "usemap"
1100        ));
1101    }
1102
1103}
1104