DocCommentParser.java revision 3831:209b0eab0e1f
1/* 2 * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.tools.javac.parser; 27 28import java.text.BreakIterator; 29import java.util.HashMap; 30import java.util.Map; 31 32import com.sun.source.doctree.AttributeTree.ValueKind; 33import com.sun.source.doctree.DocTree; 34import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind; 35import com.sun.tools.javac.parser.Tokens.Comment; 36import com.sun.tools.javac.parser.Tokens.TokenKind; 37import com.sun.tools.javac.tree.DCTree; 38import com.sun.tools.javac.tree.DCTree.DCAttribute; 39import com.sun.tools.javac.tree.DCTree.DCDocComment; 40import com.sun.tools.javac.tree.DCTree.DCEndPosTree; 41import com.sun.tools.javac.tree.DCTree.DCErroneous; 42import com.sun.tools.javac.tree.DCTree.DCIdentifier; 43import com.sun.tools.javac.tree.DCTree.DCReference; 44import com.sun.tools.javac.tree.DCTree.DCText; 45import com.sun.tools.javac.tree.DocTreeMaker; 46import com.sun.tools.javac.tree.JCTree; 47import com.sun.tools.javac.util.DiagnosticSource; 48import com.sun.tools.javac.util.List; 49import com.sun.tools.javac.util.ListBuffer; 50import com.sun.tools.javac.util.Log; 51import com.sun.tools.javac.util.Name; 52import com.sun.tools.javac.util.Names; 53import com.sun.tools.javac.util.Position; 54 55import static com.sun.tools.javac.util.LayoutCharacters.*; 56 57/** 58 * 59 * <p><b>This is NOT part of any supported API. 60 * If you write code that depends on this, you do so at your own risk. 61 * This code and its internal interfaces are subject to change or 62 * deletion without notice.</b> 63 */ 64public class DocCommentParser { 65 static class ParseException extends Exception { 66 private static final long serialVersionUID = 0; 67 ParseException(String key) { 68 super(key); 69 } 70 } 71 72 final ParserFactory fac; 73 final DiagnosticSource diagSource; 74 final Comment comment; 75 final DocTreeMaker m; 76 final Names names; 77 78 BreakIterator sentenceBreaker; 79 80 /** The input buffer, index of most recent character read, 81 * index of one past last character in buffer. 82 */ 83 protected char[] buf; 84 protected int bp; 85 protected int buflen; 86 87 /** The current character. 88 */ 89 protected char ch; 90 91 int textStart = -1; 92 int lastNonWhite = -1; 93 boolean newline = true; 94 95 Map<Name, TagParser> tagParsers; 96 97 public DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) { 98 this.fac = fac; 99 this.diagSource = diagSource; 100 this.comment = comment; 101 names = fac.names; 102 m = fac.docTreeMaker; 103 initTagParsers(); 104 } 105 106 public DocCommentParser(ParserFactory fac) { 107 this(fac, null, null); 108 } 109 110 public DCDocComment parse() { 111 String c = comment.getText(); 112 buf = new char[c.length() + 1]; 113 c.getChars(0, c.length(), buf, 0); 114 buf[buf.length - 1] = EOI; 115 buflen = buf.length - 1; 116 bp = -1; 117 nextChar(); 118 119 List<DCTree> body = blockContent(); 120 List<DCTree> tags = blockTags(); 121 int pos = !body.isEmpty() 122 ? body.head.pos 123 : !tags.isEmpty() ? tags.head.pos : Position.NOPOS; 124 125 DCDocComment dc = m.at(pos).newDocCommentTree(comment, body, tags); 126 return dc; 127 } 128 129 void nextChar() { 130 ch = buf[bp < buflen ? ++bp : buflen]; 131 switch (ch) { 132 case '\f': case '\n': case '\r': 133 newline = true; 134 } 135 } 136 137 /** 138 * Read block content, consisting of text, html and inline tags. 139 * Terminated by the end of input, or the beginning of the next block tag: 140 * i.e. @ as the first non-whitespace character on a line. 141 */ 142 @SuppressWarnings("fallthrough") 143 protected List<DCTree> blockContent() { 144 ListBuffer<DCTree> trees = new ListBuffer<>(); 145 textStart = -1; 146 147 loop: 148 while (bp < buflen) { 149 switch (ch) { 150 case '\n': case '\r': case '\f': 151 newline = true; 152 // fallthrough 153 154 case ' ': case '\t': 155 nextChar(); 156 break; 157 158 case '&': 159 entity(trees); 160 break; 161 162 case '<': 163 newline = false; 164 addPendingText(trees, bp - 1); 165 trees.add(html()); 166 if (textStart == -1) { 167 textStart = bp; 168 lastNonWhite = -1; 169 } 170 break; 171 172 case '>': 173 newline = false; 174 addPendingText(trees, bp - 1); 175 trees.add(m.at(bp).newErroneousTree(newString(bp, bp + 1), diagSource, "dc.bad.gt")); 176 nextChar(); 177 if (textStart == -1) { 178 textStart = bp; 179 lastNonWhite = -1; 180 } 181 break; 182 183 case '{': 184 inlineTag(trees); 185 break; 186 187 case '@': 188 if (newline) { 189 addPendingText(trees, lastNonWhite); 190 break loop; 191 } 192 // fallthrough 193 194 default: 195 newline = false; 196 if (textStart == -1) 197 textStart = bp; 198 lastNonWhite = bp; 199 nextChar(); 200 } 201 } 202 203 if (lastNonWhite != -1) 204 addPendingText(trees, lastNonWhite); 205 206 return trees.toList(); 207 } 208 209 /** 210 * Read a series of block tags, including their content. 211 * Standard tags parse their content appropriately. 212 * Non-standard tags are represented by {@link UnknownBlockTag}. 213 */ 214 protected List<DCTree> blockTags() { 215 ListBuffer<DCTree> tags = new ListBuffer<>(); 216 while (ch == '@') 217 tags.add(blockTag()); 218 return tags.toList(); 219 } 220 221 /** 222 * Read a single block tag, including its content. 223 * Standard tags parse their content appropriately. 224 * Non-standard tags are represented by {@link UnknownBlockTag}. 225 */ 226 protected DCTree blockTag() { 227 int p = bp; 228 try { 229 nextChar(); 230 if (isIdentifierStart(ch)) { 231 Name name = readTagName(); 232 TagParser tp = tagParsers.get(name); 233 if (tp == null) { 234 List<DCTree> content = blockContent(); 235 return m.at(p).newUnknownBlockTagTree(name, content); 236 } else { 237 switch (tp.getKind()) { 238 case BLOCK: 239 return tp.parse(p); 240 case INLINE: 241 return erroneous("dc.bad.inline.tag", p); 242 } 243 } 244 } 245 blockContent(); 246 247 return erroneous("dc.no.tag.name", p); 248 } catch (ParseException e) { 249 blockContent(); 250 return erroneous(e.getMessage(), p); 251 } 252 } 253 254 protected void inlineTag(ListBuffer<DCTree> list) { 255 newline = false; 256 nextChar(); 257 if (ch == '@') { 258 addPendingText(list, bp - 2); 259 list.add(inlineTag()); 260 textStart = bp; 261 lastNonWhite = -1; 262 } else { 263 if (textStart == -1) 264 textStart = bp - 1; 265 lastNonWhite = bp; 266 } 267 } 268 269 /** 270 * Read a single inline tag, including its content. 271 * Standard tags parse their content appropriately. 272 * Non-standard tags are represented by {@link UnknownBlockTag}. 273 * Malformed tags may be returned as {@link Erroneous}. 274 */ 275 protected DCTree inlineTag() { 276 int p = bp - 1; 277 try { 278 nextChar(); 279 if (isIdentifierStart(ch)) { 280 Name name = readTagName(); 281 TagParser tp = tagParsers.get(name); 282 283 if (tp == null) { 284 skipWhitespace(); 285 DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); 286 if (text != null) { 287 nextChar(); 288 return m.at(p).newUnknownInlineTagTree(name, List.of(text)).setEndPos(bp); 289 } 290 } else { 291 if (!tp.retainWhiteSpace) { 292 skipWhitespace(); 293 } 294 if (tp.getKind() == TagParser.Kind.INLINE) { 295 DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p); 296 if (tree != null) { 297 return tree.setEndPos(bp); 298 } 299 } else { // handle block tags (ex: @see) in inline content 300 inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content 301 nextChar(); 302 } 303 } 304 } 305 return erroneous("dc.no.tag.name", p); 306 } catch (ParseException e) { 307 return erroneous(e.getMessage(), p); 308 } 309 } 310 311 private static enum WhitespaceRetentionPolicy { 312 RETAIN_ALL, 313 REMOVE_FIRST_SPACE, 314 REMOVE_ALL 315 } 316 317 /** 318 * Read plain text content of an inline tag. 319 * Matching pairs of { } are skipped; the text is terminated by the first 320 * unmatched }. It is an error if the beginning of the next tag is detected. 321 */ 322 private DCTree inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException { 323 switch (whitespacePolicy) { 324 case REMOVE_ALL: 325 skipWhitespace(); 326 break; 327 case REMOVE_FIRST_SPACE: 328 if (ch == ' ') 329 nextChar(); 330 break; 331 case RETAIN_ALL: 332 default: 333 // do nothing 334 break; 335 336 } 337 int pos = bp; 338 int depth = 1; 339 340 loop: 341 while (bp < buflen) { 342 switch (ch) { 343 case '\n': case '\r': case '\f': 344 newline = true; 345 break; 346 347 case ' ': case '\t': 348 break; 349 350 case '{': 351 newline = false; 352 lastNonWhite = bp; 353 depth++; 354 break; 355 356 case '}': 357 if (--depth == 0) { 358 return m.at(pos).newTextTree(newString(pos, bp)); 359 } 360 newline = false; 361 lastNonWhite = bp; 362 break; 363 364 case '@': 365 if (newline) 366 break loop; 367 newline = false; 368 lastNonWhite = bp; 369 break; 370 371 default: 372 newline = false; 373 lastNonWhite = bp; 374 break; 375 } 376 nextChar(); 377 } 378 throw new ParseException("dc.unterminated.inline.tag"); 379 } 380 381 /** 382 * Read Java class name, possibly followed by member 383 * Matching pairs of {@literal < >} are skipped. The text is terminated by the first 384 * unmatched }. It is an error if the beginning of the next tag is detected. 385 */ 386 // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE 387 // TODO: improve quality of parse to forbid bad constructions. 388 // TODO: update to use ReferenceParser 389 @SuppressWarnings("fallthrough") 390 protected DCReference reference(boolean allowMember) throws ParseException { 391 int pos = bp; 392 int depth = 0; 393 394 // scan to find the end of the signature, by looking for the first 395 // whitespace not enclosed in () or <>, or the end of the tag 396 loop: 397 while (bp < buflen) { 398 switch (ch) { 399 case '\n': case '\r': case '\f': 400 newline = true; 401 // fallthrough 402 403 case ' ': case '\t': 404 if (depth == 0) 405 break loop; 406 break; 407 408 case '(': 409 case '<': 410 newline = false; 411 depth++; 412 break; 413 414 case ')': 415 case '>': 416 newline = false; 417 --depth; 418 break; 419 420 case '}': 421 if (bp == pos) 422 return null; 423 newline = false; 424 break loop; 425 426 case '@': 427 if (newline) 428 break loop; 429 // fallthrough 430 431 default: 432 newline = false; 433 434 } 435 nextChar(); 436 } 437 438 if (depth != 0) 439 throw new ParseException("dc.unterminated.signature"); 440 441 String sig = newString(pos, bp); 442 443 // Break sig apart into qualifiedExpr member paramTypes. 444 JCTree qualExpr; 445 Name member; 446 List<JCTree> paramTypes; 447 448 Log.DeferredDiagnosticHandler deferredDiagnosticHandler 449 = new Log.DeferredDiagnosticHandler(fac.log); 450 451 try { 452 int hash = sig.indexOf("#"); 453 int lparen = sig.indexOf("(", hash + 1); 454 if (hash == -1) { 455 if (lparen == -1) { 456 qualExpr = parseType(sig); 457 member = null; 458 } else { 459 qualExpr = null; 460 member = parseMember(sig.substring(0, lparen)); 461 } 462 } else { 463 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash)); 464 if (lparen == -1) 465 member = parseMember(sig.substring(hash + 1)); 466 else 467 member = parseMember(sig.substring(hash + 1, lparen)); 468 } 469 470 if (lparen < 0) { 471 paramTypes = null; 472 } else { 473 int rparen = sig.indexOf(")", lparen); 474 if (rparen != sig.length() - 1) 475 throw new ParseException("dc.ref.bad.parens"); 476 paramTypes = parseParams(sig.substring(lparen + 1, rparen)); 477 } 478 479 if (!deferredDiagnosticHandler.getDiagnostics().isEmpty()) 480 throw new ParseException("dc.ref.syntax.error"); 481 482 } finally { 483 fac.log.popDiagnosticHandler(deferredDiagnosticHandler); 484 } 485 486 return m.at(pos).newReferenceTree(sig, qualExpr, member, paramTypes).setEndPos(bp); 487 } 488 489 JCTree parseType(String s) throws ParseException { 490 JavacParser p = fac.newParser(s, false, false, false); 491 JCTree tree = p.parseType(); 492 if (p.token().kind != TokenKind.EOF) 493 throw new ParseException("dc.ref.unexpected.input"); 494 return tree; 495 } 496 497 Name parseMember(String s) throws ParseException { 498 JavacParser p = fac.newParser(s, false, false, false); 499 Name name = p.ident(); 500 if (p.token().kind != TokenKind.EOF) 501 throw new ParseException("dc.ref.unexpected.input"); 502 return name; 503 } 504 505 List<JCTree> parseParams(String s) throws ParseException { 506 if (s.trim().isEmpty()) 507 return List.nil(); 508 509 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false); 510 ListBuffer<JCTree> paramTypes = new ListBuffer<>(); 511 paramTypes.add(p.parseType()); 512 513 if (p.token().kind == TokenKind.IDENTIFIER) 514 p.nextToken(); 515 516 while (p.token().kind == TokenKind.COMMA) { 517 p.nextToken(); 518 paramTypes.add(p.parseType()); 519 520 if (p.token().kind == TokenKind.IDENTIFIER) 521 p.nextToken(); 522 } 523 524 if (p.token().kind != TokenKind.EOF) 525 throw new ParseException("dc.ref.unexpected.input"); 526 527 return paramTypes.toList(); 528 } 529 530 /** 531 * Read Java identifier 532 * Matching pairs of { } are skipped; the text is terminated by the first 533 * unmatched }. It is an error if the beginning of the next tag is detected. 534 */ 535 @SuppressWarnings("fallthrough") 536 protected DCIdentifier identifier() throws ParseException { 537 skipWhitespace(); 538 int pos = bp; 539 540 if (isJavaIdentifierStart(ch)) { 541 Name name = readJavaIdentifier(); 542 return m.at(pos).newIdentifierTree(name); 543 } 544 545 throw new ParseException("dc.identifier.expected"); 546 } 547 548 /** 549 * Read a quoted string. 550 * It is an error if the beginning of the next tag is detected. 551 */ 552 @SuppressWarnings("fallthrough") 553 protected DCText quotedString() { 554 int pos = bp; 555 nextChar(); 556 557 loop: 558 while (bp < buflen) { 559 switch (ch) { 560 case '\n': case '\r': case '\f': 561 newline = true; 562 break; 563 564 case ' ': case '\t': 565 break; 566 567 case '"': 568 nextChar(); 569 // trim trailing white-space? 570 return m.at(pos).newTextTree(newString(pos, bp)); 571 572 case '@': 573 if (newline) 574 break loop; 575 576 } 577 nextChar(); 578 } 579 return null; 580 } 581 582 /** 583 * Read a term ie. one word. 584 * It is an error if the beginning of the next tag is detected. 585 */ 586 @SuppressWarnings("fallthrough") 587 protected DCText inlineWord() { 588 int pos = bp; 589 int depth = 0; 590 loop: 591 while (bp < buflen) { 592 switch (ch) { 593 case '\n': 594 newline = true; 595 // fallthrough 596 597 case '\r': case '\f': case ' ': case '\t': 598 return m.at(pos).newTextTree(newString(pos, bp)); 599 600 case '@': 601 if (newline) 602 break loop; 603 604 case '{': 605 depth++; 606 break; 607 608 case '}': 609 if (depth == 0 || --depth == 0) 610 return m.at(pos).newTextTree(newString(pos, bp)); 611 break; 612 } 613 newline = false; 614 nextChar(); 615 } 616 return null; 617 } 618 619 /** 620 * Read general text content of an inline tag, including HTML entities and elements. 621 * Matching pairs of { } are skipped; the text is terminated by the first 622 * unmatched }. It is an error if the beginning of the next tag is detected. 623 */ 624 @SuppressWarnings("fallthrough") 625 private List<DCTree> inlineContent() { 626 ListBuffer<DCTree> trees = new ListBuffer<>(); 627 628 skipWhitespace(); 629 int pos = bp; 630 int depth = 1; 631 textStart = -1; 632 633 loop: 634 while (bp < buflen) { 635 636 switch (ch) { 637 case '\n': case '\r': case '\f': 638 newline = true; 639 // fall through 640 641 case ' ': case '\t': 642 nextChar(); 643 break; 644 645 case '&': 646 entity(trees); 647 break; 648 649 case '<': 650 newline = false; 651 addPendingText(trees, bp - 1); 652 trees.add(html()); 653 break; 654 655 case '{': 656 if (textStart == -1) 657 textStart = bp; 658 newline = false; 659 depth++; 660 nextChar(); 661 break; 662 663 case '}': 664 newline = false; 665 if (--depth == 0) { 666 addPendingText(trees, bp - 1); 667 nextChar(); 668 return trees.toList(); 669 } 670 nextChar(); 671 break; 672 673 case '@': 674 if (newline) 675 break loop; 676 // fallthrough 677 678 default: 679 if (textStart == -1) 680 textStart = bp; 681 nextChar(); 682 break; 683 } 684 } 685 686 return List.of(erroneous("dc.unterminated.inline.tag", pos)); 687 } 688 689 protected void entity(ListBuffer<DCTree> list) { 690 newline = false; 691 addPendingText(list, bp - 1); 692 list.add(entity()); 693 if (textStart == -1) { 694 textStart = bp; 695 lastNonWhite = -1; 696 } 697 } 698 699 /** 700 * Read an HTML entity. 701 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; } 702 */ 703 protected DCTree entity() { 704 int p = bp; 705 nextChar(); 706 Name name = null; 707 if (ch == '#') { 708 int namep = bp; 709 nextChar(); 710 if (isDecimalDigit(ch)) { 711 nextChar(); 712 while (isDecimalDigit(ch)) 713 nextChar(); 714 name = names.fromChars(buf, namep, bp - namep); 715 } else if (ch == 'x' || ch == 'X') { 716 nextChar(); 717 if (isHexDigit(ch)) { 718 nextChar(); 719 while (isHexDigit(ch)) 720 nextChar(); 721 name = names.fromChars(buf, namep, bp - namep); 722 } 723 } 724 } else if (isIdentifierStart(ch)) { 725 name = readIdentifier(); 726 } 727 728 if (name == null) 729 return erroneous("dc.bad.entity", p); 730 else { 731 if (ch != ';') 732 return erroneous("dc.missing.semicolon", p); 733 nextChar(); 734 return m.at(p).newEntityTree(name); 735 } 736 } 737 738 /** 739 * Read the start or end of an HTML tag, or an HTML comment 740 * {@literal <identifier attrs> } or {@literal </identifier> } 741 */ 742 protected DCTree html() { 743 int p = bp; 744 nextChar(); 745 if (isIdentifierStart(ch)) { 746 Name name = readIdentifier(); 747 List<DCTree> attrs = htmlAttrs(); 748 if (attrs != null) { 749 boolean selfClosing = false; 750 if (ch == '/') { 751 nextChar(); 752 selfClosing = true; 753 } 754 if (ch == '>') { 755 nextChar(); 756 DCTree dctree = m.at(p).newStartElementTree(name, attrs, selfClosing).setEndPos(bp); 757 return dctree; 758 } 759 } 760 } else if (ch == '/') { 761 nextChar(); 762 if (isIdentifierStart(ch)) { 763 Name name = readIdentifier(); 764 skipWhitespace(); 765 if (ch == '>') { 766 nextChar(); 767 return m.at(p).newEndElementTree(name); 768 } 769 } 770 } else if (ch == '!') { 771 nextChar(); 772 if (ch == '-') { 773 nextChar(); 774 if (ch == '-') { 775 nextChar(); 776 while (bp < buflen) { 777 int dash = 0; 778 while (ch == '-') { 779 dash++; 780 nextChar(); 781 } 782 // Strictly speaking, a comment should not contain "--" 783 // so dash > 2 is an error, dash == 2 implies ch == '>' 784 // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments 785 // for more details. 786 if (dash >= 2 && ch == '>') { 787 nextChar(); 788 return m.at(p).newCommentTree(newString(p, bp)); 789 } 790 791 nextChar(); 792 } 793 } 794 } 795 } 796 797 bp = p + 1; 798 ch = buf[bp]; 799 return erroneous("dc.malformed.html", p); 800 } 801 802 /** 803 * Read a series of HTML attributes, terminated by {@literal > }. 804 * Each attribute is of the form {@literal identifier[=value] }. 805 * "value" may be unquoted, single-quoted, or double-quoted. 806 */ 807 protected List<DCTree> htmlAttrs() { 808 ListBuffer<DCTree> attrs = new ListBuffer<>(); 809 skipWhitespace(); 810 811 loop: 812 while (isIdentifierStart(ch)) { 813 int namePos = bp; 814 Name name = readAttributeName(); 815 skipWhitespace(); 816 List<DCTree> value = null; 817 ValueKind vkind = ValueKind.EMPTY; 818 if (ch == '=') { 819 ListBuffer<DCTree> v = new ListBuffer<>(); 820 nextChar(); 821 skipWhitespace(); 822 if (ch == '\'' || ch == '"') { 823 vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE; 824 char quote = ch; 825 nextChar(); 826 textStart = bp; 827 while (bp < buflen && ch != quote) { 828 if (newline && ch == '@') { 829 attrs.add(erroneous("dc.unterminated.string", namePos)); 830 // No point trying to read more. 831 // In fact, all attrs get discarded by the caller 832 // and superseded by a malformed.html node because 833 // the html tag itself is not terminated correctly. 834 break loop; 835 } 836 attrValueChar(v); 837 } 838 addPendingText(v, bp - 1); 839 nextChar(); 840 } else { 841 vkind = ValueKind.UNQUOTED; 842 textStart = bp; 843 while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) { 844 attrValueChar(v); 845 } 846 addPendingText(v, bp - 1); 847 } 848 skipWhitespace(); 849 value = v.toList(); 850 } 851 DCAttribute attr = m.at(namePos).newAttributeTree(name, vkind, value); 852 attrs.add(attr); 853 } 854 855 return attrs.toList(); 856 } 857 858 protected void attrValueChar(ListBuffer<DCTree> list) { 859 switch (ch) { 860 case '&': 861 entity(list); 862 break; 863 864 case '{': 865 inlineTag(list); 866 break; 867 868 default: 869 nextChar(); 870 } 871 } 872 873 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) { 874 if (textStart != -1) { 875 if (textStart <= textEnd) { 876 list.add(m.at(textStart).newTextTree(newString(textStart, textEnd + 1))); 877 } 878 textStart = -1; 879 } 880 } 881 882 protected DCErroneous erroneous(String code, int pos) { 883 int i = bp - 1; 884 loop: 885 while (i > pos) { 886 switch (buf[i]) { 887 case '\f': case '\n': case '\r': 888 newline = true; 889 break; 890 case '\t': case ' ': 891 break; 892 default: 893 break loop; 894 } 895 i--; 896 } 897 textStart = -1; 898 return m.at(pos).newErroneousTree(newString(pos, i + 1), diagSource, code); 899 } 900 901 protected boolean isIdentifierStart(char ch) { 902 return Character.isUnicodeIdentifierStart(ch); 903 } 904 905 protected Name readIdentifier() { 906 int start = bp; 907 nextChar(); 908 while (bp < buflen && Character.isUnicodeIdentifierPart(ch)) 909 nextChar(); 910 return names.fromChars(buf, start, bp - start); 911 } 912 913 protected Name readAttributeName() { 914 int start = bp; 915 nextChar(); 916 while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-')) 917 nextChar(); 918 return names.fromChars(buf, start, bp - start); 919 } 920 921 protected Name readTagName() { 922 int start = bp; 923 nextChar(); 924 while (bp < buflen 925 && (Character.isUnicodeIdentifierPart(ch) || ch == '.' 926 || ch == '-' || ch == ':')) { 927 nextChar(); 928 } 929 return names.fromChars(buf, start, bp - start); 930 } 931 932 protected boolean isJavaIdentifierStart(char ch) { 933 return Character.isJavaIdentifierStart(ch); 934 } 935 936 protected Name readJavaIdentifier() { 937 int start = bp; 938 nextChar(); 939 while (bp < buflen && Character.isJavaIdentifierPart(ch)) 940 nextChar(); 941 return names.fromChars(buf, start, bp - start); 942 } 943 944 protected boolean isDecimalDigit(char ch) { 945 return ('0' <= ch && ch <= '9'); 946 } 947 948 protected boolean isHexDigit(char ch) { 949 return ('0' <= ch && ch <= '9') 950 || ('a' <= ch && ch <= 'f') 951 || ('A' <= ch && ch <= 'F'); 952 } 953 954 protected boolean isUnquotedAttrValueTerminator(char ch) { 955 switch (ch) { 956 case '\f': case '\n': case '\r': case '\t': 957 case ' ': 958 case '"': case '\'': case '`': 959 case '=': case '<': case '>': 960 return true; 961 default: 962 return false; 963 } 964 } 965 966 protected boolean isWhitespace(char ch) { 967 return Character.isWhitespace(ch); 968 } 969 970 protected void skipWhitespace() { 971 while (isWhitespace(ch)) { 972 nextChar(); 973 } 974 } 975 976 /** 977 * @param start position of first character of string 978 * @param end position of character beyond last character to be included 979 */ 980 String newString(int start, int end) { 981 return new String(buf, start, end - start); 982 } 983 984 static abstract class TagParser { 985 enum Kind { INLINE, BLOCK } 986 987 final Kind kind; 988 final DCTree.Kind treeKind; 989 final boolean retainWhiteSpace; 990 991 992 TagParser(Kind k, DCTree.Kind tk) { 993 kind = k; 994 treeKind = tk; 995 retainWhiteSpace = false; 996 } 997 998 TagParser(Kind k, DCTree.Kind tk, boolean retainWhiteSpace) { 999 kind = k; 1000 treeKind = tk; 1001 this.retainWhiteSpace = retainWhiteSpace; 1002 } 1003 1004 Kind getKind() { 1005 return kind; 1006 } 1007 1008 DCTree.Kind getTreeKind() { 1009 return treeKind; 1010 } 1011 1012 abstract DCTree parse(int pos) throws ParseException; 1013 } 1014 1015 /** 1016 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a> 1017 */ 1018 private void initTagParsers() { 1019 TagParser[] parsers = { 1020 // @author name-text 1021 new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) { 1022 public DCTree parse(int pos) { 1023 List<DCTree> name = blockContent(); 1024 return m.at(pos).newAuthorTree(name); 1025 } 1026 }, 1027 1028 // {@code text} 1029 new TagParser(Kind.INLINE, DCTree.Kind.CODE, true) { 1030 public DCTree parse(int pos) throws ParseException { 1031 DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE); 1032 nextChar(); 1033 return m.at(pos).newCodeTree((DCText) text); 1034 } 1035 }, 1036 1037 // @deprecated deprecated-text 1038 new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) { 1039 public DCTree parse(int pos) { 1040 List<DCTree> reason = blockContent(); 1041 return m.at(pos).newDeprecatedTree(reason); 1042 } 1043 }, 1044 1045 // {@docRoot} 1046 new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) { 1047 public DCTree parse(int pos) throws ParseException { 1048 if (ch == '}') { 1049 nextChar(); 1050 return m.at(pos).newDocRootTree(); 1051 } 1052 inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content 1053 nextChar(); 1054 throw new ParseException("dc.unexpected.content"); 1055 } 1056 }, 1057 1058 // @exception class-name description 1059 new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) { 1060 public DCTree parse(int pos) throws ParseException { 1061 skipWhitespace(); 1062 DCReference ref = reference(false); 1063 List<DCTree> description = blockContent(); 1064 return m.at(pos).newExceptionTree(ref, description); 1065 } 1066 }, 1067 1068 // @hidden hidden-text 1069 new TagParser(Kind.BLOCK, DCTree.Kind.HIDDEN) { 1070 public DCTree parse(int pos) { 1071 List<DCTree> reason = blockContent(); 1072 return m.at(pos).newHiddenTree(reason); 1073 } 1074 }, 1075 1076 // @index search-term options-description 1077 new TagParser(Kind.INLINE, DCTree.Kind.INDEX) { 1078 public DCTree parse(int pos) throws ParseException { 1079 skipWhitespace(); 1080 if (ch == '}') { 1081 throw new ParseException("dc.no.content"); 1082 } 1083 DCTree term = ch == '"' ? quotedString() : inlineWord(); 1084 if (term == null) { 1085 throw new ParseException("dc.no.content"); 1086 } 1087 skipWhitespace(); 1088 List<DCTree> description = List.nil(); 1089 if (ch != '}') { 1090 description = inlineContent(); 1091 } else { 1092 nextChar(); 1093 } 1094 return m.at(pos).newIndexTree(term, description); 1095 } 1096 }, 1097 1098 // {@inheritDoc} 1099 new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) { 1100 public DCTree parse(int pos) throws ParseException { 1101 if (ch == '}') { 1102 nextChar(); 1103 return m.at(pos).newInheritDocTree(); 1104 } 1105 inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content 1106 nextChar(); 1107 throw new ParseException("dc.unexpected.content"); 1108 } 1109 }, 1110 1111 // {@link package.class#member label} 1112 new TagParser(Kind.INLINE, DCTree.Kind.LINK) { 1113 public DCTree parse(int pos) throws ParseException { 1114 DCReference ref = reference(true); 1115 List<DCTree> label = inlineContent(); 1116 return m.at(pos).newLinkTree(ref, label); 1117 } 1118 }, 1119 1120 // {@linkplain package.class#member label} 1121 new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) { 1122 public DCTree parse(int pos) throws ParseException { 1123 DCReference ref = reference(true); 1124 List<DCTree> label = inlineContent(); 1125 return m.at(pos).newLinkPlainTree(ref, label); 1126 } 1127 }, 1128 1129 // {@literal text} 1130 new TagParser(Kind.INLINE, DCTree.Kind.LITERAL, true) { 1131 public DCTree parse(int pos) throws ParseException { 1132 DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE); 1133 nextChar(); 1134 return m.at(pos).newLiteralTree((DCText) text); 1135 } 1136 }, 1137 1138 // @param parameter-name description 1139 new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) { 1140 public DCTree parse(int pos) throws ParseException { 1141 skipWhitespace(); 1142 1143 boolean typaram = false; 1144 if (ch == '<') { 1145 typaram = true; 1146 nextChar(); 1147 } 1148 1149 DCIdentifier id = identifier(); 1150 1151 if (typaram) { 1152 if (ch != '>') 1153 throw new ParseException("dc.gt.expected"); 1154 nextChar(); 1155 } 1156 1157 skipWhitespace(); 1158 List<DCTree> desc = blockContent(); 1159 return m.at(pos).newParamTree(typaram, id, desc); 1160 } 1161 }, 1162 1163 // @provides service-name description 1164 new TagParser(Kind.BLOCK, DCTree.Kind.PROVIDES) { 1165 public DCTree parse(int pos) throws ParseException { 1166 skipWhitespace(); 1167 DCReference ref = reference(true); 1168 List<DCTree> description = blockContent(); 1169 return m.at(pos).newProvidesTree(ref, description); 1170 } 1171 }, 1172 1173 // @return description 1174 new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) { 1175 public DCTree parse(int pos) { 1176 List<DCTree> description = blockContent(); 1177 return m.at(pos).newReturnTree(description); 1178 } 1179 }, 1180 1181 // @see reference | quoted-string | HTML 1182 new TagParser(Kind.BLOCK, DCTree.Kind.SEE) { 1183 public DCTree parse(int pos) throws ParseException { 1184 skipWhitespace(); 1185 switch (ch) { 1186 case '"': 1187 DCText string = quotedString(); 1188 if (string != null) { 1189 skipWhitespace(); 1190 if (ch == '@' 1191 || ch == EOI && bp == buf.length - 1) { 1192 return m.at(pos).newSeeTree(List.<DCTree>of(string)); 1193 } 1194 } 1195 break; 1196 1197 case '<': 1198 List<DCTree> html = blockContent(); 1199 if (html != null) 1200 return m.at(pos).newSeeTree(html); 1201 break; 1202 1203 case '@': 1204 if (newline) 1205 throw new ParseException("dc.no.content"); 1206 break; 1207 1208 case EOI: 1209 if (bp == buf.length - 1) 1210 throw new ParseException("dc.no.content"); 1211 break; 1212 1213 default: 1214 if (isJavaIdentifierStart(ch) || ch == '#') { 1215 DCReference ref = reference(true); 1216 List<DCTree> description = blockContent(); 1217 return m.at(pos).newSeeTree(description.prepend(ref)); 1218 } 1219 } 1220 throw new ParseException("dc.unexpected.content"); 1221 } 1222 }, 1223 1224 // @serialData data-description 1225 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) { 1226 public DCTree parse(int pos) { 1227 List<DCTree> description = blockContent(); 1228 return m.at(pos).newSerialDataTree(description); 1229 } 1230 }, 1231 1232 // @serialField field-name field-type description 1233 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) { 1234 public DCTree parse(int pos) throws ParseException { 1235 skipWhitespace(); 1236 DCIdentifier name = identifier(); 1237 skipWhitespace(); 1238 DCReference type = reference(false); 1239 List<DCTree> description = null; 1240 if (isWhitespace(ch)) { 1241 skipWhitespace(); 1242 description = blockContent(); 1243 } 1244 return m.at(pos).newSerialFieldTree(name, type, description); 1245 } 1246 }, 1247 1248 // @serial field-description | include | exclude 1249 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) { 1250 public DCTree parse(int pos) { 1251 List<DCTree> description = blockContent(); 1252 return m.at(pos).newSerialTree(description); 1253 } 1254 }, 1255 1256 // @since since-text 1257 new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) { 1258 public DCTree parse(int pos) { 1259 List<DCTree> description = blockContent(); 1260 return m.at(pos).newSinceTree(description); 1261 } 1262 }, 1263 1264 // @throws class-name description 1265 new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) { 1266 public DCTree parse(int pos) throws ParseException { 1267 skipWhitespace(); 1268 DCReference ref = reference(false); 1269 List<DCTree> description = blockContent(); 1270 return m.at(pos).newThrowsTree(ref, description); 1271 } 1272 }, 1273 1274 // @uses service-name description 1275 new TagParser(Kind.BLOCK, DCTree.Kind.USES) { 1276 public DCTree parse(int pos) throws ParseException { 1277 skipWhitespace(); 1278 DCReference ref = reference(true); 1279 List<DCTree> description = blockContent(); 1280 return m.at(pos).newUsesTree(ref, description); 1281 } 1282 }, 1283 1284 // {@value package.class#field} 1285 new TagParser(Kind.INLINE, DCTree.Kind.VALUE) { 1286 public DCTree parse(int pos) throws ParseException { 1287 DCReference ref = reference(true); 1288 skipWhitespace(); 1289 if (ch == '}') { 1290 nextChar(); 1291 return m.at(pos).newValueTree(ref); 1292 } 1293 nextChar(); 1294 throw new ParseException("dc.unexpected.content"); 1295 } 1296 }, 1297 1298 // @version version-text 1299 new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) { 1300 public DCTree parse(int pos) { 1301 List<DCTree> description = blockContent(); 1302 return m.at(pos).newVersionTree(description); 1303 } 1304 }, 1305 }; 1306 1307 tagParsers = new HashMap<>(); 1308 for (TagParser p: parsers) 1309 tagParsers.put(names.fromString(p.getTreeKind().tagName), p); 1310 1311 } 1312} 1313