DocCommentParser.java revision 2571:10fc81ac75b4
1/* 2 * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.tools.javac.parser; 27 28import java.text.BreakIterator; 29import java.util.Arrays; 30import java.util.HashMap; 31import java.util.HashSet; 32import java.util.Locale; 33import java.util.Map; 34import java.util.Set; 35 36import com.sun.source.doctree.AttributeTree.ValueKind; 37import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind; 38import com.sun.tools.javac.parser.Tokens.Comment; 39import com.sun.tools.javac.parser.Tokens.TokenKind; 40import com.sun.tools.javac.tree.DCTree; 41import com.sun.tools.javac.tree.DCTree.DCAttribute; 42import com.sun.tools.javac.tree.DCTree.DCDocComment; 43import com.sun.tools.javac.tree.DCTree.DCEndElement; 44import com.sun.tools.javac.tree.DCTree.DCEndPosTree; 45import com.sun.tools.javac.tree.DCTree.DCErroneous; 46import com.sun.tools.javac.tree.DCTree.DCIdentifier; 47import com.sun.tools.javac.tree.DCTree.DCReference; 48import com.sun.tools.javac.tree.DCTree.DCStartElement; 49import com.sun.tools.javac.tree.DCTree.DCText; 50import com.sun.tools.javac.tree.DocTreeMaker; 51import com.sun.tools.javac.tree.JCTree; 52import com.sun.tools.javac.util.DiagnosticSource; 53import com.sun.tools.javac.util.List; 54import com.sun.tools.javac.util.ListBuffer; 55import com.sun.tools.javac.util.Log; 56import com.sun.tools.javac.util.Name; 57import com.sun.tools.javac.util.Names; 58import com.sun.tools.javac.util.Options; 59import com.sun.tools.javac.util.Position; 60import com.sun.tools.javac.util.StringUtils; 61import static com.sun.tools.javac.util.LayoutCharacters.*; 62 63/** 64 * 65 * <p><b>This is NOT part of any supported API. 66 * If you write code that depends on this, you do so at your own risk. 67 * This code and its internal interfaces are subject to change or 68 * deletion without notice.</b> 69 */ 70public class DocCommentParser { 71 static class ParseException extends Exception { 72 private static final long serialVersionUID = 0; 73 ParseException(String key) { 74 super(key); 75 } 76 } 77 78 final ParserFactory fac; 79 final DiagnosticSource diagSource; 80 final Comment comment; 81 final DocTreeMaker m; 82 final Names names; 83 84 BreakIterator sentenceBreaker; 85 86 /** The input buffer, index of most recent character read, 87 * index of one past last character in buffer. 88 */ 89 protected char[] buf; 90 protected int bp; 91 protected int buflen; 92 93 /** The current character. 94 */ 95 protected char ch; 96 97 int textStart = -1; 98 int lastNonWhite = -1; 99 boolean newline = true; 100 101 Map<Name, TagParser> tagParsers; 102 103 DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) { 104 this.fac = fac; 105 this.diagSource = diagSource; 106 this.comment = comment; 107 names = fac.names; 108 m = fac.docTreeMaker; 109 110 Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale; 111 112 Options options = fac.options; 113 boolean useBreakIterator = options.isSet("breakIterator"); 114 if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) 115 sentenceBreaker = BreakIterator.getSentenceInstance(locale); 116 117 initTagParsers(); 118 } 119 120 DCDocComment parse() { 121 String c = comment.getText(); 122 buf = new char[c.length() + 1]; 123 c.getChars(0, c.length(), buf, 0); 124 buf[buf.length - 1] = EOI; 125 buflen = buf.length - 1; 126 bp = -1; 127 nextChar(); 128 129 List<DCTree> body = blockContent(); 130 List<DCTree> tags = blockTags(); 131 132 // split body into first sentence and body 133 ListBuffer<DCTree> fs = new ListBuffer<>(); 134 loop: 135 for (; body.nonEmpty(); body = body.tail) { 136 DCTree t = body.head; 137 switch (t.getKind()) { 138 case TEXT: 139 String s = ((DCText) t).getBody(); 140 int i = getSentenceBreak(s); 141 if (i > 0) { 142 int i0 = i; 143 while (i0 > 0 && isWhitespace(s.charAt(i0 - 1))) 144 i0--; 145 fs.add(m.at(t.pos).Text(s.substring(0, i0))); 146 int i1 = i; 147 while (i1 < s.length() && isWhitespace(s.charAt(i1))) 148 i1++; 149 body = body.tail; 150 if (i1 < s.length()) 151 body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1))); 152 break loop; 153 } else if (body.tail.nonEmpty()) { 154 if (isSentenceBreak(body.tail.head)) { 155 int i0 = s.length() - 1; 156 while (i0 > 0 && isWhitespace(s.charAt(i0))) 157 i0--; 158 fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1))); 159 body = body.tail; 160 break loop; 161 } 162 } 163 break; 164 165 case START_ELEMENT: 166 case END_ELEMENT: 167 if (isSentenceBreak(t)) 168 break loop; 169 break; 170 } 171 fs.add(t); 172 } 173 174 @SuppressWarnings("unchecked") 175 DCTree first = getFirst(fs.toList(), body, tags); 176 int pos = (first == null) ? Position.NOPOS : first.pos; 177 178 DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags); 179 return dc; 180 } 181 182 void nextChar() { 183 ch = buf[bp < buflen ? ++bp : buflen]; 184 switch (ch) { 185 case '\f': case '\n': case '\r': 186 newline = true; 187 } 188 } 189 190 /** 191 * Read block content, consisting of text, html and inline tags. 192 * Terminated by the end of input, or the beginning of the next block tag: 193 * i.e. @ as the first non-whitespace character on a line. 194 */ 195 @SuppressWarnings("fallthrough") 196 protected List<DCTree> blockContent() { 197 ListBuffer<DCTree> trees = new ListBuffer<>(); 198 textStart = -1; 199 200 loop: 201 while (bp < buflen) { 202 switch (ch) { 203 case '\n': case '\r': case '\f': 204 newline = true; 205 // fallthrough 206 207 case ' ': case '\t': 208 nextChar(); 209 break; 210 211 case '&': 212 entity(trees); 213 break; 214 215 case '<': 216 newline = false; 217 addPendingText(trees, bp - 1); 218 trees.add(html()); 219 if (textStart == -1) { 220 textStart = bp; 221 lastNonWhite = -1; 222 } 223 break; 224 225 case '>': 226 newline = false; 227 addPendingText(trees, bp - 1); 228 trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt")); 229 nextChar(); 230 if (textStart == -1) { 231 textStart = bp; 232 lastNonWhite = -1; 233 } 234 break; 235 236 case '{': 237 inlineTag(trees); 238 break; 239 240 case '@': 241 if (newline) { 242 addPendingText(trees, lastNonWhite); 243 break loop; 244 } 245 // fallthrough 246 247 default: 248 newline = false; 249 if (textStart == -1) 250 textStart = bp; 251 lastNonWhite = bp; 252 nextChar(); 253 } 254 } 255 256 if (lastNonWhite != -1) 257 addPendingText(trees, lastNonWhite); 258 259 return trees.toList(); 260 } 261 262 /** 263 * Read a series of block tags, including their content. 264 * Standard tags parse their content appropriately. 265 * Non-standard tags are represented by {@link UnknownBlockTag}. 266 */ 267 protected List<DCTree> blockTags() { 268 ListBuffer<DCTree> tags = new ListBuffer<>(); 269 while (ch == '@') 270 tags.add(blockTag()); 271 return tags.toList(); 272 } 273 274 /** 275 * Read a single block tag, including its content. 276 * Standard tags parse their content appropriately. 277 * Non-standard tags are represented by {@link UnknownBlockTag}. 278 */ 279 protected DCTree blockTag() { 280 int p = bp; 281 try { 282 nextChar(); 283 if (isIdentifierStart(ch)) { 284 Name name = readTagName(); 285 TagParser tp = tagParsers.get(name); 286 if (tp == null) { 287 List<DCTree> content = blockContent(); 288 return m.at(p).UnknownBlockTag(name, content); 289 } else { 290 switch (tp.getKind()) { 291 case BLOCK: 292 return tp.parse(p); 293 case INLINE: 294 return erroneous("dc.bad.inline.tag", p); 295 } 296 } 297 } 298 blockContent(); 299 300 return erroneous("dc.no.tag.name", p); 301 } catch (ParseException e) { 302 blockContent(); 303 return erroneous(e.getMessage(), p); 304 } 305 } 306 307 protected void inlineTag(ListBuffer<DCTree> list) { 308 newline = false; 309 nextChar(); 310 if (ch == '@') { 311 addPendingText(list, bp - 2); 312 list.add(inlineTag()); 313 textStart = bp; 314 lastNonWhite = -1; 315 } else { 316 if (textStart == -1) 317 textStart = bp - 1; 318 lastNonWhite = bp; 319 } 320 } 321 322 /** 323 * Read a single inline tag, including its content. 324 * Standard tags parse their content appropriately. 325 * Non-standard tags are represented by {@link UnknownBlockTag}. 326 * Malformed tags may be returned as {@link Erroneous}. 327 */ 328 protected DCTree inlineTag() { 329 int p = bp - 1; 330 try { 331 nextChar(); 332 if (isIdentifierStart(ch)) { 333 Name name = readTagName(); 334 skipWhitespace(); 335 336 TagParser tp = tagParsers.get(name); 337 if (tp == null) { 338 DCTree text = inlineText(); 339 if (text != null) { 340 nextChar(); 341 return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp); 342 } 343 } else if (tp.getKind() == TagParser.Kind.INLINE) { 344 DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p); 345 if (tree != null) { 346 return tree.setEndPos(bp); 347 } 348 } else { 349 inlineText(); // skip content 350 nextChar(); 351 } 352 } 353 return erroneous("dc.no.tag.name", p); 354 } catch (ParseException e) { 355 return erroneous(e.getMessage(), p); 356 } 357 } 358 359 /** 360 * Read plain text content of an inline tag. 361 * Matching pairs of { } are skipped; the text is terminated by the first 362 * unmatched }. It is an error if the beginning of the next tag is detected. 363 */ 364 protected DCTree inlineText() throws ParseException { 365 skipWhitespace(); 366 int pos = bp; 367 int depth = 1; 368 369 loop: 370 while (bp < buflen) { 371 switch (ch) { 372 case '\n': case '\r': case '\f': 373 newline = true; 374 break; 375 376 case ' ': case '\t': 377 break; 378 379 case '{': 380 newline = false; 381 lastNonWhite = bp; 382 depth++; 383 break; 384 385 case '}': 386 if (--depth == 0) { 387 return m.at(pos).Text(newString(pos, bp)); 388 } 389 newline = false; 390 lastNonWhite = bp; 391 break; 392 393 case '@': 394 if (newline) 395 break loop; 396 newline = false; 397 lastNonWhite = bp; 398 break; 399 400 default: 401 newline = false; 402 lastNonWhite = bp; 403 break; 404 } 405 nextChar(); 406 } 407 throw new ParseException("dc.unterminated.inline.tag"); 408 } 409 410 /** 411 * Read Java class name, possibly followed by member 412 * Matching pairs of < > are skipped. The text is terminated by the first 413 * unmatched }. It is an error if the beginning of the next tag is detected. 414 */ 415 // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE 416 // TODO: improve quality of parse to forbid bad constructions. 417 @SuppressWarnings("fallthrough") 418 protected DCReference reference(boolean allowMember) throws ParseException { 419 int pos = bp; 420 int depth = 0; 421 422 // scan to find the end of the signature, by looking for the first 423 // whitespace not enclosed in () or <>, or the end of the tag 424 loop: 425 while (bp < buflen) { 426 switch (ch) { 427 case '\n': case '\r': case '\f': 428 newline = true; 429 // fallthrough 430 431 case ' ': case '\t': 432 if (depth == 0) 433 break loop; 434 break; 435 436 case '(': 437 case '<': 438 newline = false; 439 depth++; 440 break; 441 442 case ')': 443 case '>': 444 newline = false; 445 --depth; 446 break; 447 448 case '}': 449 if (bp == pos) 450 return null; 451 newline = false; 452 break loop; 453 454 case '@': 455 if (newline) 456 break loop; 457 // fallthrough 458 459 default: 460 newline = false; 461 462 } 463 nextChar(); 464 } 465 466 if (depth != 0) 467 throw new ParseException("dc.unterminated.signature"); 468 469 String sig = newString(pos, bp); 470 471 // Break sig apart into qualifiedExpr member paramTypes. 472 JCTree qualExpr; 473 Name member; 474 List<JCTree> paramTypes; 475 476 Log.DeferredDiagnosticHandler deferredDiagnosticHandler 477 = new Log.DeferredDiagnosticHandler(fac.log); 478 479 try { 480 int hash = sig.indexOf("#"); 481 int lparen = sig.indexOf("(", hash + 1); 482 if (hash == -1) { 483 if (lparen == -1) { 484 qualExpr = parseType(sig); 485 member = null; 486 } else { 487 qualExpr = null; 488 member = parseMember(sig.substring(0, lparen)); 489 } 490 } else { 491 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash)); 492 if (lparen == -1) 493 member = parseMember(sig.substring(hash + 1)); 494 else 495 member = parseMember(sig.substring(hash + 1, lparen)); 496 } 497 498 if (lparen < 0) { 499 paramTypes = null; 500 } else { 501 int rparen = sig.indexOf(")", lparen); 502 if (rparen != sig.length() - 1) 503 throw new ParseException("dc.ref.bad.parens"); 504 paramTypes = parseParams(sig.substring(lparen + 1, rparen)); 505 } 506 507 if (!deferredDiagnosticHandler.getDiagnostics().isEmpty()) 508 throw new ParseException("dc.ref.syntax.error"); 509 510 } finally { 511 fac.log.popDiagnosticHandler(deferredDiagnosticHandler); 512 } 513 514 return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp); 515 } 516 517 JCTree parseType(String s) throws ParseException { 518 JavacParser p = fac.newParser(s, false, false, false); 519 JCTree tree = p.parseType(); 520 if (p.token().kind != TokenKind.EOF) 521 throw new ParseException("dc.ref.unexpected.input"); 522 return tree; 523 } 524 525 Name parseMember(String s) throws ParseException { 526 JavacParser p = fac.newParser(s, false, false, false); 527 Name name = p.ident(); 528 if (p.token().kind != TokenKind.EOF) 529 throw new ParseException("dc.ref.unexpected.input"); 530 return name; 531 } 532 533 List<JCTree> parseParams(String s) throws ParseException { 534 if (s.trim().isEmpty()) 535 return List.nil(); 536 537 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false); 538 ListBuffer<JCTree> paramTypes = new ListBuffer<>(); 539 paramTypes.add(p.parseType()); 540 541 if (p.token().kind == TokenKind.IDENTIFIER) 542 p.nextToken(); 543 544 while (p.token().kind == TokenKind.COMMA) { 545 p.nextToken(); 546 paramTypes.add(p.parseType()); 547 548 if (p.token().kind == TokenKind.IDENTIFIER) 549 p.nextToken(); 550 } 551 552 if (p.token().kind != TokenKind.EOF) 553 throw new ParseException("dc.ref.unexpected.input"); 554 555 return paramTypes.toList(); 556 } 557 558 /** 559 * Read Java identifier 560 * Matching pairs of { } are skipped; the text is terminated by the first 561 * unmatched }. It is an error if the beginning of the next tag is detected. 562 */ 563 @SuppressWarnings("fallthrough") 564 protected DCIdentifier identifier() throws ParseException { 565 skipWhitespace(); 566 int pos = bp; 567 568 if (isJavaIdentifierStart(ch)) { 569 Name name = readJavaIdentifier(); 570 return m.at(pos).Identifier(name); 571 } 572 573 throw new ParseException("dc.identifier.expected"); 574 } 575 576 /** 577 * Read a quoted string. 578 * It is an error if the beginning of the next tag is detected. 579 */ 580 @SuppressWarnings("fallthrough") 581 protected DCText quotedString() { 582 int pos = bp; 583 nextChar(); 584 585 loop: 586 while (bp < buflen) { 587 switch (ch) { 588 case '\n': case '\r': case '\f': 589 newline = true; 590 break; 591 592 case ' ': case '\t': 593 break; 594 595 case '"': 596 nextChar(); 597 // trim trailing white-space? 598 return m.at(pos).Text(newString(pos, bp)); 599 600 case '@': 601 if (newline) 602 break loop; 603 604 } 605 nextChar(); 606 } 607 return null; 608 } 609 610 /** 611 * Read general text content of an inline tag, including HTML entities and elements. 612 * Matching pairs of { } are skipped; the text is terminated by the first 613 * unmatched }. It is an error if the beginning of the next tag is detected. 614 */ 615 @SuppressWarnings("fallthrough") 616 protected List<DCTree> inlineContent() { 617 ListBuffer<DCTree> trees = new ListBuffer<>(); 618 619 skipWhitespace(); 620 int pos = bp; 621 int depth = 1; 622 textStart = -1; 623 624 loop: 625 while (bp < buflen) { 626 627 switch (ch) { 628 case '\n': case '\r': case '\f': 629 newline = true; 630 // fall through 631 632 case ' ': case '\t': 633 nextChar(); 634 break; 635 636 case '&': 637 entity(trees); 638 break; 639 640 case '<': 641 newline = false; 642 addPendingText(trees, bp - 1); 643 trees.add(html()); 644 break; 645 646 case '{': 647 newline = false; 648 depth++; 649 nextChar(); 650 break; 651 652 case '}': 653 newline = false; 654 if (--depth == 0) { 655 addPendingText(trees, bp - 1); 656 nextChar(); 657 return trees.toList(); 658 } 659 nextChar(); 660 break; 661 662 case '@': 663 if (newline) 664 break loop; 665 // fallthrough 666 667 default: 668 if (textStart == -1) 669 textStart = bp; 670 nextChar(); 671 break; 672 } 673 } 674 675 return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos)); 676 } 677 678 protected void entity(ListBuffer<DCTree> list) { 679 newline = false; 680 addPendingText(list, bp - 1); 681 list.add(entity()); 682 if (textStart == -1) { 683 textStart = bp; 684 lastNonWhite = -1; 685 } 686 } 687 688 /** 689 * Read an HTML entity. 690 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; } 691 */ 692 protected DCTree entity() { 693 int p = bp; 694 nextChar(); 695 Name name = null; 696 if (ch == '#') { 697 int namep = bp; 698 nextChar(); 699 if (isDecimalDigit(ch)) { 700 nextChar(); 701 while (isDecimalDigit(ch)) 702 nextChar(); 703 name = names.fromChars(buf, namep, bp - namep); 704 } else if (ch == 'x' || ch == 'X') { 705 nextChar(); 706 if (isHexDigit(ch)) { 707 nextChar(); 708 while (isHexDigit(ch)) 709 nextChar(); 710 name = names.fromChars(buf, namep, bp - namep); 711 } 712 } 713 } else if (isIdentifierStart(ch)) { 714 name = readIdentifier(); 715 } 716 717 if (name == null) 718 return erroneous("dc.bad.entity", p); 719 else { 720 if (ch != ';') 721 return erroneous("dc.missing.semicolon", p); 722 nextChar(); 723 return m.at(p).Entity(name); 724 } 725 } 726 727 /** 728 * Read the start or end of an HTML tag, or an HTML comment 729 * {@literal <identifier attrs> } or {@literal </identifier> } 730 */ 731 protected DCTree html() { 732 int p = bp; 733 nextChar(); 734 if (isIdentifierStart(ch)) { 735 Name name = readIdentifier(); 736 List<DCTree> attrs = htmlAttrs(); 737 if (attrs != null) { 738 boolean selfClosing = false; 739 if (ch == '/') { 740 nextChar(); 741 selfClosing = true; 742 } 743 if (ch == '>') { 744 nextChar(); 745 return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp); 746 } 747 } 748 } else if (ch == '/') { 749 nextChar(); 750 if (isIdentifierStart(ch)) { 751 Name name = readIdentifier(); 752 skipWhitespace(); 753 if (ch == '>') { 754 nextChar(); 755 return m.at(p).EndElement(name); 756 } 757 } 758 } else if (ch == '!') { 759 nextChar(); 760 if (ch == '-') { 761 nextChar(); 762 if (ch == '-') { 763 nextChar(); 764 while (bp < buflen) { 765 int dash = 0; 766 while (ch == '-') { 767 dash++; 768 nextChar(); 769 } 770 // strictly speaking, a comment should not contain "--" 771 // so dash > 2 is an error, dash == 2 implies ch == '>' 772 if (dash >= 2 && ch == '>') { 773 nextChar(); 774 return m.at(p).Comment(newString(p, bp)); 775 } 776 777 nextChar(); 778 } 779 } 780 } 781 } 782 783 bp = p + 1; 784 ch = buf[bp]; 785 return erroneous("dc.malformed.html", p); 786 } 787 788 /** 789 * Read a series of HTML attributes, terminated by {@literal > }. 790 * Each attribute is of the form {@literal identifier[=value] }. 791 * "value" may be unquoted, single-quoted, or double-quoted. 792 */ 793 protected List<DCTree> htmlAttrs() { 794 ListBuffer<DCTree> attrs = new ListBuffer<>(); 795 skipWhitespace(); 796 797 loop: 798 while (isIdentifierStart(ch)) { 799 int namePos = bp; 800 Name name = readIdentifier(); 801 skipWhitespace(); 802 List<DCTree> value = null; 803 ValueKind vkind = ValueKind.EMPTY; 804 if (ch == '=') { 805 ListBuffer<DCTree> v = new ListBuffer<>(); 806 nextChar(); 807 skipWhitespace(); 808 if (ch == '\'' || ch == '"') { 809 vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE; 810 char quote = ch; 811 nextChar(); 812 textStart = bp; 813 while (bp < buflen && ch != quote) { 814 if (newline && ch == '@') { 815 attrs.add(erroneous("dc.unterminated.string", namePos)); 816 // No point trying to read more. 817 // In fact, all attrs get discarded by the caller 818 // and superseded by a malformed.html node because 819 // the html tag itself is not terminated correctly. 820 break loop; 821 } 822 attrValueChar(v); 823 } 824 addPendingText(v, bp - 1); 825 nextChar(); 826 } else { 827 vkind = ValueKind.UNQUOTED; 828 textStart = bp; 829 while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) { 830 attrValueChar(v); 831 } 832 addPendingText(v, bp - 1); 833 } 834 skipWhitespace(); 835 value = v.toList(); 836 } 837 DCAttribute attr = m.at(namePos).Attribute(name, vkind, value); 838 attrs.add(attr); 839 } 840 841 return attrs.toList(); 842 } 843 844 protected void attrValueChar(ListBuffer<DCTree> list) { 845 switch (ch) { 846 case '&': 847 entity(list); 848 break; 849 850 case '{': 851 inlineTag(list); 852 break; 853 854 default: 855 nextChar(); 856 } 857 } 858 859 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) { 860 if (textStart != -1) { 861 if (textStart <= textEnd) { 862 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1))); 863 } 864 textStart = -1; 865 } 866 } 867 868 protected DCErroneous erroneous(String code, int pos) { 869 int i = bp - 1; 870 loop: 871 while (i > pos) { 872 switch (buf[i]) { 873 case '\f': case '\n': case '\r': 874 newline = true; 875 break; 876 case '\t': case ' ': 877 break; 878 default: 879 break loop; 880 } 881 i--; 882 } 883 textStart = -1; 884 return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code); 885 } 886 887 @SuppressWarnings("unchecked") 888 <T> T getFirst(List<T>... lists) { 889 for (List<T> list: lists) { 890 if (list.nonEmpty()) 891 return list.head; 892 } 893 return null; 894 } 895 896 protected boolean isIdentifierStart(char ch) { 897 return Character.isUnicodeIdentifierStart(ch); 898 } 899 900 protected Name readIdentifier() { 901 int start = bp; 902 nextChar(); 903 while (bp < buflen && Character.isUnicodeIdentifierPart(ch)) 904 nextChar(); 905 return names.fromChars(buf, start, bp - start); 906 } 907 908 protected Name readTagName() { 909 int start = bp; 910 nextChar(); 911 while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.')) 912 nextChar(); 913 return names.fromChars(buf, start, bp - start); 914 } 915 916 protected boolean isJavaIdentifierStart(char ch) { 917 return Character.isJavaIdentifierStart(ch); 918 } 919 920 protected Name readJavaIdentifier() { 921 int start = bp; 922 nextChar(); 923 while (bp < buflen && Character.isJavaIdentifierPart(ch)) 924 nextChar(); 925 return names.fromChars(buf, start, bp - start); 926 } 927 928 protected boolean isDecimalDigit(char ch) { 929 return ('0' <= ch && ch <= '9'); 930 } 931 932 protected boolean isHexDigit(char ch) { 933 return ('0' <= ch && ch <= '9') 934 || ('a' <= ch && ch <= 'f') 935 || ('A' <= ch && ch <= 'F'); 936 } 937 938 protected boolean isUnquotedAttrValueTerminator(char ch) { 939 switch (ch) { 940 case '\f': case '\n': case '\r': case '\t': 941 case ' ': 942 case '"': case '\'': case '`': 943 case '=': case '<': case '>': 944 return true; 945 default: 946 return false; 947 } 948 } 949 950 protected boolean isWhitespace(char ch) { 951 return Character.isWhitespace(ch); 952 } 953 954 protected void skipWhitespace() { 955 while (isWhitespace(ch)) 956 nextChar(); 957 } 958 959 protected int getSentenceBreak(String s) { 960 if (sentenceBreaker != null) { 961 sentenceBreaker.setText(s); 962 int i = sentenceBreaker.next(); 963 return (i == s.length()) ? -1 : i; 964 } 965 966 // scan for period followed by whitespace 967 boolean period = false; 968 for (int i = 0; i < s.length(); i++) { 969 switch (s.charAt(i)) { 970 case '.': 971 period = true; 972 break; 973 974 case ' ': 975 case '\f': 976 case '\n': 977 case '\r': 978 case '\t': 979 if (period) 980 return i; 981 break; 982 983 default: 984 period = false; 985 break; 986 } 987 } 988 return -1; 989 } 990 991 992 Set<String> htmlBlockTags = new HashSet<>(Arrays.asList( 993 "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre")); 994 995 protected boolean isSentenceBreak(Name n) { 996 return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString())); 997 } 998 999 protected boolean isSentenceBreak(DCTree t) { 1000 switch (t.getKind()) { 1001 case START_ELEMENT: 1002 return isSentenceBreak(((DCStartElement) t).getName()); 1003 1004 case END_ELEMENT: 1005 return isSentenceBreak(((DCEndElement) t).getName()); 1006 } 1007 return false; 1008 } 1009 1010 /** 1011 * @param start position of first character of string 1012 * @param end position of character beyond last character to be included 1013 */ 1014 String newString(int start, int end) { 1015 return new String(buf, start, end - start); 1016 } 1017 1018 static abstract class TagParser { 1019 enum Kind { INLINE, BLOCK } 1020 1021 Kind kind; 1022 DCTree.Kind treeKind; 1023 1024 TagParser(Kind k, DCTree.Kind tk) { 1025 kind = k; 1026 treeKind = tk; 1027 } 1028 1029 Kind getKind() { 1030 return kind; 1031 } 1032 1033 DCTree.Kind getTreeKind() { 1034 return treeKind; 1035 } 1036 1037 abstract DCTree parse(int pos) throws ParseException; 1038 } 1039 1040 /** 1041 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a> 1042 */ 1043 private void initTagParsers() { 1044 TagParser[] parsers = { 1045 // @author name-text 1046 new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) { 1047 public DCTree parse(int pos) { 1048 List<DCTree> name = blockContent(); 1049 return m.at(pos).Author(name); 1050 } 1051 }, 1052 1053 // {@code text} 1054 new TagParser(Kind.INLINE, DCTree.Kind.CODE) { 1055 public DCTree parse(int pos) throws ParseException { 1056 DCTree text = inlineText(); 1057 nextChar(); 1058 return m.at(pos).Code((DCText) text); 1059 } 1060 }, 1061 1062 // @deprecated deprecated-text 1063 new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) { 1064 public DCTree parse(int pos) { 1065 List<DCTree> reason = blockContent(); 1066 return m.at(pos).Deprecated(reason); 1067 } 1068 }, 1069 1070 // {@docRoot} 1071 new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) { 1072 public DCTree parse(int pos) throws ParseException { 1073 if (ch == '}') { 1074 nextChar(); 1075 return m.at(pos).DocRoot(); 1076 } 1077 inlineText(); // skip unexpected content 1078 nextChar(); 1079 throw new ParseException("dc.unexpected.content"); 1080 } 1081 }, 1082 1083 // @exception class-name description 1084 new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) { 1085 public DCTree parse(int pos) throws ParseException { 1086 skipWhitespace(); 1087 DCReference ref = reference(false); 1088 List<DCTree> description = blockContent(); 1089 return m.at(pos).Exception(ref, description); 1090 } 1091 }, 1092 1093 // {@inheritDoc} 1094 new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) { 1095 public DCTree parse(int pos) throws ParseException { 1096 if (ch == '}') { 1097 nextChar(); 1098 return m.at(pos).InheritDoc(); 1099 } 1100 inlineText(); // skip unexpected content 1101 nextChar(); 1102 throw new ParseException("dc.unexpected.content"); 1103 } 1104 }, 1105 1106 // {@link package.class#member label} 1107 new TagParser(Kind.INLINE, DCTree.Kind.LINK) { 1108 public DCTree parse(int pos) throws ParseException { 1109 DCReference ref = reference(true); 1110 List<DCTree> label = inlineContent(); 1111 return m.at(pos).Link(ref, label); 1112 } 1113 }, 1114 1115 // {@linkplain package.class#member label} 1116 new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) { 1117 public DCTree parse(int pos) throws ParseException { 1118 DCReference ref = reference(true); 1119 List<DCTree> label = inlineContent(); 1120 return m.at(pos).LinkPlain(ref, label); 1121 } 1122 }, 1123 1124 // {@literal text} 1125 new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) { 1126 public DCTree parse(int pos) throws ParseException { 1127 DCTree text = inlineText(); 1128 nextChar(); 1129 return m.at(pos).Literal((DCText) text); 1130 } 1131 }, 1132 1133 // @param parameter-name description 1134 new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) { 1135 public DCTree parse(int pos) throws ParseException { 1136 skipWhitespace(); 1137 1138 boolean typaram = false; 1139 if (ch == '<') { 1140 typaram = true; 1141 nextChar(); 1142 } 1143 1144 DCIdentifier id = identifier(); 1145 1146 if (typaram) { 1147 if (ch != '>') 1148 throw new ParseException("dc.gt.expected"); 1149 nextChar(); 1150 } 1151 1152 skipWhitespace(); 1153 List<DCTree> desc = blockContent(); 1154 return m.at(pos).Param(typaram, id, desc); 1155 } 1156 }, 1157 1158 // @return description 1159 new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) { 1160 public DCTree parse(int pos) { 1161 List<DCTree> description = blockContent(); 1162 return m.at(pos).Return(description); 1163 } 1164 }, 1165 1166 // @see reference | quoted-string | HTML 1167 new TagParser(Kind.BLOCK, DCTree.Kind.SEE) { 1168 public DCTree parse(int pos) throws ParseException { 1169 skipWhitespace(); 1170 switch (ch) { 1171 case '"': 1172 DCText string = quotedString(); 1173 if (string != null) { 1174 skipWhitespace(); 1175 if (ch == '@' 1176 || ch == EOI && bp == buf.length - 1) { 1177 return m.at(pos).See(List.<DCTree>of(string)); 1178 } 1179 } 1180 break; 1181 1182 case '<': 1183 List<DCTree> html = blockContent(); 1184 if (html != null) 1185 return m.at(pos).See(html); 1186 break; 1187 1188 case '@': 1189 if (newline) 1190 throw new ParseException("dc.no.content"); 1191 break; 1192 1193 case EOI: 1194 if (bp == buf.length - 1) 1195 throw new ParseException("dc.no.content"); 1196 break; 1197 1198 default: 1199 if (isJavaIdentifierStart(ch) || ch == '#') { 1200 DCReference ref = reference(true); 1201 List<DCTree> description = blockContent(); 1202 return m.at(pos).See(description.prepend(ref)); 1203 } 1204 } 1205 throw new ParseException("dc.unexpected.content"); 1206 } 1207 }, 1208 1209 // @serialData data-description 1210 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) { 1211 public DCTree parse(int pos) { 1212 List<DCTree> description = blockContent(); 1213 return m.at(pos).SerialData(description); 1214 } 1215 }, 1216 1217 // @serialField field-name field-type description 1218 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) { 1219 public DCTree parse(int pos) throws ParseException { 1220 skipWhitespace(); 1221 DCIdentifier name = identifier(); 1222 skipWhitespace(); 1223 DCReference type = reference(false); 1224 List<DCTree> description = null; 1225 if (isWhitespace(ch)) { 1226 skipWhitespace(); 1227 description = blockContent(); 1228 } 1229 return m.at(pos).SerialField(name, type, description); 1230 } 1231 }, 1232 1233 // @serial field-description | include | exclude 1234 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) { 1235 public DCTree parse(int pos) { 1236 List<DCTree> description = blockContent(); 1237 return m.at(pos).Serial(description); 1238 } 1239 }, 1240 1241 // @since since-text 1242 new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) { 1243 public DCTree parse(int pos) { 1244 List<DCTree> description = blockContent(); 1245 return m.at(pos).Since(description); 1246 } 1247 }, 1248 1249 // @throws class-name description 1250 new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) { 1251 public DCTree parse(int pos) throws ParseException { 1252 skipWhitespace(); 1253 DCReference ref = reference(false); 1254 List<DCTree> description = blockContent(); 1255 return m.at(pos).Throws(ref, description); 1256 } 1257 }, 1258 1259 // {@value package.class#field} 1260 new TagParser(Kind.INLINE, DCTree.Kind.VALUE) { 1261 public DCTree parse(int pos) throws ParseException { 1262 DCReference ref = reference(true); 1263 skipWhitespace(); 1264 if (ch == '}') { 1265 nextChar(); 1266 return m.at(pos).Value(ref); 1267 } 1268 nextChar(); 1269 throw new ParseException("dc.unexpected.content"); 1270 } 1271 }, 1272 1273 // @version version-text 1274 new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) { 1275 public DCTree parse(int pos) { 1276 List<DCTree> description = blockContent(); 1277 return m.at(pos).Version(description); 1278 } 1279 }, 1280 }; 1281 1282 tagParsers = new HashMap<>(); 1283 for (TagParser p: parsers) 1284 tagParsers.put(names.fromString(p.getTreeKind().tagName), p); 1285 1286 } 1287} 1288