// RawCommentList.cpp revision 239462
1239313Sdim//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// 2239313Sdim// 3239313Sdim// The LLVM Compiler Infrastructure 4239313Sdim// 5239313Sdim// This file is distributed under the University of Illinois Open Source 6239313Sdim// License. See LICENSE.TXT for details. 7239313Sdim// 8239313Sdim//===----------------------------------------------------------------------===// 9239313Sdim 10239313Sdim#include "clang/AST/RawCommentList.h" 11239313Sdim#include "clang/AST/ASTContext.h" 12239313Sdim#include "clang/AST/Comment.h" 13239313Sdim#include "clang/AST/CommentLexer.h" 14239313Sdim#include "clang/AST/CommentBriefParser.h" 15239313Sdim#include "clang/AST/CommentSema.h" 16239313Sdim#include "clang/AST/CommentParser.h" 17239313Sdim#include "clang/AST/CommentCommandTraits.h" 18239313Sdim#include "llvm/ADT/STLExtras.h" 19239313Sdim 20239313Sdimusing namespace clang; 21239313Sdim 22239313Sdimnamespace { 23239313Sdim/// Get comment kind and bool describing if it is a trailing comment. 24239313Sdimstd::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment) { 25239313Sdim if (Comment.size() < 3 || Comment[0] != '/') 26239313Sdim return std::make_pair(RawComment::RCK_Invalid, false); 27239313Sdim 28239313Sdim RawComment::CommentKind K; 29239313Sdim if (Comment[1] == '/') { 30239313Sdim if (Comment.size() < 3) 31239313Sdim return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 32239313Sdim 33239313Sdim if (Comment[2] == '/') 34239313Sdim K = RawComment::RCK_BCPLSlash; 35239313Sdim else if (Comment[2] == '!') 36239313Sdim K = RawComment::RCK_BCPLExcl; 37239313Sdim else 38239313Sdim return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 39239313Sdim } else { 40239313Sdim assert(Comment.size() >= 4); 41239313Sdim 42239313Sdim // Comment lexer does not understand escapes in comment markers, so pretend 43239313Sdim // that this is not a comment. 
44239313Sdim if (Comment[1] != '*' || 45239313Sdim Comment[Comment.size() - 2] != '*' || 46239313Sdim Comment[Comment.size() - 1] != '/') 47239313Sdim return std::make_pair(RawComment::RCK_Invalid, false); 48239313Sdim 49239313Sdim if (Comment[2] == '*') 50239313Sdim K = RawComment::RCK_JavaDoc; 51239313Sdim else if (Comment[2] == '!') 52239313Sdim K = RawComment::RCK_Qt; 53239313Sdim else 54239313Sdim return std::make_pair(RawComment::RCK_OrdinaryC, false); 55239313Sdim } 56239313Sdim const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); 57239313Sdim return std::make_pair(K, TrailingComment); 58239313Sdim} 59239313Sdim 60239313Sdimbool mergedCommentIsTrailingComment(StringRef Comment) { 61239313Sdim return (Comment.size() > 3) && (Comment[3] == '<'); 62239313Sdim} 63239313Sdim} // unnamed namespace 64239313Sdim 65239313SdimRawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, 66239313Sdim bool Merged) : 67239313Sdim Range(SR), RawTextValid(false), BriefTextValid(false), 68239462Sdim IsAttached(false), IsAlmostTrailingComment(false), 69239313Sdim BeginLineValid(false), EndLineValid(false) { 70239313Sdim // Extract raw comment text, if possible. 71239313Sdim if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { 72239313Sdim Kind = RCK_Invalid; 73239313Sdim return; 74239313Sdim } 75239313Sdim 76239313Sdim if (!Merged) { 77239313Sdim // Guess comment kind. 
78239313Sdim std::pair<CommentKind, bool> K = getCommentKind(RawText); 79239313Sdim Kind = K.first; 80239313Sdim IsTrailingComment = K.second; 81239313Sdim 82239313Sdim IsAlmostTrailingComment = RawText.startswith("//<") || 83239313Sdim RawText.startswith("/*<"); 84239313Sdim } else { 85239313Sdim Kind = RCK_Merged; 86239313Sdim IsTrailingComment = mergedCommentIsTrailingComment(RawText); 87239313Sdim } 88239313Sdim} 89239313Sdim 90239313Sdimunsigned RawComment::getBeginLine(const SourceManager &SM) const { 91239313Sdim if (BeginLineValid) 92239313Sdim return BeginLine; 93239313Sdim 94239313Sdim std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin()); 95239313Sdim BeginLine = SM.getLineNumber(LocInfo.first, LocInfo.second); 96239313Sdim BeginLineValid = true; 97239313Sdim return BeginLine; 98239313Sdim} 99239313Sdim 100239313Sdimunsigned RawComment::getEndLine(const SourceManager &SM) const { 101239313Sdim if (EndLineValid) 102239313Sdim return EndLine; 103239313Sdim 104239313Sdim std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getEnd()); 105239313Sdim EndLine = SM.getLineNumber(LocInfo.first, LocInfo.second); 106239313Sdim EndLineValid = true; 107239313Sdim return EndLine; 108239313Sdim} 109239313Sdim 110239313SdimStringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { 111239313Sdim FileID BeginFileID; 112239313Sdim FileID EndFileID; 113239313Sdim unsigned BeginOffset; 114239313Sdim unsigned EndOffset; 115239313Sdim 116239313Sdim llvm::tie(BeginFileID, BeginOffset) = 117239313Sdim SourceMgr.getDecomposedLoc(Range.getBegin()); 118239313Sdim llvm::tie(EndFileID, EndOffset) = 119239313Sdim SourceMgr.getDecomposedLoc(Range.getEnd()); 120239313Sdim 121239313Sdim const unsigned Length = EndOffset - BeginOffset; 122239313Sdim if (Length < 2) 123239313Sdim return StringRef(); 124239313Sdim 125239313Sdim // The comment can't begin in one file and end in another. 
126239313Sdim assert(BeginFileID == EndFileID); 127239313Sdim 128239313Sdim bool Invalid = false; 129239313Sdim const char *BufferStart = SourceMgr.getBufferData(BeginFileID, 130239313Sdim &Invalid).data(); 131239313Sdim if (Invalid) 132239313Sdim return StringRef(); 133239313Sdim 134239313Sdim return StringRef(BufferStart + BeginOffset, Length); 135239313Sdim} 136239313Sdim 137239313Sdimconst char *RawComment::extractBriefText(const ASTContext &Context) const { 138239313Sdim // Make sure that RawText is valid. 139239313Sdim getRawText(Context.getSourceManager()); 140239313Sdim 141239313Sdim // Since we will be copying the resulting text, all allocations made during 142239313Sdim // parsing are garbage after resulting string is formed. Thus we can use 143239313Sdim // a separate allocator for all temporary stuff. 144239313Sdim llvm::BumpPtrAllocator Allocator; 145239313Sdim 146239313Sdim comments::CommandTraits Traits; 147239313Sdim comments::Lexer L(Allocator, Traits, 148239313Sdim Range.getBegin(), comments::CommentOptions(), 149239313Sdim RawText.begin(), RawText.end()); 150239313Sdim comments::BriefParser P(L, Traits); 151239313Sdim 152239313Sdim const std::string Result = P.Parse(); 153239313Sdim const unsigned BriefTextLength = Result.size(); 154239313Sdim char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; 155239313Sdim memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); 156239313Sdim BriefText = BriefTextPtr; 157239313Sdim BriefTextValid = true; 158239313Sdim 159239313Sdim return BriefTextPtr; 160239313Sdim} 161239313Sdim 162239462Sdimcomments::FullComment *RawComment::parse(const ASTContext &Context, 163239462Sdim const Decl *D) const { 164239313Sdim // Make sure that RawText is valid. 
165239313Sdim getRawText(Context.getSourceManager()); 166239313Sdim 167239313Sdim comments::CommandTraits Traits; 168239313Sdim comments::Lexer L(Context.getAllocator(), Traits, 169239313Sdim getSourceRange().getBegin(), comments::CommentOptions(), 170239313Sdim RawText.begin(), RawText.end()); 171239313Sdim comments::Sema S(Context.getAllocator(), Context.getSourceManager(), 172239313Sdim Context.getDiagnostics(), Traits); 173239462Sdim S.setDecl(D); 174239313Sdim comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), 175239313Sdim Context.getDiagnostics(), Traits); 176239313Sdim 177239462Sdim return P.parseFullComment(); 178239313Sdim} 179239313Sdim 180239313Sdimnamespace { 181239313Sdimbool containsOnlyWhitespace(StringRef Str) { 182239313Sdim return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos; 183239313Sdim} 184239313Sdim 185239313Sdimbool onlyWhitespaceBetweenComments(SourceManager &SM, 186239313Sdim const RawComment &C1, const RawComment &C2) { 187239313Sdim std::pair<FileID, unsigned> C1EndLocInfo = SM.getDecomposedLoc( 188239313Sdim C1.getSourceRange().getEnd()); 189239313Sdim std::pair<FileID, unsigned> C2BeginLocInfo = SM.getDecomposedLoc( 190239313Sdim C2.getSourceRange().getBegin()); 191239313Sdim 192239313Sdim // Question does not make sense if comments are located in different files. 
193239313Sdim if (C1EndLocInfo.first != C2BeginLocInfo.first) 194239313Sdim return false; 195239313Sdim 196239313Sdim bool Invalid = false; 197239313Sdim const char *Buffer = SM.getBufferData(C1EndLocInfo.first, &Invalid).data(); 198239313Sdim if (Invalid) 199239313Sdim return false; 200239313Sdim 201239313Sdim StringRef TextBetweenComments(Buffer + C1EndLocInfo.second, 202239313Sdim C2BeginLocInfo.second - C1EndLocInfo.second); 203239313Sdim 204239313Sdim return containsOnlyWhitespace(TextBetweenComments); 205239313Sdim} 206239313Sdim} // unnamed namespace 207239313Sdim 208239313Sdimvoid RawCommentList::addComment(const RawComment &RC, 209239313Sdim llvm::BumpPtrAllocator &Allocator) { 210239313Sdim if (RC.isInvalid()) 211239313Sdim return; 212239313Sdim 213239313Sdim // Check if the comments are not in source order. 214239313Sdim while (!Comments.empty() && 215239313Sdim !SourceMgr.isBeforeInTranslationUnit( 216239313Sdim Comments.back()->getSourceRange().getBegin(), 217239313Sdim RC.getSourceRange().getBegin())) { 218239313Sdim // If they are, just pop a few last comments that don't fit. 219239313Sdim // This happens if an \#include directive contains comments. 220239313Sdim Comments.pop_back(); 221239313Sdim } 222239313Sdim 223239313Sdim if (OnlyWhitespaceSeen) { 224239313Sdim if (!onlyWhitespaceBetweenComments(SourceMgr, LastComment, RC)) 225239313Sdim OnlyWhitespaceSeen = false; 226239313Sdim } 227239313Sdim 228239313Sdim LastComment = RC; 229239313Sdim 230239313Sdim // Ordinary comments are not interesting for us. 231239313Sdim if (RC.isOrdinary()) 232239313Sdim return; 233239313Sdim 234239313Sdim // If this is the first Doxygen comment, save it (because there isn't 235239313Sdim // anything to merge it with). 
236239313Sdim if (Comments.empty()) { 237239313Sdim Comments.push_back(new (Allocator) RawComment(RC)); 238239313Sdim OnlyWhitespaceSeen = true; 239239313Sdim return; 240239313Sdim } 241239313Sdim 242239313Sdim const RawComment &C1 = *Comments.back(); 243239313Sdim const RawComment &C2 = RC; 244239313Sdim 245239313Sdim // Merge comments only if there is only whitespace between them. 246239313Sdim // Can't merge trailing and non-trailing comments. 247239313Sdim // Merge trailing comments if they are on same or consecutive lines. 248239313Sdim if (OnlyWhitespaceSeen && 249239313Sdim (C1.isTrailingComment() == C2.isTrailingComment()) && 250239313Sdim (!C1.isTrailingComment() || 251239313Sdim C1.getEndLine(SourceMgr) + 1 >= C2.getBeginLine(SourceMgr))) { 252239313Sdim SourceRange MergedRange(C1.getSourceRange().getBegin(), 253239313Sdim C2.getSourceRange().getEnd()); 254239313Sdim *Comments.back() = RawComment(SourceMgr, MergedRange, true); 255239313Sdim } else 256239313Sdim Comments.push_back(new (Allocator) RawComment(RC)); 257239313Sdim 258239313Sdim OnlyWhitespaceSeen = true; 259239313Sdim} 260239313Sdim 261