// RawCommentList.cpp revision 239313
1146773Ssam//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// 2172686Smlaier// 3190207Srpaulo// The LLVM Compiler Infrastructure 4146773Ssam// 5146773Ssam// This file is distributed under the University of Illinois Open Source 6146773Ssam// License. See LICENSE.TXT for details. 7146773Ssam// 8146773Ssam//===----------------------------------------------------------------------===// 9146773Ssam 10146773Ssam#include "clang/AST/RawCommentList.h" 11146773Ssam#include "clang/AST/ASTContext.h" 12146773Ssam#include "clang/AST/Comment.h" 13146773Ssam#include "clang/AST/CommentLexer.h" 14146773Ssam#include "clang/AST/CommentBriefParser.h" 15146773Ssam#include "clang/AST/CommentSema.h" 16146773Ssam#include "clang/AST/CommentParser.h" 17146773Ssam#include "clang/AST/CommentCommandTraits.h" 18146773Ssam#include "llvm/ADT/STLExtras.h" 19146773Ssam 20146773Ssamusing namespace clang; 21146773Ssam 22146773Ssamnamespace { 23146773Ssam/// Get comment kind and bool describing if it is a trailing comment. 24146773Ssamstd::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment) { 25146773Ssam if (Comment.size() < 3 || Comment[0] != '/') 26146773Ssam return std::make_pair(RawComment::RCK_Invalid, false); 27146773Ssam 28146773Ssam RawComment::CommentKind K; 29146773Ssam if (Comment[1] == '/') { 30146773Ssam if (Comment.size() < 3) 31146773Ssam return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 32146773Ssam 33146773Ssam if (Comment[2] == '/') 34146773Ssam K = RawComment::RCK_BCPLSlash; 35146773Ssam else if (Comment[2] == '!') 36146773Ssam K = RawComment::RCK_BCPLExcl; 37146773Ssam else 38146773Ssam return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 39146773Ssam } else { 40146773Ssam assert(Comment.size() >= 4); 41146773Ssam 42146773Ssam // Comment lexer does not understand escapes in comment markers, so pretend 43146773Ssam // that this is not a comment. 
44146773Ssam if (Comment[1] != '*' || 45146773Ssam Comment[Comment.size() - 2] != '*' || 46146773Ssam Comment[Comment.size() - 1] != '/') 47146773Ssam return std::make_pair(RawComment::RCK_Invalid, false); 48146773Ssam 49146773Ssam if (Comment[2] == '*') 50172686Smlaier K = RawComment::RCK_JavaDoc; 51146773Ssam else if (Comment[2] == '!') 52146773Ssam K = RawComment::RCK_Qt; 53146773Ssam else 54172686Smlaier return std::make_pair(RawComment::RCK_OrdinaryC, false); 55146773Ssam } 56170533Ssam const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); 57170533Ssam return std::make_pair(K, TrailingComment); 58170533Ssam} 59170533Ssam 60170533Ssambool mergedCommentIsTrailingComment(StringRef Comment) { 61146773Ssam return (Comment.size() > 3) && (Comment[3] == '<'); 62146773Ssam} 63146773Ssam} // unnamed namespace 64146773Ssam 65146773SsamRawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, 66146773Ssam bool Merged) : 67146773Ssam Range(SR), RawTextValid(false), BriefTextValid(false), 68146773Ssam IsAlmostTrailingComment(false), 69146773Ssam BeginLineValid(false), EndLineValid(false) { 70146773Ssam // Extract raw comment text, if possible. 71146773Ssam if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { 72146773Ssam Kind = RCK_Invalid; 73146773Ssam return; 74146773Ssam } 75146773Ssam 76146773Ssam if (!Merged) { 77146773Ssam // Guess comment kind. 
78146773Ssam std::pair<CommentKind, bool> K = getCommentKind(RawText); 79146773Ssam Kind = K.first; 80146773Ssam IsTrailingComment = K.second; 81146773Ssam 82146773Ssam IsAlmostTrailingComment = RawText.startswith("//<") || 83146773Ssam RawText.startswith("/*<"); 84146773Ssam } else { 85146773Ssam Kind = RCK_Merged; 86146773Ssam IsTrailingComment = mergedCommentIsTrailingComment(RawText); 87146773Ssam } 88146773Ssam} 89146773Ssam 90146773Ssamconst Decl *RawComment::getDecl() const { 91146773Ssam if (DeclOrParsedComment.isNull()) 92146773Ssam return NULL; 93146773Ssam 94170533Ssam if (const Decl *D = DeclOrParsedComment.dyn_cast<const Decl *>()) 95170533Ssam return D; 96170533Ssam 97146773Ssam return DeclOrParsedComment.get<comments::FullComment *>()->getDecl(); 98146773Ssam} 99146773Ssam 100146773Ssamunsigned RawComment::getBeginLine(const SourceManager &SM) const { 101146773Ssam if (BeginLineValid) 102146773Ssam return BeginLine; 103170533Ssam 104146773Ssam std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin()); 105170533Ssam BeginLine = SM.getLineNumber(LocInfo.first, LocInfo.second); 106170533Ssam BeginLineValid = true; 107146773Ssam return BeginLine; 108146773Ssam} 109146773Ssam 110146773Ssamunsigned RawComment::getEndLine(const SourceManager &SM) const { 111146773Ssam if (EndLineValid) 112146773Ssam return EndLine; 113146773Ssam 114146773Ssam std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getEnd()); 115146773Ssam EndLine = SM.getLineNumber(LocInfo.first, LocInfo.second); 116146773Ssam EndLineValid = true; 117146773Ssam return EndLine; 118146773Ssam} 119146773Ssam 120146773SsamStringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { 121146773Ssam FileID BeginFileID; 122146773Ssam FileID EndFileID; 123146773Ssam unsigned BeginOffset; 124146773Ssam unsigned EndOffset; 125146773Ssam 126146773Ssam llvm::tie(BeginFileID, BeginOffset) = 127146773Ssam SourceMgr.getDecomposedLoc(Range.getBegin()); 128146773Ssam 
llvm::tie(EndFileID, EndOffset) = 129146773Ssam SourceMgr.getDecomposedLoc(Range.getEnd()); 130172686Smlaier 131146773Ssam const unsigned Length = EndOffset - BeginOffset; 132146773Ssam if (Length < 2) 133146773Ssam return StringRef(); 134146773Ssam 135146773Ssam // The comment can't begin in one file and end in another. 136146773Ssam assert(BeginFileID == EndFileID); 137146773Ssam 138146773Ssam bool Invalid = false; 139146773Ssam const char *BufferStart = SourceMgr.getBufferData(BeginFileID, 140146773Ssam &Invalid).data(); 141146773Ssam if (Invalid) 142146773Ssam return StringRef(); 143146773Ssam 144146773Ssam return StringRef(BufferStart + BeginOffset, Length); 145146773Ssam} 146146773Ssam 147146773Ssamconst char *RawComment::extractBriefText(const ASTContext &Context) const { 148146773Ssam // Make sure that RawText is valid. 149146773Ssam getRawText(Context.getSourceManager()); 150146773Ssam 151146773Ssam // Since we will be copying the resulting text, all allocations made during 152146773Ssam // parsing are garbage after resulting string is formed. Thus we can use 153146773Ssam // a separate allocator for all temporary stuff. 
154146773Ssam llvm::BumpPtrAllocator Allocator; 155146773Ssam 156146773Ssam comments::CommandTraits Traits; 157146773Ssam comments::Lexer L(Allocator, Traits, 158146773Ssam Range.getBegin(), comments::CommentOptions(), 159146773Ssam RawText.begin(), RawText.end()); 160146773Ssam comments::BriefParser P(L, Traits); 161146773Ssam 162146773Ssam const std::string Result = P.Parse(); 163146773Ssam const unsigned BriefTextLength = Result.size(); 164146773Ssam char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; 165170533Ssam memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); 166170533Ssam BriefText = BriefTextPtr; 167170533Ssam BriefTextValid = true; 168170533Ssam 169170533Ssam return BriefTextPtr; 170170533Ssam} 171170533Ssam 172170533Ssamcomments::FullComment *RawComment::parse(const ASTContext &Context) const { 173170533Ssam // Make sure that RawText is valid. 174170533Ssam getRawText(Context.getSourceManager()); 175170533Ssam 176172686Smlaier comments::CommandTraits Traits; 177172686Smlaier comments::Lexer L(Context.getAllocator(), Traits, 178172686Smlaier getSourceRange().getBegin(), comments::CommentOptions(), 179172686Smlaier RawText.begin(), RawText.end()); 180146773Ssam comments::Sema S(Context.getAllocator(), Context.getSourceManager(), 181146773Ssam Context.getDiagnostics(), Traits); 182146773Ssam S.setDecl(getDecl()); 183146773Ssam comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), 184146773Ssam Context.getDiagnostics(), Traits); 185146773Ssam 186146773Ssam comments::FullComment *FC = P.parseFullComment(); 187146773Ssam DeclOrParsedComment = FC; 188146773Ssam return FC; 189146773Ssam} 190146773Ssam 191146773Ssamnamespace { 192146773Ssambool containsOnlyWhitespace(StringRef Str) { 193146773Ssam return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos; 194146773Ssam} 195146773Ssam 196195709Ssambool onlyWhitespaceBetweenComments(SourceManager &SM, 197146773Ssam const RawComment &C1, const RawComment &C2) { 
198146773Ssam std::pair<FileID, unsigned> C1EndLocInfo = SM.getDecomposedLoc( 199146773Ssam C1.getSourceRange().getEnd()); 200146773Ssam std::pair<FileID, unsigned> C2BeginLocInfo = SM.getDecomposedLoc( 201170533Ssam C2.getSourceRange().getBegin()); 202170533Ssam 203170533Ssam // Question does not make sense if comments are located in different files. 204170533Ssam if (C1EndLocInfo.first != C2BeginLocInfo.first) 205170533Ssam return false; 206170533Ssam 207170533Ssam bool Invalid = false; 208170533Ssam const char *Buffer = SM.getBufferData(C1EndLocInfo.first, &Invalid).data(); 209170533Ssam if (Invalid) 210170533Ssam return false; 211170533Ssam 212170533Ssam StringRef TextBetweenComments(Buffer + C1EndLocInfo.second, 213170533Ssam C2BeginLocInfo.second - C1EndLocInfo.second); 214170533Ssam 215170533Ssam return containsOnlyWhitespace(TextBetweenComments); 216170533Ssam} 217146773Ssam} // unnamed namespace 218146773Ssam 219146773Ssamvoid RawCommentList::addComment(const RawComment &RC, 220146773Ssam llvm::BumpPtrAllocator &Allocator) { 221146773Ssam if (RC.isInvalid()) 222146773Ssam return; 223146773Ssam 224146773Ssam // Check if the comments are not in source order. 225146773Ssam while (!Comments.empty() && 226146773Ssam !SourceMgr.isBeforeInTranslationUnit( 227146773Ssam Comments.back()->getSourceRange().getBegin(), 228146773Ssam RC.getSourceRange().getBegin())) { 229146773Ssam // If they are, just pop a few last comments that don't fit. 230146773Ssam // This happens if an \#include directive contains comments. 231146773Ssam Comments.pop_back(); 232146773Ssam } 233170533Ssam 234170533Ssam if (OnlyWhitespaceSeen) { 235170533Ssam if (!onlyWhitespaceBetweenComments(SourceMgr, LastComment, RC)) 236170533Ssam OnlyWhitespaceSeen = false; 237170533Ssam } 238170533Ssam 239146773Ssam LastComment = RC; 240146773Ssam 241 // Ordinary comments are not interesting for us. 
242 if (RC.isOrdinary()) 243 return; 244 245 // If this is the first Doxygen comment, save it (because there isn't 246 // anything to merge it with). 247 if (Comments.empty()) { 248 Comments.push_back(new (Allocator) RawComment(RC)); 249 OnlyWhitespaceSeen = true; 250 return; 251 } 252 253 const RawComment &C1 = *Comments.back(); 254 const RawComment &C2 = RC; 255 256 // Merge comments only if there is only whitespace between them. 257 // Can't merge trailing and non-trailing comments. 258 // Merge trailing comments if they are on same or consecutive lines. 259 if (OnlyWhitespaceSeen && 260 (C1.isTrailingComment() == C2.isTrailingComment()) && 261 (!C1.isTrailingComment() || 262 C1.getEndLine(SourceMgr) + 1 >= C2.getBeginLine(SourceMgr))) { 263 SourceRange MergedRange(C1.getSourceRange().getBegin(), 264 C2.getSourceRange().getEnd()); 265 *Comments.back() = RawComment(SourceMgr, MergedRange, true); 266 } else 267 Comments.push_back(new (Allocator) RawComment(RC)); 268 269 OnlyWhitespaceSeen = true; 270} 271 272