1239313Sdim//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// 2239313Sdim// 3239313Sdim// The LLVM Compiler Infrastructure 4239313Sdim// 5239313Sdim// This file is distributed under the University of Illinois Open Source 6239313Sdim// License. See LICENSE.TXT for details. 7239313Sdim// 8239313Sdim//===----------------------------------------------------------------------===// 9239313Sdim 10239313Sdim#include "clang/AST/RawCommentList.h" 11239313Sdim#include "clang/AST/ASTContext.h" 12239313Sdim#include "clang/AST/Comment.h" 13249423Sdim#include "clang/AST/CommentBriefParser.h" 14249423Sdim#include "clang/AST/CommentCommandTraits.h" 15239313Sdim#include "clang/AST/CommentLexer.h" 16249423Sdim#include "clang/AST/CommentParser.h" 17239313Sdim#include "clang/AST/CommentSema.h" 18239313Sdim#include "llvm/ADT/STLExtras.h" 19239313Sdim 20239313Sdimusing namespace clang; 21239313Sdim 22239313Sdimnamespace { 23239313Sdim/// Get comment kind and bool describing if it is a trailing comment. 24251662Sdimstd::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment, 25251662Sdim bool ParseAllComments) { 26251662Sdim const size_t MinCommentLength = ParseAllComments ? 2 : 3; 27251662Sdim if ((Comment.size() < MinCommentLength) || Comment[0] != '/') 28239313Sdim return std::make_pair(RawComment::RCK_Invalid, false); 29239313Sdim 30239313Sdim RawComment::CommentKind K; 31239313Sdim if (Comment[1] == '/') { 32239313Sdim if (Comment.size() < 3) 33239313Sdim return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 34239313Sdim 35239313Sdim if (Comment[2] == '/') 36239313Sdim K = RawComment::RCK_BCPLSlash; 37239313Sdim else if (Comment[2] == '!') 38239313Sdim K = RawComment::RCK_BCPLExcl; 39239313Sdim else 40239313Sdim return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 41239313Sdim } else { 42239313Sdim assert(Comment.size() >= 4); 43239313Sdim 44239313Sdim // Comment lexer does not understand escapes in comment markers, so pretend 45239313Sdim // that this is not a comment. 46239313Sdim if (Comment[1] != '*' || 47239313Sdim Comment[Comment.size() - 2] != '*' || 48239313Sdim Comment[Comment.size() - 1] != '/') 49239313Sdim return std::make_pair(RawComment::RCK_Invalid, false); 50239313Sdim 51239313Sdim if (Comment[2] == '*') 52239313Sdim K = RawComment::RCK_JavaDoc; 53239313Sdim else if (Comment[2] == '!') 54239313Sdim K = RawComment::RCK_Qt; 55239313Sdim else 56239313Sdim return std::make_pair(RawComment::RCK_OrdinaryC, false); 57239313Sdim } 58239313Sdim const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); 59239313Sdim return std::make_pair(K, TrailingComment); 60239313Sdim} 61239313Sdim 62239313Sdimbool mergedCommentIsTrailingComment(StringRef Comment) { 63239313Sdim return (Comment.size() > 3) && (Comment[3] == '<'); 64239313Sdim} 65239313Sdim} // unnamed namespace 66239313Sdim 67239313SdimRawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, 68251662Sdim bool Merged, bool ParseAllComments) : 69239313Sdim Range(SR), RawTextValid(false), BriefTextValid(false), 70239462Sdim IsAttached(false), IsAlmostTrailingComment(false), 71263508Sdim ParseAllComments(ParseAllComments) { 72239313Sdim // Extract raw comment text, if possible. 73239313Sdim if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { 74239313Sdim Kind = RCK_Invalid; 75239313Sdim return; 76239313Sdim } 77239313Sdim 78239313Sdim if (!Merged) { 79239313Sdim // Guess comment kind. 80251662Sdim std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments); 81239313Sdim Kind = K.first; 82239313Sdim IsTrailingComment = K.second; 83239313Sdim 84239313Sdim IsAlmostTrailingComment = RawText.startswith("//<") || 85239313Sdim RawText.startswith("/*<"); 86239313Sdim } else { 87239313Sdim Kind = RCK_Merged; 88239313Sdim IsTrailingComment = mergedCommentIsTrailingComment(RawText); 89239313Sdim } 90239313Sdim} 91239313Sdim 92239313SdimStringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { 93239313Sdim FileID BeginFileID; 94239313Sdim FileID EndFileID; 95239313Sdim unsigned BeginOffset; 96239313Sdim unsigned EndOffset; 97239313Sdim 98239313Sdim llvm::tie(BeginFileID, BeginOffset) = 99239313Sdim SourceMgr.getDecomposedLoc(Range.getBegin()); 100239313Sdim llvm::tie(EndFileID, EndOffset) = 101239313Sdim SourceMgr.getDecomposedLoc(Range.getEnd()); 102239313Sdim 103239313Sdim const unsigned Length = EndOffset - BeginOffset; 104239313Sdim if (Length < 2) 105239313Sdim return StringRef(); 106239313Sdim 107239313Sdim // The comment can't begin in one file and end in another. 108239313Sdim assert(BeginFileID == EndFileID); 109239313Sdim 110239313Sdim bool Invalid = false; 111239313Sdim const char *BufferStart = SourceMgr.getBufferData(BeginFileID, 112239313Sdim &Invalid).data(); 113239313Sdim if (Invalid) 114239313Sdim return StringRef(); 115239313Sdim 116239313Sdim return StringRef(BufferStart + BeginOffset, Length); 117239313Sdim} 118239313Sdim 119239313Sdimconst char *RawComment::extractBriefText(const ASTContext &Context) const { 120239313Sdim // Make sure that RawText is valid. 121239313Sdim getRawText(Context.getSourceManager()); 122239313Sdim 123239313Sdim // Since we will be copying the resulting text, all allocations made during 124239313Sdim // parsing are garbage after resulting string is formed. Thus we can use 125239313Sdim // a separate allocator for all temporary stuff. 126239313Sdim llvm::BumpPtrAllocator Allocator; 127239313Sdim 128251662Sdim comments::Lexer L(Allocator, Context.getDiagnostics(), 129251662Sdim Context.getCommentCommandTraits(), 130243830Sdim Range.getBegin(), 131239313Sdim RawText.begin(), RawText.end()); 132243830Sdim comments::BriefParser P(L, Context.getCommentCommandTraits()); 133239313Sdim 134239313Sdim const std::string Result = P.Parse(); 135239313Sdim const unsigned BriefTextLength = Result.size(); 136239313Sdim char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; 137239313Sdim memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); 138239313Sdim BriefText = BriefTextPtr; 139239313Sdim BriefTextValid = true; 140239313Sdim 141239313Sdim return BriefTextPtr; 142239313Sdim} 143239313Sdim 144239462Sdimcomments::FullComment *RawComment::parse(const ASTContext &Context, 145243830Sdim const Preprocessor *PP, 146239462Sdim const Decl *D) const { 147239313Sdim // Make sure that RawText is valid. 148239313Sdim getRawText(Context.getSourceManager()); 149239313Sdim 150251662Sdim comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(), 151251662Sdim Context.getCommentCommandTraits(), 152243830Sdim getSourceRange().getBegin(), 153239313Sdim RawText.begin(), RawText.end()); 154239313Sdim comments::Sema S(Context.getAllocator(), Context.getSourceManager(), 155243830Sdim Context.getDiagnostics(), 156243830Sdim Context.getCommentCommandTraits(), 157243830Sdim PP); 158239462Sdim S.setDecl(D); 159239313Sdim comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), 160243830Sdim Context.getDiagnostics(), 161243830Sdim Context.getCommentCommandTraits()); 162239313Sdim 163239462Sdim return P.parseFullComment(); 164239313Sdim} 165239313Sdim 166263508Sdimstatic bool onlyWhitespaceBetween(SourceManager &SM, 167263508Sdim SourceLocation Loc1, SourceLocation Loc2, 168263508Sdim unsigned MaxNewlinesAllowed) { 169243830Sdim std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1); 170243830Sdim std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2); 171239313Sdim 172243830Sdim // Question does not make sense if locations are in different files. 173243830Sdim if (Loc1Info.first != Loc2Info.first) 174239313Sdim return false; 175239313Sdim 176239313Sdim bool Invalid = false; 177243830Sdim const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data(); 178239313Sdim if (Invalid) 179239313Sdim return false; 180239313Sdim 181263508Sdim unsigned NumNewlines = 0; 182263508Sdim assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!"); 183263508Sdim // Look for non-whitespace characters and remember any newlines seen. 184263508Sdim for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) { 185263508Sdim switch (Buffer[I]) { 186263508Sdim default: 187263508Sdim return false; 188263508Sdim case ' ': 189263508Sdim case '\t': 190263508Sdim case '\f': 191263508Sdim case '\v': 192263508Sdim break; 193263508Sdim case '\r': 194263508Sdim case '\n': 195263508Sdim ++NumNewlines; 196263508Sdim 197263508Sdim // Check if we have found more than the maximum allowed number of 198263508Sdim // newlines. 199263508Sdim if (NumNewlines > MaxNewlinesAllowed) 200263508Sdim return false; 201263508Sdim 202263508Sdim // Collapse \r\n and \n\r into a single newline. 203263508Sdim if (I + 1 != Loc2Info.second && 204263508Sdim (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') && 205263508Sdim Buffer[I] != Buffer[I + 1]) 206263508Sdim ++I; 207263508Sdim break; 208263508Sdim } 209263508Sdim } 210263508Sdim 211263508Sdim return true; 212239313Sdim} 213239313Sdim 214239313Sdimvoid RawCommentList::addComment(const RawComment &RC, 215239313Sdim llvm::BumpPtrAllocator &Allocator) { 216239313Sdim if (RC.isInvalid()) 217239313Sdim return; 218239313Sdim 219239313Sdim // Check if the comments are not in source order. 220239313Sdim while (!Comments.empty() && 221263508Sdim !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(), 222263508Sdim RC.getLocStart())) { 223239313Sdim // If they are, just pop a few last comments that don't fit. 224239313Sdim // This happens if an \#include directive contains comments. 225239313Sdim Comments.pop_back(); 226239313Sdim } 227239313Sdim 228239313Sdim // Ordinary comments are not interesting for us. 229239313Sdim if (RC.isOrdinary()) 230239313Sdim return; 231239313Sdim 232239313Sdim // If this is the first Doxygen comment, save it (because there isn't 233239313Sdim // anything to merge it with). 234239313Sdim if (Comments.empty()) { 235239313Sdim Comments.push_back(new (Allocator) RawComment(RC)); 236239313Sdim return; 237239313Sdim } 238239313Sdim 239239313Sdim const RawComment &C1 = *Comments.back(); 240239313Sdim const RawComment &C2 = RC; 241239313Sdim 242239313Sdim // Merge comments only if there is only whitespace between them. 243239313Sdim // Can't merge trailing and non-trailing comments. 244243830Sdim // Merge comments if they are on same or consecutive lines. 245263508Sdim if (C1.isTrailingComment() == C2.isTrailingComment() && 246263508Sdim onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(), 247263508Sdim /*MaxNewlinesAllowed=*/1)) { 248263508Sdim SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd()); 249263508Sdim *Comments.back() = RawComment(SourceMgr, MergedRange, true, 250263508Sdim RC.isParseAllComments()); 251263508Sdim } else { 252263508Sdim Comments.push_back(new (Allocator) RawComment(RC)); 253243830Sdim } 254239313Sdim} 255