1//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11/// fixes namespace end comments.
12///
13//===----------------------------------------------------------------------===//
14
15#include "NamespaceEndCommentsFixer.h"
16#include "clang/Basic/TokenKinds.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/Regex.h"
19
20#define DEBUG_TYPE "namespace-end-comments-fixer"
21
22namespace clang {
23namespace format {
24
25namespace {
26// Iterates all tokens starting from StartTok to EndTok and apply Fn to all
27// tokens between them including StartTok and EndTok. Returns the token after
28// EndTok.
29const FormatToken *
30processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
31              tok::TokenKind EndTok,
32              llvm::function_ref<void(const FormatToken *)> Fn) {
33  if (!Tok || Tok->isNot(StartTok))
34    return Tok;
35  int NestLevel = 0;
36  do {
37    if (Tok->is(StartTok))
38      ++NestLevel;
39    else if (Tok->is(EndTok))
40      --NestLevel;
41    if (Fn)
42      Fn(Tok);
43    Tok = Tok->getNextNonComment();
44  } while (Tok && NestLevel > 0);
45  return Tok;
46}
47
48const FormatToken *skipAttribute(const FormatToken *Tok) {
49  if (!Tok)
50    return nullptr;
51  if (Tok->isAttribute()) {
52    Tok = Tok->getNextNonComment();
53    Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
54  } else if (Tok->is(tok::l_square)) {
55    Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
56  }
57  return Tok;
58}
59
// Computes the name of a namespace given the namespace token.
// Returns "" for anonymous namespace.
//
// NamespaceTok must be the `namespace` keyword or a token annotated as
// TT_NamespaceMacro (asserted). Comment tokens are skipped throughout because
// every advance uses getNextNonComment().
std::string computeName(const FormatToken *NamespaceTok) {
  assert(NamespaceTok &&
         NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "expecting a namespace token");
  std::string name;
  const FormatToken *Tok = NamespaceTok->getNextNonComment();
  if (NamespaceTok->is(TT_NamespaceMacro)) {
    // Collects all the non-comment tokens between opening parenthesis
    // and closing parenthesis or comma.
    assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
    Tok = Tok->getNextNonComment();
    while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
      name += Tok->TokenText;
      Tok = Tok->getNextNonComment();
    }
    return name;
  }
  // Skip an attribute placed directly after the `namespace` keyword.
  Tok = skipAttribute(Tok);

  // Text of the tokens that precede the candidate name (e.g. leading macros),
  // concatenated without separators.
  std::string FirstNSName;
  // For `namespace [[foo]] A::B::inline C {` or
  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  // one token before that up until the '{'. A '(' might be a macro with
  // arguments.
  const FormatToken *FirstNSTok = nullptr;
  while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
    // FirstNSTok lags one token behind Tok, so the token directly before the
    // stop token is never added to FirstNSName.
    if (FirstNSTok)
      FirstNSName += FirstNSTok->TokenText;
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
  }

  // Rewind to the token just before the stop token (if the loop above saw
  // any token) so the second pass starts at the candidate name.
  if (FirstNSTok)
    Tok = FirstNSTok;
  Tok = skipAttribute(Tok);

  FirstNSTok = nullptr;
  // Add everything from '(' to ')'.
  auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
  bool IsPrevColoncolon = false;
  bool HasColoncolon = false;
  // NOTE(review): IsPrevInline is set when an `inline` token is appended and
  // is never reset to false afterwards, so the termination check below stays
  // disabled for the rest of the loop - confirm this is intended.
  bool IsPrevInline = false;
  bool NameFinished = false;
  // If we found '::' in name, then it's the name. Otherwise, we can't tell
  // which one is name. For example, `namespace A B {`.
  while (Tok && Tok->isNot(tok::l_brace)) {
    // FirstNSTok is the pending token remembered by the previous iteration;
    // it is examined and appended here, one step behind Tok.
    if (FirstNSTok) {
      if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
        // A '::' has been seen and the last appended token was not '::'.
        // A '(' is appended together with everything up to its matching ')';
        // any pending token other than '::' ends the name.
        if (FirstNSTok->is(tok::l_paren)) {
          FirstNSTok = Tok =
              processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
          continue;
        }
        if (FirstNSTok->isNot(tok::coloncolon)) {
          NameFinished = true;
          break;
        }
      }
      name += FirstNSTok->TokenText;
      IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
      HasColoncolon = HasColoncolon || IsPrevColoncolon;
      if (FirstNSTok->is(tok::kw_inline)) {
        // Keep `inline` separated from the identifier that follows it.
        name += " ";
        IsPrevInline = true;
      }
    }
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
    // If an attribute follows, skip it and resynchronize both cursors on the
    // token after it.
    const FormatToken *TokAfterAttr = skipAttribute(Tok);
    if (TokAfterAttr != Tok)
      FirstNSTok = Tok = TokAfterAttr;
  }
  // Flush the last pending token unless the name was already terminated or
  // the pending token is the opening brace.
  if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
    name += FirstNSTok->TokenText;
  // Without leading tokens, or when the name contains '::', `name` alone is
  // the result; otherwise the leading tokens and `name` are joined by a space.
  if (FirstNSName.empty() || HasColoncolon)
    return name;
  return name.empty() ? FirstNSName : FirstNSName + " " + name;
}
141
142std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
143                                  const FormatToken *NamespaceTok,
144                                  unsigned SpacesToAdd) {
145  std::string text = "//";
146  text.append(SpacesToAdd, ' ');
147  text += NamespaceTok->TokenText;
148  if (NamespaceTok->is(TT_NamespaceMacro))
149    text += "(";
150  else if (!NamespaceName.empty())
151    text += ' ';
152  text += NamespaceName;
153  if (NamespaceTok->is(TT_NamespaceMacro))
154    text += ")";
155  if (AddNewline)
156    text += '\n';
157  return text;
158}
159
160bool hasEndComment(const FormatToken *RBraceTok) {
161  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
162}
163
164bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
165                     const FormatToken *NamespaceTok) {
166  assert(hasEndComment(RBraceTok));
167  const FormatToken *Comment = RBraceTok->Next;
168
169  // Matches a valid namespace end comment.
170  // Valid namespace end comments don't need to be edited.
171  static const llvm::Regex NamespaceCommentPattern =
172      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
173                  "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$",
174                  llvm::Regex::IgnoreCase);
175  static const llvm::Regex NamespaceMacroCommentPattern =
176      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
177                  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$",
178                  llvm::Regex::IgnoreCase);
179
180  SmallVector<StringRef, 8> Groups;
181  if (NamespaceTok->is(TT_NamespaceMacro) &&
182      NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
183    StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
184    // The name of the macro must be used.
185    if (NamespaceTokenText != NamespaceTok->TokenText)
186      return false;
187  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
188             !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
189    // Comment does not match regex.
190    return false;
191  }
192  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : "";
193  // Anonymous namespace comments must not mention a namespace name.
194  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
195    return false;
196  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
197  // Named namespace comments must not mention anonymous namespace.
198  if (!NamespaceName.empty() && !AnonymousInComment.empty())
199    return false;
200  if (NamespaceNameInComment == NamespaceName)
201    return true;
202
203  // Has namespace comment flowed onto the next line.
204  // } // namespace
205  //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
206  if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
207    return false;
208
209  static const llvm::Regex CommentPattern = llvm::Regex(
210      "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
211
212  // Pull out just the comment text.
213  if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
214    return false;
215  NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
216
217  return NamespaceNameInComment == NamespaceName;
218}
219
220void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
221                   const SourceManager &SourceMgr,
222                   tooling::Replacements *Fixes) {
223  auto EndLoc = RBraceTok->Tok.getEndLoc();
224  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
225  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
226  if (Err) {
227    llvm::errs() << "Error while adding namespace end comment: "
228                 << llvm::toString(std::move(Err)) << "\n";
229  }
230}
231
232void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
233                      const SourceManager &SourceMgr,
234                      tooling::Replacements *Fixes) {
235  assert(hasEndComment(RBraceTok));
236  const FormatToken *Comment = RBraceTok->Next;
237  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
238                                             Comment->Tok.getEndLoc());
239  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
240  if (Err) {
241    llvm::errs() << "Error while updating namespace end comment: "
242                 << llvm::toString(std::move(Err)) << "\n";
243  }
244}
245} // namespace
246
247const FormatToken *
248getNamespaceToken(const AnnotatedLine *Line,
249                  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
250  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
251    return nullptr;
252  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
253  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
254    return nullptr;
255  assert(StartLineIndex < AnnotatedLines.size());
256  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
257  if (NamespaceTok->is(tok::l_brace)) {
258    // "namespace" keyword can be on the line preceding '{', e.g. in styles
259    // where BraceWrapping.AfterNamespace is true.
260    if (StartLineIndex > 0) {
261      NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
262      if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
263        return nullptr;
264    }
265  }
266
267  return NamespaceTok->getNamespaceToken();
268}
269
270StringRef
271getNamespaceTokenText(const AnnotatedLine *Line,
272                      const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
273  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
274  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
275}
276
// Constructs the fixer; Env and Style are forwarded unchanged to the
// TokenAnalyzer base class.
NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
                                                     const FormatStyle &Style)
    : TokenAnalyzer(Env, Style) {}
280
// Scans AnnotatedLines for closing braces of namespaces and collects the
// replacements needed to add a missing end comment, rewrite an invalid one,
// or (with Style.CompactNamespaces) merge the comments of consecutive
// closing braces into one.
//
// Returns the collected replacements paired with 0. No replacements are
// produced when the braces in AnnotatedLines are unbalanced. The Annotator
// and Tokens parameters are not used by this implementation.
std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
    FormatTokenLexer &Tokens) {
  const SourceManager &SourceMgr = Env.getSourceManager();
  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  tooling::Replacements Fixes;

  // Spin through the lines and ensure we have balanced braces.
  int Braces = 0;
  for (AnnotatedLine *Line : AnnotatedLines) {
    FormatToken *Tok = Line->First;
    while (Tok) {
      Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
      Tok = Tok->Next;
    }
  }
  // Don't attempt to comment unbalanced braces or this can
  // lead to comments being placed on the closing brace which isn't
  // the matching brace of the namespace. (occurs during incomplete editing).
  if (Braces != 0)
    return {Fixes, 0};

  // State carried across iterations while a run of closing braces is being
  // merged under Style.CompactNamespaces:
  //  - AllNamespaceNames: "::"-prefixed names of the braces deferred so far.
  //  - StartLineIndex: opening-line index taken from the first closing brace
  //    of the current run; SIZE_MAX means no run is in progress.
  //  - NamespaceTokenText: namespace token text of the first brace in the run.
  //  - CompactedNamespacesCount: number of braces deferred so far in the run.
  std::string AllNamespaceNames;
  size_t StartLineIndex = SIZE_MAX;
  StringRef NamespaceTokenText;
  unsigned int CompactedNamespacesCount = 0;
  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
    const AnnotatedLine *EndLine = AnnotatedLines[I];
    const FormatToken *NamespaceTok =
        getNamespaceToken(EndLine, AnnotatedLines);
    // Not a line that closes a namespace.
    if (!NamespaceTok)
      continue;
    FormatToken *RBraceTok = EndLine->First;
    // Skip braces already marked Finalized, and mark this one so that a later
    // check of the flag sees it as handled.
    if (RBraceTok->Finalized)
      continue;
    RBraceTok->Finalized = true;
    const FormatToken *EndCommentPrevTok = RBraceTok;
    // Namespaces often end with '};'. In that case, attach namespace end
    // comments to the semicolon tokens.
    if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
      EndCommentPrevTok = RBraceTok->Next;
    if (StartLineIndex == SIZE_MAX)
      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
    std::string NamespaceName = computeName(NamespaceTok);
    if (Style.CompactNamespaces) {
      if (CompactedNamespacesCount == 0)
        NamespaceTokenText = NamespaceTok->TokenText;
      // Defer to the next line when it closes a namespace with the same
      // namespace token text, its opening line index is exactly one less than
      // the lowest opening index of this run, and it is not yet Finalized.
      if ((I + 1 < E) &&
          NamespaceTokenText ==
              getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
          StartLineIndex - CompactedNamespacesCount - 1 ==
              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
          !AnnotatedLines[I + 1]->First->Finalized) {
        if (hasEndComment(EndCommentPrevTok)) {
          // remove end comment, it will be merged in next one
          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
        }
        ++CompactedNamespacesCount;
        // Prepend, so that names of later (outer) braces end up in front.
        if (!NamespaceName.empty())
          AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
        continue;
      }
      // End of the run: this brace carries the merged name; reset the
      // accumulated state.
      NamespaceName += AllNamespaceNames;
      CompactedNamespacesCount = 0;
      AllNamespaceNames = std::string();
    }
    // The next token in the token stream after the place where the end comment
    // token must be. This is either the next token on the current line or the
    // first token on the next line.
    const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
    if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
      EndCommentNextTok = EndCommentNextTok->Next;
    if (!EndCommentNextTok && I + 1 < E)
      EndCommentNextTok = AnnotatedLines[I + 1]->First;
    // The comment needs its own trailing newline when a token other than eof
    // follows with no newline before it.
    bool AddNewline = EndCommentNextTok &&
                      EndCommentNextTok->NewlinesBefore == 0 &&
                      EndCommentNextTok->isNot(tok::eof);
    const std::string EndCommentText =
        computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
                              Style.SpacesInLineCommentPrefix.Minimum);
    if (!hasEndComment(EndCommentPrevTok)) {
      // No comment yet: add one only when the sum of size() over the lines
      // strictly between StartLineIndex and I exceeds
      // Style.ShortNamespaceLines.
      unsigned LineCount = 0;
      for (auto J = StartLineIndex + 1; J < I; ++J)
        LineCount += AnnotatedLines[J]->size();
      if (LineCount > Style.ShortNamespaceLines) {
        addEndComment(EndCommentPrevTok,
                      std::string(Style.SpacesBeforeTrailingComments, ' ') +
                          EndCommentText,
                      SourceMgr, &Fixes);
      }
    } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
                                NamespaceTok)) {
      // An existing comment that does not validate is replaced.
      updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    }
    // This brace was fully handled; the next namespace brace starts afresh.
    StartLineIndex = SIZE_MAX;
  }
  return {Fixes, 0};
}
379
380} // namespace format
381} // namespace clang
382