1//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// This file contains the implementation of MacroCallReconstructor, which fits
12/// an reconstructed macro call to a parsed set of UnwrappedLines.
13///
14//===----------------------------------------------------------------------===//
15
16#include "Macros.h"
17
18#include "UnwrappedLineParser.h"
19#include "clang/Basic/TokenKinds.h"
20#include "llvm/ADT/DenseSet.h"
21#include "llvm/Support/Debug.h"
22#include <cassert>
23
24#define DEBUG_TYPE "format-reconstruct"
25
26namespace clang {
27namespace format {
28
29// Call \p Call for each token in the unwrapped line given, passing
30// the token, its parent and whether it is the first token in the line.
31template <typename T>
32void forEachToken(const UnwrappedLine &Line, const T &Call,
33                  FormatToken *Parent = nullptr) {
34  bool First = true;
35  for (const auto &N : Line.Tokens) {
36    Call(N.Tok, Parent, First);
37    First = false;
38    for (const auto &Child : N.Children)
39      forEachToken(Child, Call, N.Tok);
40  }
41}
42
43MacroCallReconstructor::MacroCallReconstructor(
44    unsigned Level,
45    const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46        &ActiveExpansions)
47    : Level(Level), IdToReconstructed(ActiveExpansions) {
48  Result.Tokens.push_back(std::make_unique<LineNode>());
49  ActiveReconstructedLines.push_back(&Result);
50}
51
52void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53  assert(State != Finalized);
54  LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55  forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56    add(Token, Parent, First);
57  });
58  assert(InProgress || finished());
59}
60
61UnwrappedLine MacroCallReconstructor::takeResult() && {
62  finalize();
63  assert(Result.Tokens.size() == 1 &&
64         Result.Tokens.front()->Children.size() == 1);
65  UnwrappedLine Final =
66      createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67  assert(!Final.Tokens.empty());
68  return Final;
69}
70
71// Reconstruct the position of the next \p Token, given its parent \p
72// ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73// is the first token in a given unwrapped line.
74void MacroCallReconstructor::add(FormatToken *Token,
75                                 FormatToken *ExpandedParent, bool First) {
76  LLVM_DEBUG(
77      llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78                   << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79                   << ", First: " << First << "\n");
80  // In order to be able to find the correct parent in the reconstructed token
81  // stream, we need to continue the last open reconstruction until we find the
82  // given token if it is part of the reconstructed token stream.
83  //
84  // Note that hidden tokens can be part of the reconstructed stream in nested
85  // macro calls.
86  // For example, given
87  //   #define C(x, y) x y
88  //   #define B(x) {x}
89  // And the call:
90  //   C(a, B(b))
91  // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92  // found in the reconstructed token stream of that expansion level.
93  // In the expanded token stream
94  //   a {b}
95  // 'b' is a child of '{'. We need to continue the open expansion of the ','
96  // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97  // so we later set the spelled token 'b' as a child of the ','.
98  if (!ActiveExpansions.empty() && Token->MacroCtx &&
99      (Token->MacroCtx->Role != MR_Hidden ||
100       ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101    if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102      First = true;
103  }
104
105  prepareParent(ExpandedParent, First);
106
107  if (Token->MacroCtx) {
108    // If this token was generated by a macro call, add the reconstructed
109    // equivalent of the token.
110    reconstruct(Token);
111  } else {
112    // Otherwise, we add it to the current line.
113    appendToken(Token);
114  }
115}
116
117// Adjusts the stack of active reconstructed lines so we're ready to push
118// tokens. The tokens to be pushed are children of ExpandedParent in the
119// expanded code.
120//
121// This may entail:
122// - creating a new line, if the parent is on the active line
123// - popping active lines, if the parent is further up the stack
124//
125// Postcondition:
126// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127// reconstructed replacement token as a parent (when possible) - that is, the
128// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129// is the parent of ActiveReconstructedLines.back() in the reconstructed
130// unwrapped line.
131void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132                                           bool NewLine) {
133  LLVM_DEBUG({
134    llvm::dbgs() << "ParentMap:\n";
135    debugParentMap();
136  });
137  // We want to find the parent in the new unwrapped line, where the expanded
138  // parent might have been replaced during reconstruction.
139  FormatToken *Parent = getParentInResult(ExpandedParent);
140  LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141                          << (Parent ? Parent->TokenText : "<null>") << "\n");
142
143  FormatToken *OpenMacroParent = nullptr;
144  if (!MacroCallStructure.empty()) {
145    // Inside a macro expansion, it is possible to lose track of the correct
146    // parent - either because it is already popped, for example because it was
147    // in a different macro argument (e.g. M({, })), or when we work on invalid
148    // code.
149    // Thus, we use the innermost macro call's parent as the parent at which
150    // we stop; this allows us to stay within the macro expansion and keeps
151    // any problems confined to the extent of the macro call.
152    OpenMacroParent =
153        getParentInResult(MacroCallStructure.back().MacroCallLParen);
154    LLVM_DEBUG(llvm::dbgs()
155               << "MacroCallLParen: "
156               << MacroCallStructure.back().MacroCallLParen->TokenText
157               << ", OpenMacroParent: "
158               << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159               << "\n");
160  }
161  if (NewLine ||
162      (!ActiveReconstructedLines.back()->Tokens.empty() &&
163       Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164    // If we are at the first token in a new line, we want to also
165    // create a new line in the resulting reconstructed unwrapped line.
166    while (ActiveReconstructedLines.back()->Tokens.empty() ||
167           (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168            ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169                OpenMacroParent)) {
170      ActiveReconstructedLines.pop_back();
171      assert(!ActiveReconstructedLines.empty());
172    }
173    assert(!ActiveReconstructedLines.empty());
174    ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175        std::make_unique<ReconstructedLine>());
176    ActiveReconstructedLines.push_back(
177        &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178  } else if (parentLine().Tokens.back()->Tok != Parent) {
179    // If we're not the first token in a new line, pop lines until we find
180    // the child of \c Parent in the stack.
181    while (Parent != parentLine().Tokens.back()->Tok &&
182           parentLine().Tokens.back()->Tok &&
183           parentLine().Tokens.back()->Tok != OpenMacroParent) {
184      ActiveReconstructedLines.pop_back();
185      assert(!ActiveReconstructedLines.empty());
186    }
187  }
188  assert(!ActiveReconstructedLines.empty());
189}
190
191// For a given \p Parent in the incoming expanded token stream, find the
192// corresponding parent in the output.
193FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194  FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195  if (!Mapped)
196    return Parent;
197  for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198    Parent = Mapped;
199  // If we use a different token than the parent in the expanded token stream
200  // as parent, mark it as a special parent, so the formatting code knows it
201  // needs to have its children formatted.
202  Parent->MacroParent = true;
203  return Parent;
204}
205
206// Reconstruct a \p Token that was expanded from a macro call.
207void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208  assert(Token->MacroCtx);
209  // A single token can be the only result of a macro call:
210  // Given: #define ID(x, y) ;
211  // And the call: ID(<some>, <tokens>)
212  // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213  if (Token->MacroCtx->StartOfExpansion) {
214    startReconstruction(Token);
215    // If the order of tokens in the expanded token stream is not the
216    // same as the order of tokens in the reconstructed stream, we need
217    // to reconstruct tokens that arrive later in the stream.
218    if (Token->MacroCtx->Role != MR_Hidden)
219      reconstructActiveCallUntil(Token);
220  }
221  assert(!ActiveExpansions.empty());
222  if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223    assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224    if (Token->MacroCtx->Role != MR_Hidden) {
225      // The current token in the reconstructed token stream must be the token
226      // we're looking for - we either arrive here after startReconstruction,
227      // which initiates the stream to the first token, or after
228      // continueReconstructionUntil skipped until the expected token in the
229      // reconstructed stream at the start of add(...).
230      assert(ActiveExpansions.back().SpelledI->Tok == Token);
231      processNextReconstructed();
232    } else if (!currentLine()->Tokens.empty()) {
233      // Map all hidden tokens to the last visible token in the output.
234      // If the hidden token is a parent, we'll use the last visible
235      // token as the parent of the hidden token's children.
236      SpelledParentToReconstructedParent[Token] =
237          currentLine()->Tokens.back()->Tok;
238    } else {
239      for (auto I = ActiveReconstructedLines.rbegin(),
240                E = ActiveReconstructedLines.rend();
241           I != E; ++I) {
242        if (!(*I)->Tokens.empty()) {
243          SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244          break;
245        }
246      }
247    }
248  }
249  if (Token->MacroCtx->EndOfExpansion)
250    endReconstruction(Token);
251}
252
253// Given a \p Token that starts an expansion, reconstruct the beginning of the
254// macro call.
255// For example, given: #define ID(x) x
256// And the call: ID(int a)
257// Reconstructs: ID(
258void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259  assert(Token->MacroCtx);
260  assert(!Token->MacroCtx->ExpandedFrom.empty());
261  assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262#ifndef NDEBUG
263  // Check that the token's reconstruction stack matches our current
264  // reconstruction stack.
265  for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266    assert(ActiveExpansions[I].ID ==
267           Token->MacroCtx
268               ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269  }
270#endif
271  // Start reconstruction for all calls for which this token is the first token
272  // generated by the call.
273  // Note that the token's expanded from stack is inside-to-outside, and the
274  // expansions for which this token is not the first are the outermost ones.
275  ArrayRef<FormatToken *> StartedMacros =
276      ArrayRef(Token->MacroCtx->ExpandedFrom)
277          .drop_back(ActiveExpansions.size());
278  assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279  // We reconstruct macro calls outside-to-inside.
280  for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281    // We found a macro call to be reconstructed; the next time our
282    // reconstruction stack is empty we know we finished an reconstruction.
283#ifndef NDEBUG
284    State = InProgress;
285#endif
286    // Put the reconstructed macro call's token into our reconstruction stack.
287    auto IU = IdToReconstructed.find(ID);
288    assert(IU != IdToReconstructed.end());
289    ActiveExpansions.push_back(
290        {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291    // Process the macro call's identifier.
292    processNextReconstructed();
293    if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294      continue;
295    if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296      // Process the optional opening parenthesis.
297      processNextReconstructed();
298    }
299  }
300}
301
302// Add all tokens in the reconstruction stream to the output until we find the
303// given \p Token.
304bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305  assert(!ActiveExpansions.empty());
306  bool PassedMacroComma = false;
307  // FIXME: If Token was already expanded earlier, due to
308  // a change in order, we will not find it, but need to
309  // skip it.
310  while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311         ActiveExpansions.back().SpelledI->Tok != Token) {
312    PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313  }
314  return PassedMacroComma;
315}
316
317// End all reconstructions for which \p Token is the final token.
318void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319  assert(Token->MacroCtx &&
320         (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321  for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322    LLVM_DEBUG([&] {
323      // Check all remaining tokens but the final closing parenthesis and
324      // optional trailing comment were already reconstructed at an inner
325      // expansion level.
326      for (auto T = ActiveExpansions.back().SpelledI;
327           T != ActiveExpansions.back().SpelledE; ++T) {
328        FormatToken *Token = T->Tok;
329        bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
330                             std::next(T)->Tok->isTrailingComment()) &&
331                            !Token->MacroCtx && Token->is(tok::r_paren);
332        bool TrailingComment = Token->isTrailingComment();
333        bool PreviousLevel =
334            Token->MacroCtx &&
335            (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
336        if (!ClosingParen && !TrailingComment && !PreviousLevel)
337          llvm::dbgs() << "At token: " << Token->TokenText << "\n";
338        // In addition to the following cases, we can also run into this
339        // when a macro call had more arguments than expected; in that case,
340        // the comma and the remaining tokens in the macro call will
341        // potentially end up in the line when we finish the expansion.
342        // FIXME: Add the information which arguments are unused, and assert
343        // one of the cases below plus reconstructed macro argument tokens.
344        // assert(ClosingParen || TrailingComment || PreviousLevel);
345      }
346    }());
347    // Handle the remaining open tokens:
348    // - expand the closing parenthesis, if it exists, including an optional
349    //   trailing comment
350    // - handle tokens that were already reconstructed at an inner expansion
351    //   level
352    // - handle tokens when a macro call had more than the expected number of
353    //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354    //   up with the sequence ", b, c)" being open at the end of the
355    //   reconstruction; we want to gracefully handle that case
356    //
357    // FIXME: See the above debug-check for what we will need to do to be
358    // able to assert this.
359    for (auto T = ActiveExpansions.back().SpelledI;
360         T != ActiveExpansions.back().SpelledE; ++T) {
361      processNextReconstructed();
362    }
363    ActiveExpansions.pop_back();
364  }
365}
366
367void MacroCallReconstructor::debugParentMap() const {
368  llvm::DenseSet<FormatToken *> Values;
369  for (const auto &P : SpelledParentToReconstructedParent)
370    Values.insert(P.second);
371
372  for (const auto &P : SpelledParentToReconstructedParent) {
373    if (Values.contains(P.first))
374      continue;
375    llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
376    for (auto I = SpelledParentToReconstructedParent.find(P.first),
377              E = SpelledParentToReconstructedParent.end();
378         I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
379      llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
380    }
381    llvm::dbgs() << "\n";
382  }
383}
384
385// If visible, add the next token of the reconstructed token sequence to the
386// output. Returns whether reconstruction passed a comma that is part of a
387// macro call.
388bool MacroCallReconstructor::processNextReconstructed() {
389  FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
390  ++ActiveExpansions.back().SpelledI;
391  if (Token->MacroCtx) {
392    // Skip tokens that are not part of the macro call.
393    if (Token->MacroCtx->Role == MR_Hidden)
394      return false;
395    // Skip tokens we already expanded during an inner reconstruction.
396    // For example, given: #define ID(x) {x}
397    // And the call: ID(ID(f))
398    // We get two reconstructions:
399    // ID(f) -> {f}
400    // ID({f}) -> {{f}}
401    // We reconstruct f during the first reconstruction, and skip it during the
402    // second reconstruction.
403    if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
404      return false;
405  }
406  // Tokens that do not have a macro context are tokens in that are part of the
407  // macro call that have not taken part in expansion.
408  if (!Token->MacroCtx) {
409    // Put the parentheses and commas of a macro call into the same line;
410    // if the arguments produce new unwrapped lines, they will become children
411    // of the corresponding opening parenthesis or comma tokens in the
412    // reconstructed call.
413    if (Token->is(tok::l_paren)) {
414      MacroCallStructure.push_back(MacroCallState(
415          currentLine(), parentLine().Tokens.back()->Tok, Token));
416      // All tokens that are children of the previous line's last token in the
417      // reconstructed token stream will now be children of the l_paren token.
418      // For example, for the line containing the macro calls:
419      //   auto x = ID({ID(2)});
420      // We will build up a map <null> -> ( -> ( with the first and second
421      // l_paren of the macro call respectively. New lines that come in with a
422      // <null> parent will then become children of the l_paren token of the
423      // currently innermost macro call.
424      SpelledParentToReconstructedParent[MacroCallStructure.back()
425                                             .ParentLastToken] = Token;
426      appendToken(Token);
427      prepareParent(Token, /*NewLine=*/true);
428      Token->MacroParent = true;
429      return false;
430    }
431    if (!MacroCallStructure.empty()) {
432      if (Token->is(tok::comma)) {
433        // Make new lines inside the next argument children of the comma token.
434        SpelledParentToReconstructedParent
435            [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
436        Token->MacroParent = true;
437        appendToken(Token, MacroCallStructure.back().Line);
438        prepareParent(Token, /*NewLine=*/true);
439        return true;
440      }
441      if (Token->is(tok::r_paren)) {
442        appendToken(Token, MacroCallStructure.back().Line);
443        SpelledParentToReconstructedParent.erase(
444            MacroCallStructure.back().ParentLastToken);
445        MacroCallStructure.pop_back();
446        return false;
447      }
448    }
449  }
450  // Note that any tokens that are tagged with MR_None have been passed as
451  // arguments to the macro that have not been expanded, for example:
452  // Given: #define ID(X) x
453  // When calling: ID(a, b)
454  // 'b' will be part of the reconstructed token stream, but tagged MR_None.
455  // Given that erroring out in this case would be disruptive, we continue
456  // pushing the (unformatted) token.
457  // FIXME: This can lead to unfortunate formatting decisions - give the user
458  // a hint that their macro definition is broken.
459  appendToken(Token);
460  return false;
461}
462
463void MacroCallReconstructor::finalize() {
464#ifndef NDEBUG
465  assert(State != Finalized && finished());
466  State = Finalized;
467#endif
468
469  // We created corresponding unwrapped lines for each incoming line as children
470  // the the toplevel null token.
471  assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
472  LLVM_DEBUG({
473    llvm::dbgs() << "Finalizing reconstructed lines:\n";
474    debug(Result, 0);
475  });
476
477  // The first line becomes the top level line in the resulting unwrapped line.
478  LineNode &Top = *Result.Tokens.front();
479  auto *I = Top.Children.begin();
480  // Every subsequent line will become a child of the last token in the previous
481  // line, which is the token prior to the first token in the line.
482  LineNode *Last = (*I)->Tokens.back().get();
483  ++I;
484  for (auto *E = Top.Children.end(); I != E; ++I) {
485    assert(Last->Children.empty());
486    Last->Children.push_back(std::move(*I));
487
488    // Mark the previous line's last token as generated by a macro expansion
489    // so the formatting algorithm can take that into account.
490    Last->Tok->MacroParent = true;
491
492    Last = Last->Children.back()->Tokens.back().get();
493  }
494  Top.Children.resize(1);
495}
496
497void MacroCallReconstructor::appendToken(FormatToken *Token,
498                                         ReconstructedLine *L) {
499  L = L ? L : currentLine();
500  LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
501  L->Tokens.push_back(std::make_unique<LineNode>(Token));
502}
503
504UnwrappedLine
505MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
506                                            int Level) {
507  UnwrappedLine Result;
508  Result.Level = Level;
509  for (const auto &N : Line.Tokens) {
510    Result.Tokens.push_back(N->Tok);
511    UnwrappedLineNode &Current = Result.Tokens.back();
512    for (const auto &Child : N->Children) {
513      if (Child->Tokens.empty())
514        continue;
515      Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
516    }
517    if (Current.Children.size() == 1 &&
518        Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
519      Result.Tokens.splice(Result.Tokens.end(),
520                           Current.Children.front().Tokens);
521      Current.Children.clear();
522    }
523  }
524  return Result;
525}
526
527void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
528  for (int i = 0; i < Level; ++i)
529    llvm::dbgs() << " ";
530  for (const auto &N : Line.Tokens) {
531    if (!N)
532      continue;
533    if (N->Tok)
534      llvm::dbgs() << N->Tok->TokenText << " ";
535    for (const auto &Child : N->Children) {
536      llvm::dbgs() << "\n";
537      debug(*Child, Level + 1);
538      for (int i = 0; i < Level; ++i)
539        llvm::dbgs() << " ";
540    }
541  }
542  llvm::dbgs() << "\n";
543}
544
545MacroCallReconstructor::ReconstructedLine &
546MacroCallReconstructor::parentLine() {
547  return **std::prev(std::prev(ActiveReconstructedLines.end()));
548}
549
550MacroCallReconstructor::ReconstructedLine *
551MacroCallReconstructor::currentLine() {
552  return ActiveReconstructedLines.back();
553}
554
555MacroCallReconstructor::MacroCallState::MacroCallState(
556    MacroCallReconstructor::ReconstructedLine *Line,
557    FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
558    : Line(Line), ParentLastToken(ParentLastToken),
559      MacroCallLParen(MacroCallLParen) {
560  LLVM_DEBUG(
561      llvm::dbgs() << "ParentLastToken: "
562                   << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
563                   << "\n");
564
565  assert(MacroCallLParen->is(tok::l_paren));
566}
567
568} // namespace format
569} // namespace clang
570