1//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of MacroExpander, which handles macro
11/// configuration and expansion while formatting.
12///
13//===----------------------------------------------------------------------===//
14
15#include "Macros.h"
16
17#include "Encoding.h"
18#include "FormatToken.h"
19#include "FormatTokenLexer.h"
20#include "clang/Basic/TokenKinds.h"
21#include "clang/Format/Format.h"
22#include "clang/Lex/HeaderSearch.h"
23#include "clang/Lex/HeaderSearchOptions.h"
24#include "clang/Lex/Lexer.h"
25#include "clang/Lex/ModuleLoader.h"
26#include "clang/Lex/Preprocessor.h"
27#include "clang/Lex/PreprocessorOptions.h"
28#include "llvm/ADT/StringSet.h"
29#include "llvm/Support/ErrorHandling.h"
30
31namespace clang {
32namespace format {
33
// The parsed form of a single configured macro definition.
struct MacroExpander::Definition {
  // The macro's name, taken from the text of the leading identifier token.
  // Left empty in the default-constructed Definition that the parser returns
  // when a definition cannot be parsed.
  StringRef Name;
  // The identifier tokens naming the parameters, in declaration order.
  SmallVector<FormatToken *, 8> Params;
  // The tokens the macro expands to. The parser always appends the eof token
  // as the last element of a successfully parsed body.
  SmallVector<FormatToken *, 8> Body;

  // Map from each argument's name to its position in the argument list.
  // With "M(x, y) x + y":
  //   x -> 0
  //   y -> 1
  llvm::StringMap<size_t> ArgMap;

  // True unless the definition has a parenthesized parameter list directly
  // after its name.
  bool ObjectLike = true;
};
47
48class MacroExpander::DefinitionParser {
49public:
50  DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51    assert(!Tokens.empty());
52    Current = Tokens[0];
53  }
54
55  // Parse the token stream and return the corresponding Definition object.
56  // Returns an empty definition object with a null-Name on error.
57  MacroExpander::Definition parse() {
58    if (Current->isNot(tok::identifier))
59      return {};
60    Def.Name = Current->TokenText;
61    nextToken();
62    if (Current->is(tok::l_paren)) {
63      Def.ObjectLike = false;
64      if (!parseParams())
65        return {};
66    }
67    if (!parseExpansion())
68      return {};
69
70    return Def;
71  }
72
73private:
74  bool parseParams() {
75    assert(Current->is(tok::l_paren));
76    nextToken();
77    while (Current->is(tok::identifier)) {
78      Def.Params.push_back(Current);
79      Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80      nextToken();
81      if (Current->isNot(tok::comma))
82        break;
83      nextToken();
84    }
85    if (Current->isNot(tok::r_paren))
86      return false;
87    nextToken();
88    return true;
89  }
90
91  bool parseExpansion() {
92    if (!Current->isOneOf(tok::equal, tok::eof))
93      return false;
94    if (Current->is(tok::equal))
95      nextToken();
96    parseTail();
97    return true;
98  }
99
100  void parseTail() {
101    while (Current->isNot(tok::eof)) {
102      Def.Body.push_back(Current);
103      nextToken();
104    }
105    Def.Body.push_back(Current);
106  }
107
108  void nextToken() {
109    if (Pos + 1 < Tokens.size())
110      ++Pos;
111    Current = Tokens[Pos];
112    Current->Finalized = true;
113  }
114
115  size_t Pos = 0;
116  FormatToken *Current = nullptr;
117  Definition Def;
118  ArrayRef<FormatToken *> Tokens;
119};
120
121MacroExpander::MacroExpander(
122    const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123    const FormatStyle &Style,
124    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125    IdentifierTable &IdentTable)
126    : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127      IdentTable(IdentTable) {
128  for (const std::string &Macro : Macros)
129    parseDefinition(Macro);
130}
131
// Defaulted here, in the file that defines MacroExpander::Definition.
// NOTE(review): presumably this has to live out of line because the header
// only declares Definition while MacroExpander stores Definition objects --
// confirm against Macros.h.
MacroExpander::~MacroExpander() = default;
133
134void MacroExpander::parseDefinition(const std::string &Macro) {
135  Buffers.push_back(
136      llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137  clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138  FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139                       Allocator, IdentTable);
140  const auto Tokens = Lex.lex();
141  if (!Tokens.empty()) {
142    DefinitionParser Parser(Tokens);
143    auto Definition = Parser.parse();
144    if (Definition.ObjectLike) {
145      ObjectLike[Definition.Name] = std::move(Definition);
146    } else {
147      FunctionLike[Definition.Name][Definition.Params.size()] =
148          std::move(Definition);
149    }
150  }
151}
152
153bool MacroExpander::defined(llvm::StringRef Name) const {
154  return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155}
156
157bool MacroExpander::objectLike(llvm::StringRef Name) const {
158  return ObjectLike.contains(Name);
159}
160
161bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const {
162  auto it = FunctionLike.find(Name);
163  return it != FunctionLike.end() && it->second.contains(Arity);
164}
165
166llvm::SmallVector<FormatToken *, 8>
167MacroExpander::expand(FormatToken *ID,
168                      std::optional<ArgsList> OptionalArgs) const {
169  if (OptionalArgs)
170    assert(hasArity(ID->TokenText, OptionalArgs->size()));
171  else
172    assert(objectLike(ID->TokenText));
173  const Definition &Def = OptionalArgs
174                              ? FunctionLike.find(ID->TokenText)
175                                    ->second.find(OptionalArgs.value().size())
176                                    ->second
177                              : ObjectLike.find(ID->TokenText)->second;
178  ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
179  SmallVector<FormatToken *, 8> Result;
180  // Expand each argument at most once.
181  llvm::StringSet<> ExpandedArgs;
182
183  // Adds the given token to Result.
184  auto pushToken = [&](FormatToken *Tok) {
185    Tok->MacroCtx->ExpandedFrom.push_back(ID);
186    Result.push_back(Tok);
187  };
188
189  // If Tok references a parameter, adds the corresponding argument to Result.
190  // Returns false if Tok does not reference a parameter.
191  auto expandArgument = [&](FormatToken *Tok) -> bool {
192    // If the current token references a parameter, expand the corresponding
193    // argument.
194    if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
195      return false;
196    ExpandedArgs.insert(Tok->TokenText);
197    auto I = Def.ArgMap.find(Tok->TokenText);
198    if (I == Def.ArgMap.end())
199      return false;
200    // If there are fewer arguments than referenced parameters, treat the
201    // parameter as empty.
202    // FIXME: Potentially fully abort the expansion instead.
203    if (I->getValue() >= Args.size())
204      return true;
205    for (FormatToken *Arg : Args[I->getValue()]) {
206      // A token can be part of a macro argument at multiple levels.
207      // For example, with "ID(x) x":
208      // in ID(ID(x)), 'x' is expanded first as argument to the inner
209      // ID, then again as argument to the outer ID. We keep the macro
210      // role the token had from the inner expansion.
211      if (!Arg->MacroCtx)
212        Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
213      pushToken(Arg);
214    }
215    return true;
216  };
217
218  // Expand the definition into Result.
219  for (FormatToken *Tok : Def.Body) {
220    if (expandArgument(Tok))
221      continue;
222    // Create a copy of the tokens from the macro body, i.e. were not provided
223    // by user code.
224    FormatToken *New = new (Allocator.Allocate()) FormatToken;
225    New->copyFrom(*Tok);
226    assert(!New->MacroCtx);
227    // Tokens that are not part of the user code are not formatted.
228    New->MacroCtx = MacroExpansion(MR_Hidden);
229    pushToken(New);
230  }
231  assert(Result.size() >= 1 && Result.back()->is(tok::eof));
232  if (Result.size() > 1) {
233    ++Result[0]->MacroCtx->StartOfExpansion;
234    ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
235  }
236  return Result;
237}
238
239} // namespace format
240} // namespace clang
241