1//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// This file contains the implementation of MacroExpander, which handles macro
12/// configuration and expansion while formatting.
13///
14//===----------------------------------------------------------------------===//
15
16#include "Macros.h"
17
18#include "Encoding.h"
19#include "FormatToken.h"
20#include "FormatTokenLexer.h"
21#include "clang/Basic/TokenKinds.h"
22#include "clang/Format/Format.h"
23#include "clang/Lex/HeaderSearch.h"
24#include "clang/Lex/HeaderSearchOptions.h"
25#include "clang/Lex/Lexer.h"
26#include "clang/Lex/ModuleLoader.h"
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Lex/PreprocessorOptions.h"
29#include "llvm/ADT/StringSet.h"
30#include "llvm/Support/ErrorHandling.h"
31
32namespace clang {
33namespace format {
34
35struct MacroExpander::Definition {
36  StringRef Name;
37  SmallVector<FormatToken *, 8> Params;
38  SmallVector<FormatToken *, 8> Body;
39
40  // Map from each argument's name to its position in the argument list.
41  // With "M(x, y) x + y":
42  //   x -> 0
43  //   y -> 1
44  llvm::StringMap<size_t> ArgMap;
45
46  bool ObjectLike = true;
47};
48
49class MacroExpander::DefinitionParser {
50public:
51  DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
52    assert(!Tokens.empty());
53    Current = Tokens[0];
54  }
55
56  // Parse the token stream and return the corresonding Definition object.
57  // Returns an empty definition object with a null-Name on error.
58  MacroExpander::Definition parse() {
59    if (!Current->is(tok::identifier))
60      return {};
61    Def.Name = Current->TokenText;
62    nextToken();
63    if (Current->is(tok::l_paren)) {
64      Def.ObjectLike = false;
65      if (!parseParams())
66        return {};
67    }
68    if (!parseExpansion())
69      return {};
70
71    return Def;
72  }
73
74private:
75  bool parseParams() {
76    assert(Current->is(tok::l_paren));
77    nextToken();
78    while (Current->is(tok::identifier)) {
79      Def.Params.push_back(Current);
80      Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
81      nextToken();
82      if (Current->isNot(tok::comma))
83        break;
84      nextToken();
85    }
86    if (Current->isNot(tok::r_paren))
87      return false;
88    nextToken();
89    return true;
90  }
91
92  bool parseExpansion() {
93    if (!Current->isOneOf(tok::equal, tok::eof))
94      return false;
95    if (Current->is(tok::equal))
96      nextToken();
97    parseTail();
98    return true;
99  }
100
101  void parseTail() {
102    while (Current->isNot(tok::eof)) {
103      Def.Body.push_back(Current);
104      nextToken();
105    }
106    Def.Body.push_back(Current);
107  }
108
109  void nextToken() {
110    if (Pos + 1 < Tokens.size())
111      ++Pos;
112    Current = Tokens[Pos];
113    Current->Finalized = true;
114  }
115
116  size_t Pos = 0;
117  FormatToken *Current = nullptr;
118  Definition Def;
119  ArrayRef<FormatToken *> Tokens;
120};
121
122MacroExpander::MacroExpander(
123    const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
124    const FormatStyle &Style,
125    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
126    IdentifierTable &IdentTable)
127    : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
128      IdentTable(IdentTable) {
129  for (const std::string &Macro : Macros) {
130    parseDefinition(Macro);
131  }
132}
133
134MacroExpander::~MacroExpander() = default;
135
136void MacroExpander::parseDefinition(const std::string &Macro) {
137  Buffers.push_back(
138      llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
139  clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
140  FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
141                       Allocator, IdentTable);
142  const auto Tokens = Lex.lex();
143  if (!Tokens.empty()) {
144    DefinitionParser Parser(Tokens);
145    auto Definition = Parser.parse();
146    Definitions[Definition.Name] = std::move(Definition);
147  }
148}
149
150bool MacroExpander::defined(llvm::StringRef Name) const {
151  return Definitions.find(Name) != Definitions.end();
152}
153
154bool MacroExpander::objectLike(llvm::StringRef Name) const {
155  return Definitions.find(Name)->second.ObjectLike;
156}
157
158llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
159                                                          ArgsList Args) const {
160  assert(defined(ID->TokenText));
161  SmallVector<FormatToken *, 8> Result;
162  const Definition &Def = Definitions.find(ID->TokenText)->second;
163
164  // Expand each argument at most once.
165  llvm::StringSet<> ExpandedArgs;
166
167  // Adds the given token to Result.
168  auto pushToken = [&](FormatToken *Tok) {
169    Tok->MacroCtx->ExpandedFrom.push_back(ID);
170    Result.push_back(Tok);
171  };
172
173  // If Tok references a parameter, adds the corresponding argument to Result.
174  // Returns false if Tok does not reference a parameter.
175  auto expandArgument = [&](FormatToken *Tok) -> bool {
176    // If the current token references a parameter, expand the corresponding
177    // argument.
178    if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
179      return false;
180    ExpandedArgs.insert(Tok->TokenText);
181    auto I = Def.ArgMap.find(Tok->TokenText);
182    if (I == Def.ArgMap.end())
183      return false;
184    // If there are fewer arguments than referenced parameters, treat the
185    // parameter as empty.
186    // FIXME: Potentially fully abort the expansion instead.
187    if (I->getValue() >= Args.size())
188      return true;
189    for (FormatToken *Arg : Args[I->getValue()]) {
190      // A token can be part of a macro argument at multiple levels.
191      // For example, with "ID(x) x":
192      // in ID(ID(x)), 'x' is expanded first as argument to the inner
193      // ID, then again as argument to the outer ID. We keep the macro
194      // role the token had from the inner expansion.
195      if (!Arg->MacroCtx)
196        Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
197      pushToken(Arg);
198    }
199    return true;
200  };
201
202  // Expand the definition into Result.
203  for (FormatToken *Tok : Def.Body) {
204    if (expandArgument(Tok))
205      continue;
206    // Create a copy of the tokens from the macro body, i.e. were not provided
207    // by user code.
208    FormatToken *New = new (Allocator.Allocate()) FormatToken;
209    New->copyFrom(*Tok);
210    assert(!New->MacroCtx);
211    // Tokens that are not part of the user code are not formatted.
212    New->MacroCtx = MacroExpansion(MR_Hidden);
213    pushToken(New);
214  }
215  assert(Result.size() >= 1 && Result.back()->is(tok::eof));
216  if (Result.size() > 1) {
217    ++Result[0]->MacroCtx->StartOfExpansion;
218    ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
219  }
220  return Result;
221}
222
223} // namespace format
224} // namespace clang
225