1//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/MacroExpansionContext.h"
10#include "llvm/Support/Debug.h"
11
12#define DEBUG_TYPE "macro-expansion-context"
13
/// Writes a textual form of \p Tok to \p OS, using \p PP to obtain the
/// spelling where needed. Forward-declared here because the definition appears
/// further down in this file, after its first use in the debug output of
/// detail::MacroExpansionRangeRecorder.
static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
                          clang::Token Tok);
16
17namespace clang {
18namespace detail {
19class MacroExpansionRangeRecorder : public PPCallbacks {
20  const Preprocessor &PP;
21  SourceManager &SM;
22  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
23
24public:
25  explicit MacroExpansionRangeRecorder(
26      const Preprocessor &PP, SourceManager &SM,
27      MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
28      : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
29
30  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
31                    SourceRange Range, const MacroArgs *Args) override {
32    // Ignore annotation tokens like: _Pragma("pack(push, 1)")
33    if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
34      return;
35
36    SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
37    assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
38
39    const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
40      // If the range is empty, use the length of the macro.
41      if (Range.getBegin() == Range.getEnd())
42        return SM.getExpansionLoc(
43            MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
44
45      // Include the last character.
46      return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
47    }();
48
49    (void)PP;
50    LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
51               dumpTokenInto(PP, llvm::dbgs(), MacroName);
52               llvm::dbgs()
53               << "' with length " << MacroName.getLength() << " at ";
54               MacroNameBegin.print(llvm::dbgs(), SM);
55               llvm::dbgs() << ", expansion end at ";
56               ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
57
58    // If the expansion range is empty, use the identifier of the macro as a
59    // range.
60    MacroExpansionContext::ExpansionRangeMap::iterator It;
61    bool Inserted;
62    std::tie(It, Inserted) =
63        ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
64    if (Inserted) {
65      LLVM_DEBUG(llvm::dbgs() << "maps ";
66                 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
67                 It->getSecond().print(llvm::dbgs(), SM);
68                 llvm::dbgs() << '\n';);
69    } else {
70      if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
71        It->getSecond() = ExpansionEnd;
72        LLVM_DEBUG(
73            llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
74            llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
75            llvm::dbgs() << '\n';);
76      }
77    }
78  }
79};
80} // namespace detail
81} // namespace clang
82
83using namespace clang;
84
/// Stores \p LangOpts, which getOriginalText() later passes to
/// Lexer::getSourceText(). No preprocessor is attached here; that is done by
/// registerForPreprocessor().
MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
    : LangOpts(LangOpts) {}
87
88void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
89  PP = &NewPP;
90  SM = &NewPP.getSourceManager();
91
92  // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
93  PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
94      *PP, *SM, ExpansionRanges));
95  // Same applies here.
96  PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
97}
98
99Optional<StringRef>
100MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
101  if (MacroExpansionLoc.isMacroID())
102    return llvm::None;
103
104  // If there was no macro expansion at that location, return None.
105  if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
106    return llvm::None;
107
108  // There was macro expansion, but resulted in no tokens, return empty string.
109  const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
110  if (It == ExpandedTokens.end())
111    return StringRef{""};
112
113  // Otherwise we have the actual token sequence as string.
114  return StringRef{It->getSecond()};
115}
116
117Optional<StringRef>
118MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
119  if (MacroExpansionLoc.isMacroID())
120    return llvm::None;
121
122  const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
123  if (It == ExpansionRanges.end())
124    return llvm::None;
125
126  assert(It->getFirst() != It->getSecond() &&
127         "Every macro expansion must cover a non-empty range.");
128
129  return Lexer::getSourceText(
130      CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
131      LangOpts);
132}
133
/// Prints the recorded expansion ranges to the LLVM debug stream
/// (llvm::dbgs()) via dumpExpansionRangesToStream().
void MacroExpansionContext::dumpExpansionRanges() const {
  dumpExpansionRangesToStream(llvm::dbgs());
}
/// Prints the recorded expanded texts to the LLVM debug stream
/// (llvm::dbgs()) via dumpExpandedTextsToStream().
void MacroExpansionContext::dumpExpandedTexts() const {
  dumpExpandedTextsToStream(llvm::dbgs());
}
140
141void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
142  std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
143  LocalExpansionRanges.reserve(ExpansionRanges.size());
144  for (const auto &Record : ExpansionRanges)
145    LocalExpansionRanges.emplace_back(
146        std::make_pair(Record.getFirst(), Record.getSecond()));
147  llvm::sort(LocalExpansionRanges);
148
149  OS << "\n=============== ExpansionRanges ===============\n";
150  for (const auto &Record : LocalExpansionRanges) {
151    OS << "> ";
152    Record.first.print(OS, *SM);
153    OS << ", ";
154    Record.second.print(OS, *SM);
155    OS << '\n';
156  }
157}
158
159void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
160  std::vector<std::pair<SourceLocation, MacroExpansionText>>
161      LocalExpandedTokens;
162  LocalExpandedTokens.reserve(ExpandedTokens.size());
163  for (const auto &Record : ExpandedTokens)
164    LocalExpandedTokens.emplace_back(
165        std::make_pair(Record.getFirst(), Record.getSecond()));
166  llvm::sort(LocalExpandedTokens);
167
168  OS << "\n=============== ExpandedTokens ===============\n";
169  for (const auto &Record : LocalExpandedTokens) {
170    OS << "> ";
171    Record.first.print(OS, *SM);
172    OS << " -> '" << Record.second << "'\n";
173  }
174}
175
176static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
177  assert(Tok.isNot(tok::raw_identifier));
178
179  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
180  if (Tok.isAnnotation())
181    return;
182
183  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
184    // FIXME: For now, we don't respect whitespaces between macro expanded
185    // tokens. We just emit a space after every identifier to produce a valid
186    // code for `int a ;` like expansions.
187    //              ^-^-- Space after the 'int' and 'a' identifiers.
188    OS << II->getName() << ' ';
189  } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
190    OS << StringRef(Tok.getLiteralData(), Tok.getLength());
191  } else {
192    char Tmp[256];
193    if (Tok.getLength() < sizeof(Tmp)) {
194      const char *TokPtr = Tmp;
195      // FIXME: Might use a different overload for cleaner callsite.
196      unsigned Len = PP.getSpelling(Tok, TokPtr);
197      OS.write(TokPtr, Len);
198    } else {
199      OS << "<too long token>";
200    }
201  }
202}
203
204void MacroExpansionContext::onTokenLexed(const Token &Tok) {
205  SourceLocation SLoc = Tok.getLocation();
206  if (SLoc.isFileID())
207    return;
208
209  LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
210             dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
211             SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
212
213  // Remove spelling location.
214  SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
215
216  MacroExpansionText TokenAsString;
217  llvm::raw_svector_ostream OS(TokenAsString);
218
219  // FIXME: Prepend newlines and space to produce the exact same output as the
220  // preprocessor would for this token.
221
222  dumpTokenInto(*PP, OS, Tok);
223
224  ExpansionMap::iterator It;
225  bool Inserted;
226  std::tie(It, Inserted) =
227      ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
228  if (!Inserted)
229    It->getSecond().append(TokenAsString);
230}
231
232