1//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/Transformer/RangeSelector.h"
10#include "clang/AST/Expr.h"
11#include "clang/AST/TypeLoc.h"
12#include "clang/ASTMatchers/ASTMatchFinder.h"
13#include "clang/Basic/SourceLocation.h"
14#include "clang/Lex/Lexer.h"
15#include "clang/Tooling/Transformer/SourceCode.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18#include "llvm/Support/Error.h"
19#include <string>
20#include <utility>
21#include <vector>
22
23using namespace clang;
24using namespace transformer;
25
26using ast_matchers::MatchFinder;
27using llvm::Error;
28using llvm::StringError;
29
30using MatchResult = MatchFinder::MatchResult;
31
32static Error invalidArgumentError(Twine Message) {
33  return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
34}
35
36static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37  return invalidArgumentError("mismatched type (node id=" + ID +
38                              " kind=" + Kind.asStringRef() + ")");
39}
40
41static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42                       Twine ExpectedType) {
43  return invalidArgumentError("mismatched type: expected one of " +
44                              ExpectedType + " (node id=" + ID +
45                              " kind=" + Kind.asStringRef() + ")");
46}
47
48static Error missingPropertyError(StringRef ID, Twine Description,
49                                  StringRef Property) {
50  return invalidArgumentError(Description + " requires property '" + Property +
51                              "' (node id=" + ID + ")");
52}
53
54static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
55                                      StringRef ID) {
56  auto &NodesMap = Nodes.getMap();
57  auto It = NodesMap.find(ID);
58  if (It == NodesMap.end())
59    return invalidArgumentError("ID not bound: " + ID);
60  return It->second;
61}
62
63// FIXME: handling of macros should be configurable.
64static SourceLocation findPreviousTokenStart(SourceLocation Start,
65                                             const SourceManager &SM,
66                                             const LangOptions &LangOpts) {
67  if (Start.isInvalid() || Start.isMacroID())
68    return SourceLocation();
69
70  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
71  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72    return SourceLocation();
73
74  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
75}
76
77// Finds the start location of the previous token of kind \p TK.
78// FIXME: handling of macros should be configurable.
79static SourceLocation findPreviousTokenKind(SourceLocation Start,
80                                            const SourceManager &SM,
81                                            const LangOptions &LangOpts,
82                                            tok::TokenKind TK) {
83  while (true) {
84    SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85    if (L.isInvalid() || L.isMacroID())
86      return SourceLocation();
87
88    Token T;
89    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90      return SourceLocation();
91
92    if (T.is(TK))
93      return T.getLocation();
94
95    Start = L;
96  }
97}
98
99static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
100                                    const LangOptions &LangOpts) {
101  SourceLocation EndLoc =
102      E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
103  return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
104}
105
106RangeSelector transformer::before(RangeSelector Selector) {
107  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
108    Expected<CharSourceRange> SelectedRange = Selector(Result);
109    if (!SelectedRange)
110      return SelectedRange.takeError();
111    return CharSourceRange::getCharRange(SelectedRange->getBegin());
112  };
113}
114
115RangeSelector transformer::after(RangeSelector Selector) {
116  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
117    Expected<CharSourceRange> SelectedRange = Selector(Result);
118    if (!SelectedRange)
119      return SelectedRange.takeError();
120    SourceLocation End = SelectedRange->getEnd();
121    if (SelectedRange->isTokenRange()) {
122      // We need to find the actual (exclusive) end location from which to
123      // create a new source range. However, that's not guaranteed to be valid,
124      // even if the token location itself is valid. So, we create a token range
125      // consisting only of the last token, then map that range back to the
126      // source file. If that succeeds, we have a valid location for the end of
127      // the generated range.
128      CharSourceRange Range = Lexer::makeFileCharRange(
129          CharSourceRange::getTokenRange(SelectedRange->getEnd()),
130          *Result.SourceManager, Result.Context->getLangOpts());
131      if (Range.isInvalid())
132        return invalidArgumentError(
133            "after: can't resolve sub-range to valid source range");
134      End = Range.getEnd();
135    }
136
137    return CharSourceRange::getCharRange(End);
138  };
139}
140
141RangeSelector transformer::node(std::string ID) {
142  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143    Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144    if (!Node)
145      return Node.takeError();
146    return (Node->get<Decl>() != nullptr ||
147            (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
148               ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
149                                           *Result.Context)
150               : CharSourceRange::getTokenRange(Node->getSourceRange());
151  };
152}
153
154RangeSelector transformer::statement(std::string ID) {
155  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
156    Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
157    if (!Node)
158      return Node.takeError();
159    return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
160                                     *Result.Context);
161  };
162}
163
164RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
165  return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
166    Expected<CharSourceRange> BeginRange = Begin(Result);
167    if (!BeginRange)
168      return BeginRange.takeError();
169    Expected<CharSourceRange> EndRange = End(Result);
170    if (!EndRange)
171      return EndRange.takeError();
172    SourceLocation B = BeginRange->getBegin();
173    SourceLocation E = EndRange->getEnd();
174    // Note: we are precluding the possibility of sub-token ranges in the case
175    // that EndRange is a token range.
176    if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
177      return invalidArgumentError("Bad range: out of order");
178    }
179    return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
180  };
181}
182
183RangeSelector transformer::encloseNodes(std::string BeginID,
184                                        std::string EndID) {
185  return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
186}
187
188RangeSelector transformer::member(std::string ID) {
189  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
190    Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
191    if (!Node)
192      return Node.takeError();
193    if (auto *M = Node->get<clang::MemberExpr>())
194      return CharSourceRange::getTokenRange(
195          M->getMemberNameInfo().getSourceRange());
196    return typeError(ID, Node->getNodeKind(), "MemberExpr");
197  };
198}
199
200RangeSelector transformer::name(std::string ID) {
201  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
202    Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
203    if (!N)
204      return N.takeError();
205    auto &Node = *N;
206    if (const auto *D = Node.get<NamedDecl>()) {
207      if (!D->getDeclName().isIdentifier())
208        return missingPropertyError(ID, "name", "identifier");
209      SourceLocation L = D->getLocation();
210      auto R = CharSourceRange::getTokenRange(L, L);
211      // Verify that the range covers exactly the name.
212      // FIXME: extend this code to support cases like `operator +` or
213      // `foo<int>` for which this range will be too short.  Doing so will
214      // require subcasing `NamedDecl`, because it doesn't provide virtual
215      // access to the \c DeclarationNameInfo.
216      if (tooling::getText(R, *Result.Context) != D->getName())
217        return CharSourceRange();
218      return R;
219    }
220    if (const auto *E = Node.get<DeclRefExpr>()) {
221      if (!E->getNameInfo().getName().isIdentifier())
222        return missingPropertyError(ID, "name", "identifier");
223      SourceLocation L = E->getLocation();
224      return CharSourceRange::getTokenRange(L, L);
225    }
226    if (const auto *I = Node.get<CXXCtorInitializer>()) {
227      if (!I->isMemberInitializer() && I->isWritten())
228        return missingPropertyError(ID, "name", "explicit member initializer");
229      SourceLocation L = I->getMemberLocation();
230      return CharSourceRange::getTokenRange(L, L);
231    }
232    if (const auto *T = Node.get<TypeLoc>()) {
233      TypeLoc Loc = *T;
234      auto ET = Loc.getAs<ElaboratedTypeLoc>();
235      if (!ET.isNull()) {
236        Loc = ET.getNamedTypeLoc();
237      }
238      return CharSourceRange::getTokenRange(Loc.getSourceRange());
239    }
240    return typeError(ID, Node.getNodeKind(),
241                     "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
242  };
243}
244
245namespace {
246// FIXME: make this available in the public API for users to easily create their
247// own selectors.
248
249// Creates a selector from a range-selection function \p Func, which selects a
250// range that is relative to a bound node id.  \c T is the node type expected by
251// \p Func.
252template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
253class RelativeSelector {
254  std::string ID;
255
256public:
257  RelativeSelector(std::string ID) : ID(std::move(ID)) {}
258
259  Expected<CharSourceRange> operator()(const MatchResult &Result) {
260    Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
261    if (!N)
262      return N.takeError();
263    if (const auto *Arg = N->get<T>())
264      return Func(Result, *Arg);
265    return typeError(ID, N->getNodeKind());
266  }
267};
268} // namespace
269
270// FIXME: Change the following functions from being in an anonymous namespace
271// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
272// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
273// namespace works around a bug in earlier versions.
274namespace {
275// Returns the range of the statements (all source between the braces).
276CharSourceRange getStatementsRange(const MatchResult &,
277                                   const CompoundStmt &CS) {
278  return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
279                                       CS.getRBracLoc());
280}
281} // namespace
282
283RangeSelector transformer::statements(std::string ID) {
284  return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
285}
286
287namespace {
288// Returns the range of the source between the call's parentheses.
289CharSourceRange getCallArgumentsRange(const MatchResult &Result,
290                                      const CallExpr &CE) {
291  return CharSourceRange::getCharRange(
292      findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
293          .getLocWithOffset(1),
294      CE.getRParenLoc());
295}
296} // namespace
297
298RangeSelector transformer::callArgs(std::string ID) {
299  return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
300}
301
302namespace {
303// Returns the range of the elements of the initializer list. Includes all
304// source between the braces.
305CharSourceRange getElementsRange(const MatchResult &,
306                                 const InitListExpr &E) {
307  return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
308                                       E.getRBraceLoc());
309}
310} // namespace
311
312RangeSelector transformer::initListElements(std::string ID) {
313  return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
314}
315
316namespace {
317// Returns the range of the else branch, including the `else` keyword.
318CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
319  return tooling::maybeExtendRange(
320      CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
321      tok::TokenKind::semi, *Result.Context);
322}
323} // namespace
324
325RangeSelector transformer::elseBranch(std::string ID) {
326  return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
327}
328
329RangeSelector transformer::expansion(RangeSelector S) {
330  return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
331    Expected<CharSourceRange> SRange = S(Result);
332    if (!SRange)
333      return SRange.takeError();
334    return Result.SourceManager->getExpansionRange(*SRange);
335  };
336}
337