1//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines base types and functions for building dataflow analyses
10//  that run over Control-Flow Graphs (CFGs).
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
15#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
16
17#include <iterator>
18#include <optional>
19#include <type_traits>
20#include <utility>
21#include <vector>
22
23#include "clang/AST/ASTContext.h"
24#include "clang/Analysis/CFG.h"
25#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
26#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
27#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
28#include "clang/Analysis/FlowSensitive/MatchSwitch.h"
29#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
30#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/STLFunctionalExtras.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/Support/Errc.h"
35#include "llvm/Support/Error.h"
36
37namespace clang {
38namespace dataflow {
39
40/// Base class template for dataflow analyses built on a single lattice type.
41///
42/// Requirements:
43///
44///  `Derived` must be derived from a specialization of this class template and
45///  must provide the following public members:
46///   * `LatticeT initialElement()` - returns a lattice element that models the
47///     initial state of a basic block;
48///   * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
49///     the analysis transfer function for a given CFG element and lattice
50///     element.
51///
52///  `Derived` can optionally provide the following members:
53///  * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
54///                         Environment &Env)` - applies the analysis transfer
55///    function for a given edge from a CFG block of a conditional statement.
56///
57///  `Derived` can optionally override the following members:
58///   * `bool merge(QualType, const Value &, const Value &, Value &,
59///     Environment &)` -  joins distinct values. This could be a strict
60///     lattice join or a more general widening operation.
61///
62///  `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
63///  provide the following public members:
64///   * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
65///     argument by computing their least upper bound, modifies the object if
66///     necessary, and returns an effect indicating whether any changes were
67///     made to it;
68///     FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
69///   * `bool operator==(const LatticeT &) const` - returns true if and only if
70///     the object is equal to the argument.
71///
72/// `LatticeT` can optionally provide the following members:
73///  * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
74///    lattice element with an  approximation that can reach a fixed point more
75///    quickly than iterated application of the transfer function alone. The
76///    previous value is provided to inform the choice of widened value. The
77///    function must also serve as a comparison operation, by indicating whether
78///    the widened value is equivalent to the previous value with the returned
79///    `LatticeJoinEffect`.
80template <typename Derived, typename LatticeT>
81class DataflowAnalysis : public TypeErasedDataflowAnalysis {
82public:
83  /// Bounded join-semilattice that is used in the analysis.
84  using Lattice = LatticeT;
85
86  explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
87
88  explicit DataflowAnalysis(ASTContext &Context,
89                            DataflowAnalysisOptions Options)
90      : TypeErasedDataflowAnalysis(Options), Context(Context) {}
91
92  ASTContext &getASTContext() final { return Context; }
93
94  TypeErasedLattice typeErasedInitialElement() final {
95    return {static_cast<Derived *>(this)->initialElement()};
96  }
97
98  TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1,
99                                   const TypeErasedLattice &E2) final {
100    // FIXME: change the signature of join() to avoid copying here.
101    Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
102    const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
103    L1.join(L2);
104    return {std::move(L1)};
105  }
106
107  LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current,
108                                    const TypeErasedLattice &Previous) final {
109    Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
110    const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
111    return widenInternal(Rank0{}, C, P);
112  }
113
114  bool isEqualTypeErased(const TypeErasedLattice &E1,
115                         const TypeErasedLattice &E2) final {
116    const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
117    const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
118    return L1 == L2;
119  }
120
121  void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E,
122                          Environment &Env) final {
123    Lattice &L = llvm::any_cast<Lattice &>(E.Value);
124    static_cast<Derived *>(this)->transfer(Element, L, Env);
125  }
126
127  void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
128                                TypeErasedLattice &E, Environment &Env) final {
129    transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
130                           E, Env);
131  }
132
133private:
134  // These `Rank` structs are used for template metaprogramming to choose
135  // between overloads.
136  struct Rank1 {};
137  struct Rank0 : Rank1 {};
138
139  // The first-choice implementation: use `widen` when it is available.
140  template <typename T>
141  static auto widenInternal(Rank0, T &Current, const T &Prev)
142      -> decltype(Current.widen(Prev)) {
143    return Current.widen(Prev);
144  }
145
146  // The second-choice implementation: `widen` is unavailable. Widening is
147  // merged with equality checking, so when widening is unimplemented, we
148  // default to equality checking.
149  static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
150                                         const Lattice &Prev) {
151    return Prev == Current ? LatticeJoinEffect::Unchanged
152                           : LatticeJoinEffect::Changed;
153  }
154
155  // The first-choice implementation: `transferBranch` is implemented.
156  template <typename Analysis>
157  static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
158                                     const Stmt *Stmt, TypeErasedLattice &L,
159                                     Environment &Env)
160      -> std::void_t<decltype(A.transferBranch(
161          Branch, Stmt, std::declval<LatticeT &>(), Env))> {
162    A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
163  }
164
165  // The second-choice implementation: `transferBranch` is unimplemented. No-op.
166  template <typename Analysis>
167  static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
168                                     TypeErasedLattice &, Environment &) {}
169
170  ASTContext &Context;
171};
172
173// Model of the program at a given program point.
174template <typename LatticeT> struct DataflowAnalysisState {
175  // Model of a program property.
176  LatticeT Lattice;
177
178  // Model of the state of the program (store and heap).
179  Environment Env;
180};
181
182/// Performs dataflow analysis and returns a mapping from basic block IDs to
183/// dataflow analysis states that model the respective basic blocks. The
184/// returned vector, if any, will have the same size as the number of CFG
185/// blocks, with indices corresponding to basic block IDs. Returns an error if
186/// the dataflow analysis cannot be performed successfully. Otherwise, calls
187/// `PostVisitCFG` on each CFG element with the final analysis results at that
188/// program point.
189///
190/// `MaxBlockVisits` caps the number of block visits during analysis. See
191/// `runTypeErasedDataflowAnalysis` for a full description. The default value is
192/// essentially arbitrary -- large enough to accommodate what seems like any
193/// reasonable CFG, but still small enough to limit the cost of hitting the
194/// limit.
195template <typename AnalysisT>
196llvm::Expected<std::vector<
197    std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
198runDataflowAnalysis(
199    const ControlFlowContext &CFCtx, AnalysisT &Analysis,
200    const Environment &InitEnv,
201    std::function<void(const CFGElement &, const DataflowAnalysisState<
202                                               typename AnalysisT::Lattice> &)>
203        PostVisitCFG = nullptr,
204    std::int32_t MaxBlockVisits = 20'000) {
205  std::function<void(const CFGElement &,
206                     const TypeErasedDataflowAnalysisState &)>
207      PostVisitCFGClosure = nullptr;
208  if (PostVisitCFG) {
209    PostVisitCFGClosure = [&PostVisitCFG](
210                              const CFGElement &Element,
211                              const TypeErasedDataflowAnalysisState &State) {
212      auto *Lattice =
213          llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
214      // FIXME: we should not be copying the environment here!
215      // Ultimately the PostVisitCFG only gets a const reference anyway.
216      PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
217                                *Lattice, State.Env.fork()});
218    };
219  }
220
221  auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
222      CFCtx, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
223  if (!TypeErasedBlockStates)
224    return TypeErasedBlockStates.takeError();
225
226  std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
227      BlockStates;
228  BlockStates.reserve(TypeErasedBlockStates->size());
229
230  llvm::transform(
231      std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
232      [](auto &OptState) {
233        return llvm::transformOptional(
234            std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
235              return DataflowAnalysisState<typename AnalysisT::Lattice>{
236                  llvm::any_cast<typename AnalysisT::Lattice>(
237                      std::move(State.Lattice.Value)),
238                  std::move(State.Env)};
239            });
240      });
241  return std::move(BlockStates);
242}
243
244// Create an analysis class that is derived from `DataflowAnalysis`. This is an
245// SFINAE adapter that allows us to call two different variants of constructor
246// (either with or without the optional `Environment` parameter).
247// FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment`
248// parameter in their constructor so that we can get rid of this abomination.
249template <typename AnalysisT>
250auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
251    -> decltype(AnalysisT(ASTCtx, Env)) {
252  return AnalysisT(ASTCtx, Env);
253}
254template <typename AnalysisT>
255auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
256    -> decltype(AnalysisT(ASTCtx)) {
257  return AnalysisT(ASTCtx);
258}
259
260/// Runs a dataflow analysis over the given function and then runs `Diagnoser`
261/// over the results. Returns a list of diagnostics for `FuncDecl` or an
262/// error. Currently, errors can occur (at least) because the analysis requires
263/// too many iterations over the CFG or the SAT solver times out.
264///
265/// The default value of `MaxSATIterations` was chosen based on the following
266/// observations:
267/// - Non-pathological calls to the solver typically require only a few hundred
268///   iterations.
269/// - This limit is still low enough to keep runtimes acceptable (on typical
270///   machines) in cases where we hit the limit.
271///
272/// `MaxBlockVisits` caps the number of block visits during analysis. See
273/// `runDataflowAnalysis` for a full description and explanation of the default
274/// value.
275template <typename AnalysisT, typename Diagnostic>
276llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
277    const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
278    llvm::function_ref<llvm::SmallVector<Diagnostic>(
279        const CFGElement &, ASTContext &,
280        const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>
281        Diagnoser,
282    std::int64_t MaxSATIterations = 1'000'000'000,
283    std::int32_t MaxBlockVisits = 20'000) {
284  llvm::Expected<ControlFlowContext> Context =
285      ControlFlowContext::build(FuncDecl);
286  if (!Context)
287    return Context.takeError();
288
289  auto OwnedSolver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
290  const WatchedLiteralsSolver *Solver = OwnedSolver.get();
291  DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver));
292  Environment Env(AnalysisContext, FuncDecl);
293  AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
294  llvm::SmallVector<Diagnostic> Diagnostics;
295  if (llvm::Error Err =
296          runTypeErasedDataflowAnalysis(
297              *Context, Analysis, Env,
298              [&ASTCtx, &Diagnoser, &Diagnostics](
299                  const CFGElement &Elt,
300                  const TypeErasedDataflowAnalysisState &State) mutable {
301                auto EltDiagnostics = Diagnoser(
302                    Elt, ASTCtx,
303                    TransferStateForDiagnostics<typename AnalysisT::Lattice>(
304                        llvm::any_cast<const typename AnalysisT::Lattice &>(
305                            State.Lattice.Value),
306                        State.Env));
307                llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
308              },
309              MaxBlockVisits)
310              .takeError())
311    return std::move(Err);
312
313  if (Solver->reachedLimit())
314    return llvm::createStringError(llvm::errc::interrupted,
315                                   "SAT solver timed out");
316
317  return Diagnostics;
318}
319
320/// Abstract base class for dataflow "models": reusable analysis components that
321/// model a particular aspect of program semantics in the `Environment`. For
322/// example, a model may capture a type and its related functions.
323class DataflowModel : public Environment::ValueModel {
324public:
325  /// Return value indicates whether the model processed the `Element`.
326  virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
327};
328
329} // namespace dataflow
330} // namespace clang
331
332#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
333