1//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines base types and functions for building dataflow analyses 10// that run over Control-Flow Graphs (CFGs). 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 15#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 16 17#include <iterator> 18#include <optional> 19#include <type_traits> 20#include <utility> 21#include <vector> 22 23#include "clang/AST/ASTContext.h" 24#include "clang/Analysis/CFG.h" 25#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 26#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 27#include "clang/Analysis/FlowSensitive/DataflowLattice.h" 28#include "clang/Analysis/FlowSensitive/MatchSwitch.h" 29#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 30#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" 31#include "llvm/ADT/STLExtras.h" 32#include "llvm/ADT/STLFunctionalExtras.h" 33#include "llvm/ADT/SmallVector.h" 34#include "llvm/Support/Errc.h" 35#include "llvm/Support/Error.h" 36 37namespace clang { 38namespace dataflow { 39 40/// Base class template for dataflow analyses built on a single lattice type. 41/// 42/// Requirements: 43/// 44/// `Derived` must be derived from a specialization of this class template and 45/// must provide the following public members: 46/// * `LatticeT initialElement()` - returns a lattice element that models the 47/// initial state of a basic block; 48/// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies 49/// the analysis transfer function for a given CFG element and lattice 50/// element. 51/// 52/// `Derived` can optionally provide the following members: 53/// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, 54/// Environment &Env)` - applies the analysis transfer 55/// function for a given edge from a CFG block of a conditional statement. 56/// 57/// `Derived` can optionally override the following members: 58/// * `bool merge(QualType, const Value &, const Value &, Value &, 59/// Environment &)` - joins distinct values. This could be a strict 60/// lattice join or a more general widening operation. 61/// 62/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must 63/// provide the following public members: 64/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the 65/// argument by computing their least upper bound, modifies the object if 66/// necessary, and returns an effect indicating whether any changes were 67/// made to it; 68/// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)` 69/// * `bool operator==(const LatticeT &) const` - returns true if and only if 70/// the object is equal to the argument. 71/// 72/// `LatticeT` can optionally provide the following members: 73/// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the 74/// lattice element with an approximation that can reach a fixed point more 75/// quickly than iterated application of the transfer function alone. The 76/// previous value is provided to inform the choice of widened value. The 77/// function must also serve as a comparison operation, by indicating whether 78/// the widened value is equivalent to the previous value with the returned 79/// `LatticeJoinEffect`. 80template <typename Derived, typename LatticeT> 81class DataflowAnalysis : public TypeErasedDataflowAnalysis { 82public: 83 /// Bounded join-semilattice that is used in the analysis. 84 using Lattice = LatticeT; 85 86 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} 87 88 explicit DataflowAnalysis(ASTContext &Context, 89 DataflowAnalysisOptions Options) 90 : TypeErasedDataflowAnalysis(Options), Context(Context) {} 91 92 ASTContext &getASTContext() final { return Context; } 93 94 TypeErasedLattice typeErasedInitialElement() final { 95 return {static_cast<Derived *>(this)->initialElement()}; 96 } 97 98 TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, 99 const TypeErasedLattice &E2) final { 100 // FIXME: change the signature of join() to avoid copying here. 101 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value); 102 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 103 L1.join(L2); 104 return {std::move(L1)}; 105 } 106 107 LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, 108 const TypeErasedLattice &Previous) final { 109 Lattice &C = llvm::any_cast<Lattice &>(Current.Value); 110 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value); 111 return widenInternal(Rank0{}, C, P); 112 } 113 114 bool isEqualTypeErased(const TypeErasedLattice &E1, 115 const TypeErasedLattice &E2) final { 116 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); 117 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 118 return L1 == L2; 119 } 120 121 void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, 122 Environment &Env) final { 123 Lattice &L = llvm::any_cast<Lattice &>(E.Value); 124 static_cast<Derived *>(this)->transfer(Element, L, Env); 125 } 126 127 void transferBranchTypeErased(bool Branch, const Stmt *Stmt, 128 TypeErasedLattice &E, Environment &Env) final { 129 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt, 130 E, Env); 131 } 132 133private: 134 // These `Rank` structs are used for template metaprogramming to choose 135 // between overloads. 136 struct Rank1 {}; 137 struct Rank0 : Rank1 {}; 138 139 // The first-choice implementation: use `widen` when it is available. 140 template <typename T> 141 static auto widenInternal(Rank0, T &Current, const T &Prev) 142 -> decltype(Current.widen(Prev)) { 143 return Current.widen(Prev); 144 } 145 146 // The second-choice implementation: `widen` is unavailable. Widening is 147 // merged with equality checking, so when widening is unimplemented, we 148 // default to equality checking. 149 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current, 150 const Lattice &Prev) { 151 return Prev == Current ? LatticeJoinEffect::Unchanged 152 : LatticeJoinEffect::Changed; 153 } 154 155 // The first-choice implementation: `transferBranch` is implemented. 156 template <typename Analysis> 157 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch, 158 const Stmt *Stmt, TypeErasedLattice &L, 159 Environment &Env) 160 -> std::void_t<decltype(A.transferBranch( 161 Branch, Stmt, std::declval<LatticeT &>(), Env))> { 162 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env); 163 } 164 165 // The second-choice implementation: `transferBranch` is unimplemented. No-op. 166 template <typename Analysis> 167 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *, 168 TypeErasedLattice &, Environment &) {} 169 170 ASTContext &Context; 171}; 172 173// Model of the program at a given program point. 174template <typename LatticeT> struct DataflowAnalysisState { 175 // Model of a program property. 176 LatticeT Lattice; 177 178 // Model of the state of the program (store and heap). 179 Environment Env; 180}; 181 182/// Performs dataflow analysis and returns a mapping from basic block IDs to 183/// dataflow analysis states that model the respective basic blocks. The 184/// returned vector, if any, will have the same size as the number of CFG 185/// blocks, with indices corresponding to basic block IDs. Returns an error if 186/// the dataflow analysis cannot be performed successfully. Otherwise, calls 187/// `PostVisitCFG` on each CFG element with the final analysis results at that 188/// program point. 189/// 190/// `MaxBlockVisits` caps the number of block visits during analysis. See 191/// `runTypeErasedDataflowAnalysis` for a full description. The default value is 192/// essentially arbitrary -- large enough to accommodate what seems like any 193/// reasonable CFG, but still small enough to limit the cost of hitting the 194/// limit. 195template <typename AnalysisT> 196llvm::Expected<std::vector< 197 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> 198runDataflowAnalysis( 199 const ControlFlowContext &CFCtx, AnalysisT &Analysis, 200 const Environment &InitEnv, 201 std::function<void(const CFGElement &, const DataflowAnalysisState< 202 typename AnalysisT::Lattice> &)> 203 PostVisitCFG = nullptr, 204 std::int32_t MaxBlockVisits = 20'000) { 205 std::function<void(const CFGElement &, 206 const TypeErasedDataflowAnalysisState &)> 207 PostVisitCFGClosure = nullptr; 208 if (PostVisitCFG) { 209 PostVisitCFGClosure = [&PostVisitCFG]( 210 const CFGElement &Element, 211 const TypeErasedDataflowAnalysisState &State) { 212 auto *Lattice = 213 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 214 // FIXME: we should not be copying the environment here! 215 // Ultimately the PostVisitCFG only gets a const reference anyway. 216 PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 217 *Lattice, State.Env.fork()}); 218 }; 219 } 220 221 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( 222 CFCtx, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits); 223 if (!TypeErasedBlockStates) 224 return TypeErasedBlockStates.takeError(); 225 226 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> 227 BlockStates; 228 BlockStates.reserve(TypeErasedBlockStates->size()); 229 230 llvm::transform( 231 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates), 232 [](auto &OptState) { 233 return llvm::transformOptional( 234 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) { 235 return DataflowAnalysisState<typename AnalysisT::Lattice>{ 236 llvm::any_cast<typename AnalysisT::Lattice>( 237 std::move(State.Lattice.Value)), 238 std::move(State.Env)}; 239 }); 240 }); 241 return std::move(BlockStates); 242} 243 244// Create an analysis class that is derived from `DataflowAnalysis`. This is an 245// SFINAE adapter that allows us to call two different variants of constructor 246// (either with or without the optional `Environment` parameter). 247// FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` 248// parameter in their constructor so that we can get rid of this abomination. 249template <typename AnalysisT> 250auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 251 -> decltype(AnalysisT(ASTCtx, Env)) { 252 return AnalysisT(ASTCtx, Env); 253} 254template <typename AnalysisT> 255auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 256 -> decltype(AnalysisT(ASTCtx)) { 257 return AnalysisT(ASTCtx); 258} 259 260/// Runs a dataflow analysis over the given function and then runs `Diagnoser` 261/// over the results. Returns a list of diagnostics for `FuncDecl` or an 262/// error. Currently, errors can occur (at least) because the analysis requires 263/// too many iterations over the CFG or the SAT solver times out. 264/// 265/// The default value of `MaxSATIterations` was chosen based on the following 266/// observations: 267/// - Non-pathological calls to the solver typically require only a few hundred 268/// iterations. 269/// - This limit is still low enough to keep runtimes acceptable (on typical 270/// machines) in cases where we hit the limit. 271/// 272/// `MaxBlockVisits` caps the number of block visits during analysis. See 273/// `runDataflowAnalysis` for a full description and explanation of the default 274/// value. 275template <typename AnalysisT, typename Diagnostic> 276llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction( 277 const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 278 llvm::function_ref<llvm::SmallVector<Diagnostic>( 279 const CFGElement &, ASTContext &, 280 const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)> 281 Diagnoser, 282 std::int64_t MaxSATIterations = 1'000'000'000, 283 std::int32_t MaxBlockVisits = 20'000) { 284 llvm::Expected<ControlFlowContext> Context = 285 ControlFlowContext::build(FuncDecl); 286 if (!Context) 287 return Context.takeError(); 288 289 auto OwnedSolver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations); 290 const WatchedLiteralsSolver *Solver = OwnedSolver.get(); 291 DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver)); 292 Environment Env(AnalysisContext, FuncDecl); 293 AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); 294 llvm::SmallVector<Diagnostic> Diagnostics; 295 if (llvm::Error Err = 296 runTypeErasedDataflowAnalysis( 297 *Context, Analysis, Env, 298 [&ASTCtx, &Diagnoser, &Diagnostics]( 299 const CFGElement &Elt, 300 const TypeErasedDataflowAnalysisState &State) mutable { 301 auto EltDiagnostics = Diagnoser( 302 Elt, ASTCtx, 303 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 304 llvm::any_cast<const typename AnalysisT::Lattice &>( 305 State.Lattice.Value), 306 State.Env)); 307 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 308 }, 309 MaxBlockVisits) 310 .takeError()) 311 return std::move(Err); 312 313 if (Solver->reachedLimit()) 314 return llvm::createStringError(llvm::errc::interrupted, 315 "SAT solver timed out"); 316 317 return Diagnostics; 318} 319 320/// Abstract base class for dataflow "models": reusable analysis components that 321/// model a particular aspect of program semantics in the `Environment`. For 322/// example, a model may capture a type and its related functions. 323class DataflowModel : public Environment::ValueModel { 324public: 325 /// Return value indicates whether the model processed the `Element`. 326 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0; 327}; 328 329} // namespace dataflow 330} // namespace clang 331 332#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 333