1//===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines the template classes ExplodedNode and ExplodedGraph,
10//  which represent a path-sensitive, intra-procedural "exploded graph."
11//  See "Precise interprocedural dataflow analysis via graph reachability"
12//  by Reps, Horwitz, and Sagiv
13//  (http://portal.acm.org/citation.cfm?id=199462) for the definition of an
14//  exploded graph.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
19#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
20
21#include "clang/Analysis/AnalysisDeclContext.h"
22#include "clang/Analysis/ProgramPoint.h"
23#include "clang/Analysis/Support/BumpVector.h"
24#include "clang/Basic/LLVM.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
27#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/DepthFirstIterator.h"
31#include "llvm/ADT/FoldingSet.h"
32#include "llvm/ADT/GraphTraits.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SetVector.h"
35#include "llvm/ADT/iterator_range.h"
36#include "llvm/Support/Allocator.h"
37#include "llvm/Support/Compiler.h"
38#include <cassert>
39#include <cstdint>
40#include <memory>
41#include <optional>
42#include <utility>
43#include <vector>
44
45namespace clang {
46
47class CFG;
48class Decl;
49class Expr;
50class ParentMap;
51class Stmt;
52
53namespace ento {
54
55class ExplodedGraph;
56
57//===----------------------------------------------------------------------===//
58// ExplodedGraph "implementation" classes.  These classes are not typed to
59// contain a specific kind of state.  Typed-specialized versions are defined
60// on top of these classes.
61//===----------------------------------------------------------------------===//
62
63// ExplodedNode is not constified all over the engine because we need to add
64// successors to it at any time after creating it.
65
66class ExplodedNode : public llvm::FoldingSetNode {
67  friend class BranchNodeBuilder;
68  friend class CoreEngine;
69  friend class EndOfFunctionNodeBuilder;
70  friend class ExplodedGraph;
71  friend class IndirectGotoNodeBuilder;
72  friend class NodeBuilder;
73  friend class SwitchNodeBuilder;
74
75  /// Efficiently stores a list of ExplodedNodes, or an optional flag.
76  ///
77  /// NodeGroup provides opaque storage for a list of ExplodedNodes, optimizing
78  /// for the case when there is only one node in the group. This is a fairly
79  /// common case in an ExplodedGraph, where most nodes have only one
80  /// predecessor and many have only one successor. It can also be used to
81  /// store a flag rather than a node list, which ExplodedNode uses to mark
82  /// whether a node is a sink. If the flag is set, the group is implicitly
83  /// empty and no nodes may be added.
84  class NodeGroup {
85    // Conceptually a discriminated union. If the low bit is set, the node is
86    // a sink. If the low bit is not set, the pointer refers to the storage
87    // for the nodes in the group.
88    // This is not a PointerIntPair in order to keep the storage type opaque.
89    uintptr_t P;
90
91  public:
92    NodeGroup(bool Flag = false) : P(Flag) {
93      assert(getFlag() == Flag);
94    }
95
96    ExplodedNode * const *begin() const;
97
98    ExplodedNode * const *end() const;
99
100    unsigned size() const;
101
102    bool empty() const { return P == 0 || getFlag() != 0; }
103
104    /// Adds a node to the list.
105    ///
106    /// The group must not have been created with its flag set.
107    void addNode(ExplodedNode *N, ExplodedGraph &G);
108
109    /// Replaces the single node in this group with a new node.
110    ///
111    /// Note that this should only be used when you know the group was not
112    /// created with its flag set, and that the group is empty or contains
113    /// only a single node.
114    void replaceNode(ExplodedNode *node);
115
116    /// Returns whether this group was created with its flag set.
117    bool getFlag() const {
118      return (P & 1);
119    }
120  };
121
122  /// Location - The program location (within a function body) associated
123  ///  with this node.
124  const ProgramPoint Location;
125
126  /// State - The state associated with this node.
127  ProgramStateRef State;
128
129  /// Preds - The predecessors of this node.
130  NodeGroup Preds;
131
132  /// Succs - The successors of this node.
133  NodeGroup Succs;
134
135  int64_t Id;
136
137public:
138  explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state,
139                        int64_t Id, bool IsSink)
140      : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) {
141    assert(isSink() == IsSink);
142  }
143
144  /// getLocation - Returns the edge associated with the given node.
145  ProgramPoint getLocation() const { return Location; }
146
147  const LocationContext *getLocationContext() const {
148    return getLocation().getLocationContext();
149  }
150
151  const StackFrameContext *getStackFrame() const {
152    return getLocation().getStackFrame();
153  }
154
155  const Decl &getCodeDecl() const { return *getLocationContext()->getDecl(); }
156
157  CFG &getCFG() const { return *getLocationContext()->getCFG(); }
158
159  const CFGBlock *getCFGBlock() const;
160
161  const ParentMap &getParentMap() const {
162    return getLocationContext()->getParentMap();
163  }
164
165  template <typename T> T &getAnalysis() const {
166    return *getLocationContext()->getAnalysis<T>();
167  }
168
169  const ProgramStateRef &getState() const { return State; }
170
171  template <typename T> std::optional<T> getLocationAs() const & {
172    return Location.getAs<T>();
173  }
174
175  /// Get the value of an arbitrary expression at this node.
176  SVal getSVal(const Stmt *S) const {
177    return getState()->getSVal(S, getLocationContext());
178  }
179
180  static void Profile(llvm::FoldingSetNodeID &ID,
181                      const ProgramPoint &Loc,
182                      const ProgramStateRef &state,
183                      bool IsSink) {
184    ID.Add(Loc);
185    ID.AddPointer(state.get());
186    ID.AddBoolean(IsSink);
187  }
188
189  void Profile(llvm::FoldingSetNodeID& ID) const {
190    // We avoid copy constructors by not using accessors.
191    Profile(ID, Location, State, isSink());
192  }
193
194  /// addPredeccessor - Adds a predecessor to the current node, and
195  ///  in tandem add this node as a successor of the other node.
196  void addPredecessor(ExplodedNode *V, ExplodedGraph &G);
197
198  unsigned succ_size() const { return Succs.size(); }
199  unsigned pred_size() const { return Preds.size(); }
200  bool succ_empty() const { return Succs.empty(); }
201  bool pred_empty() const { return Preds.empty(); }
202
203  bool isSink() const { return Succs.getFlag(); }
204
205  bool hasSinglePred() const {
206    return (pred_size() == 1);
207  }
208
209  ExplodedNode *getFirstPred() {
210    return pred_empty() ? nullptr : *(pred_begin());
211  }
212
213  const ExplodedNode *getFirstPred() const {
214    return const_cast<ExplodedNode*>(this)->getFirstPred();
215  }
216
217  ExplodedNode *getFirstSucc() {
218    return succ_empty() ? nullptr : *(succ_begin());
219  }
220
221  const ExplodedNode *getFirstSucc() const {
222    return const_cast<ExplodedNode*>(this)->getFirstSucc();
223  }
224
225  // Iterators over successor and predecessor vertices.
226  using succ_iterator = ExplodedNode * const *;
227  using succ_range = llvm::iterator_range<succ_iterator>;
228
229  using const_succ_iterator = const ExplodedNode * const *;
230  using const_succ_range = llvm::iterator_range<const_succ_iterator>;
231
232  using pred_iterator = ExplodedNode * const *;
233  using pred_range = llvm::iterator_range<pred_iterator>;
234
235  using const_pred_iterator = const ExplodedNode * const *;
236  using const_pred_range = llvm::iterator_range<const_pred_iterator>;
237
238  pred_iterator pred_begin() { return Preds.begin(); }
239  pred_iterator pred_end() { return Preds.end(); }
240  pred_range preds() { return {Preds.begin(), Preds.end()}; }
241
242  const_pred_iterator pred_begin() const {
243    return const_cast<ExplodedNode*>(this)->pred_begin();
244  }
245  const_pred_iterator pred_end() const {
246    return const_cast<ExplodedNode*>(this)->pred_end();
247  }
248  const_pred_range preds() const { return {Preds.begin(), Preds.end()}; }
249
250  succ_iterator succ_begin() { return Succs.begin(); }
251  succ_iterator succ_end() { return Succs.end(); }
252  succ_range succs() { return {Succs.begin(), Succs.end()}; }
253
254  const_succ_iterator succ_begin() const {
255    return const_cast<ExplodedNode*>(this)->succ_begin();
256  }
257  const_succ_iterator succ_end() const {
258    return const_cast<ExplodedNode*>(this)->succ_end();
259  }
260  const_succ_range succs() const { return {Succs.begin(), Succs.end()}; }
261
262  int64_t getID() const { return Id; }
263
264  /// The node is trivial if it has only one successor, only one predecessor,
265  /// it's predecessor has only one successor,
266  /// and its program state is the same as the program state of the previous
267  /// node.
268  /// Trivial nodes may be skipped while printing exploded graph.
269  bool isTrivial() const;
270
271  /// If the node's program point corresponds to a statement, retrieve that
272  /// statement. Useful for figuring out where to put a warning or a note.
273  /// If the statement belongs to a body-farmed definition,
274  /// retrieve the call site for that definition.
275  const Stmt *getStmtForDiagnostics() const;
276
277  /// Find the next statement that was executed on this node's execution path.
278  /// Useful for explaining control flow that follows the current node.
279  /// If the statement belongs to a body-farmed definition, retrieve the
280  /// call site for that definition.
281  const Stmt *getNextStmtForDiagnostics() const;
282
283  /// Find the statement that was executed immediately before this node.
284  /// Useful when the node corresponds to a CFG block entrance.
285  /// If the statement belongs to a body-farmed definition, retrieve the
286  /// call site for that definition.
287  const Stmt *getPreviousStmtForDiagnostics() const;
288
289  /// Find the statement that was executed at or immediately before this node.
290  /// Useful when any nearby statement will do.
291  /// If the statement belongs to a body-farmed definition, retrieve the
292  /// call site for that definition.
293  const Stmt *getCurrentOrPreviousStmtForDiagnostics() const;
294
295private:
296  void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); }
297  void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); }
298};
299
300using InterExplodedGraphMap =
301    llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>;
302
303class ExplodedGraph {
304protected:
305  friend class CoreEngine;
306
307  // Type definitions.
308  using NodeVector = std::vector<ExplodedNode *>;
309
310  /// The roots of the simulation graph. Usually there will be only
311  /// one, but clients are free to establish multiple subgraphs within a single
312  /// SimulGraph. Moreover, these subgraphs can often merge when paths from
313  /// different roots reach the same state at the same program location.
314  NodeVector Roots;
315
316  /// The nodes in the simulation graph which have been
317  /// specially marked as the endpoint of an abstract simulation path.
318  NodeVector EndNodes;
319
320  /// Nodes - The nodes in the graph.
321  llvm::FoldingSet<ExplodedNode> Nodes;
322
323  /// BVC - Allocator and context for allocating nodes and their predecessor
324  /// and successor groups.
325  BumpVectorContext BVC;
326
327  /// NumNodes - The number of nodes in the graph.
328  int64_t NumNodes = 0;
329
330  /// A list of recently allocated nodes that can potentially be recycled.
331  NodeVector ChangedNodes;
332
333  /// A list of nodes that can be reused.
334  NodeVector FreeNodes;
335
336  /// Determines how often nodes are reclaimed.
337  ///
338  /// If this is 0, nodes will never be reclaimed.
339  unsigned ReclaimNodeInterval = 0;
340
341  /// Counter to determine when to reclaim nodes.
342  unsigned ReclaimCounter;
343
344public:
345  ExplodedGraph();
346  ~ExplodedGraph();
347
348  /// Retrieve the node associated with a (Location,State) pair,
349  ///  where the 'Location' is a ProgramPoint in the CFG.  If no node for
350  ///  this pair exists, it is created. IsNew is set to true if
351  ///  the node was freshly created.
352  ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State,
353                        bool IsSink = false,
354                        bool* IsNew = nullptr);
355
356  /// Create a node for a (Location, State) pair,
357  ///  but don't store it for deduplication later.  This
358  ///  is useful when copying an already completed
359  ///  ExplodedGraph for further processing.
360  ExplodedNode *createUncachedNode(const ProgramPoint &L,
361    ProgramStateRef State,
362    int64_t Id,
363    bool IsSink = false);
364
365  std::unique_ptr<ExplodedGraph> MakeEmptyGraph() const {
366    return std::make_unique<ExplodedGraph>();
367  }
368
369  /// addRoot - Add an untyped node to the set of roots.
370  ExplodedNode *addRoot(ExplodedNode *V) {
371    Roots.push_back(V);
372    return V;
373  }
374
375  /// addEndOfPath - Add an untyped node to the set of EOP nodes.
376  ExplodedNode *addEndOfPath(ExplodedNode *V) {
377    EndNodes.push_back(V);
378    return V;
379  }
380
381  unsigned num_roots() const { return Roots.size(); }
382  unsigned num_eops() const { return EndNodes.size(); }
383
384  bool empty() const { return NumNodes == 0; }
385  unsigned size() const { return NumNodes; }
386
387  void reserve(unsigned NodeCount) { Nodes.reserve(NodeCount); }
388
389  // Iterators.
390  using NodeTy = ExplodedNode;
391  using AllNodesTy = llvm::FoldingSet<ExplodedNode>;
392  using roots_iterator = NodeVector::iterator;
393  using const_roots_iterator = NodeVector::const_iterator;
394  using eop_iterator = NodeVector::iterator;
395  using const_eop_iterator = NodeVector::const_iterator;
396  using node_iterator = AllNodesTy::iterator;
397  using const_node_iterator = AllNodesTy::const_iterator;
398
399  llvm::iterator_range<node_iterator> nodes() { return Nodes; }
400
401  llvm::iterator_range<const_node_iterator> nodes() const { return Nodes; }
402
403  roots_iterator roots_begin() { return Roots.begin(); }
404
405  roots_iterator roots_end() { return Roots.end(); }
406
407  const_roots_iterator roots_begin() const { return Roots.begin(); }
408
409  const_roots_iterator roots_end() const { return Roots.end(); }
410
411  eop_iterator eop_begin() { return EndNodes.begin(); }
412
413  eop_iterator eop_end() { return EndNodes.end(); }
414
415  const_eop_iterator eop_begin() const { return EndNodes.begin(); }
416
417  const_eop_iterator eop_end() const { return EndNodes.end(); }
418
419  llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); }
420  BumpVectorContext &getNodeAllocator() { return BVC; }
421
422  using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>;
423
424  /// Creates a trimmed version of the graph that only contains paths leading
425  /// to the given nodes.
426  ///
427  /// \param Nodes The nodes which must appear in the final graph. Presumably
428  ///              these are end-of-path nodes (i.e. they have no successors).
429  /// \param[out] ForwardMap A optional map from nodes in this graph to nodes in
430  ///                        the returned graph.
431  /// \param[out] InverseMap An optional map from nodes in the returned graph to
432  ///                        nodes in this graph.
433  /// \returns The trimmed graph
434  std::unique_ptr<ExplodedGraph>
435  trim(ArrayRef<const NodeTy *> Nodes,
436       InterExplodedGraphMap *ForwardMap = nullptr,
437       InterExplodedGraphMap *InverseMap = nullptr) const;
438
439  /// Enable tracking of recently allocated nodes for potential reclamation
440  /// when calling reclaimRecentlyAllocatedNodes().
441  void enableNodeReclamation(unsigned Interval) {
442    ReclaimCounter = ReclaimNodeInterval = Interval;
443  }
444
445  /// Reclaim "uninteresting" nodes created since the last time this method
446  /// was called.
447  void reclaimRecentlyAllocatedNodes();
448
449  /// Returns true if nodes for the given expression kind are always
450  ///        kept around.
451  static bool isInterestingLValueExpr(const Expr *Ex);
452
453private:
454  bool shouldCollect(const ExplodedNode *node);
455  void collectNode(ExplodedNode *node);
456};
457
458class ExplodedNodeSet {
459  using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>;
460  ImplTy Impl;
461
462public:
463  ExplodedNodeSet(ExplodedNode *N) {
464    assert(N && !static_cast<ExplodedNode*>(N)->isSink());
465    Impl.insert(N);
466  }
467
468  ExplodedNodeSet() = default;
469
470  void Add(ExplodedNode *N) {
471    if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(N);
472  }
473
474  using iterator = ImplTy::iterator;
475  using const_iterator = ImplTy::const_iterator;
476
477  unsigned size() const { return Impl.size();  }
478  bool empty()    const { return Impl.empty(); }
479  bool erase(ExplodedNode *N) { return Impl.remove(N); }
480
481  void clear() { Impl.clear(); }
482
483  void insert(const ExplodedNodeSet &S) {
484    assert(&S != this);
485    if (empty())
486      Impl = S.Impl;
487    else
488      Impl.insert(S.begin(), S.end());
489  }
490
491  iterator begin() { return Impl.begin(); }
492  iterator end() { return Impl.end(); }
493
494  const_iterator begin() const { return Impl.begin(); }
495  const_iterator end() const { return Impl.end(); }
496};
497
498} // namespace ento
499
500} // namespace clang
501
502// GraphTraits
503
504namespace llvm {
505  template <> struct GraphTraits<clang::ento::ExplodedGraph *> {
506    using GraphTy = clang::ento::ExplodedGraph *;
507    using NodeRef = clang::ento::ExplodedNode *;
508    using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator;
509    using nodes_iterator = llvm::df_iterator<GraphTy>;
510
511    static NodeRef getEntryNode(const GraphTy G) {
512      return *G->roots_begin();
513    }
514
515    static bool predecessorOfTrivial(NodeRef N) {
516      return N->succ_size() == 1 && N->getFirstSucc()->isTrivial();
517    }
518
519    static ChildIteratorType child_begin(NodeRef N) {
520      if (predecessorOfTrivial(N))
521        return child_begin(*N->succ_begin());
522      return N->succ_begin();
523    }
524
525    static ChildIteratorType child_end(NodeRef N) {
526      if (predecessorOfTrivial(N))
527        return child_end(N->getFirstSucc());
528      return N->succ_end();
529    }
530
531    static nodes_iterator nodes_begin(const GraphTy G) {
532      return df_begin(G);
533    }
534
535    static nodes_iterator nodes_end(const GraphTy G) {
536      return df_end(G);
537    }
538  };
539} // namespace llvm
540
541#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
542