1//===- CoreEngine.h - Path-Sensitive Dataflow Engine ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines a generic engine for intraprocedural, path-sensitive,
10//  dataflow analysis via graph reachability.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
15#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
16
17#include "clang/AST/Stmt.h"
18#include "clang/Analysis/AnalysisDeclContext.h"
19#include "clang/Analysis/CFG.h"
20#include "clang/Analysis/ProgramPoint.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/Support/Casting.h"
29#include <cassert>
30#include <memory>
31#include <utility>
32#include <vector>
33
34namespace clang {
35
36class AnalyzerOptions;
37class CXXBindTemporaryExpr;
38class Expr;
39class LabelDecl;
40
41namespace ento {
42
43class FunctionSummariesTy;
44class SubEngine;
45
46//===----------------------------------------------------------------------===//
47/// CoreEngine - Implements the core logic of the graph-reachability
48///   analysis. It traverses the CFG and generates the ExplodedGraph.
49///   Program "states" are treated as opaque void pointers.
50///   The template class CoreEngine (which subclasses CoreEngine)
51///   provides the matching component to the engine that knows the actual types
52///   for states.  Note that this engine only dispatches to transfer functions
53///   at the statement and block-level.  The analyses themselves must implement
54///   any transfer function logic and the sub-expression level (if any).
55class CoreEngine {
56  friend class CommonNodeBuilder;
57  friend class EndOfFunctionNodeBuilder;
58  friend class ExprEngine;
59  friend class IndirectGotoNodeBuilder;
60  friend class NodeBuilder;
61  friend struct NodeBuilderContext;
62  friend class SwitchNodeBuilder;
63
64public:
65  using BlocksExhausted =
66      std::vector<std::pair<BlockEdge, const ExplodedNode *>>;
67
68  using BlocksAborted =
69      std::vector<std::pair<const CFGBlock *, const ExplodedNode *>>;
70
71private:
72  SubEngine &SubEng;
73
74  /// G - The simulation graph.  Each node is a (location,state) pair.
75  mutable ExplodedGraph G;
76
77  /// WList - A set of queued nodes that need to be processed by the
78  ///  worklist algorithm.  It is up to the implementation of WList to decide
79  ///  the order that nodes are processed.
80  std::unique_ptr<WorkList> WList;
81
82  /// BCounterFactory - A factory object for created BlockCounter objects.
83  ///   These are used to record for key nodes in the ExplodedGraph the
84  ///   number of times different CFGBlocks have been visited along a path.
85  BlockCounter::Factory BCounterFactory;
86
87  /// The locations where we stopped doing work because we visited a location
88  ///  too many times.
89  BlocksExhausted blocksExhausted;
90
91  /// The locations where we stopped because the engine aborted analysis,
92  /// usually because it could not reason about something.
93  BlocksAborted blocksAborted;
94
95  /// The information about functions shared by the whole translation unit.
96  /// (This data is owned by AnalysisConsumer.)
97  FunctionSummariesTy *FunctionSummaries;
98
99  /// Add path note tags along the path when we see that something interesting
100  /// is happening. This field is the allocator for such tags.
101  NoteTag::Factory NoteTags;
102
103  void generateNode(const ProgramPoint &Loc,
104                    ProgramStateRef State,
105                    ExplodedNode *Pred);
106
107  void HandleBlockEdge(const BlockEdge &E, ExplodedNode *Pred);
108  void HandleBlockEntrance(const BlockEntrance &E, ExplodedNode *Pred);
109  void HandleBlockExit(const CFGBlock *B, ExplodedNode *Pred);
110
111  void HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred);
112
113  void HandlePostStmt(const CFGBlock *B, unsigned StmtIdx, ExplodedNode *Pred);
114
115  void HandleBranch(const Stmt *Cond, const Stmt *Term, const CFGBlock *B,
116                    ExplodedNode *Pred);
117  void HandleCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
118                                    const CFGBlock *B, ExplodedNode *Pred);
119
120  /// Handle conditional logic for running static initializers.
121  void HandleStaticInit(const DeclStmt *DS, const CFGBlock *B,
122                        ExplodedNode *Pred);
123
124  void HandleVirtualBaseBranch(const CFGBlock *B, ExplodedNode *Pred);
125
126private:
127  ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
128                                          const ReturnStmt *RS);
129
130public:
131  /// Construct a CoreEngine object to analyze the provided CFG.
132  CoreEngine(SubEngine &subengine,
133             FunctionSummariesTy *FS,
134             AnalyzerOptions &Opts);
135
136  CoreEngine(const CoreEngine &) = delete;
137  CoreEngine &operator=(const CoreEngine &) = delete;
138
139  /// getGraph - Returns the exploded graph.
140  ExplodedGraph &getGraph() { return G; }
141
142  /// ExecuteWorkList - Run the worklist algorithm for a maximum number of
143  ///  steps.  Returns true if there is still simulation state on the worklist.
144  bool ExecuteWorkList(const LocationContext *L, unsigned Steps,
145                       ProgramStateRef InitState);
146
147  /// Returns true if there is still simulation state on the worklist.
148  bool ExecuteWorkListWithInitialState(const LocationContext *L,
149                                       unsigned Steps,
150                                       ProgramStateRef InitState,
151                                       ExplodedNodeSet &Dst);
152
153  /// Dispatch the work list item based on the given location information.
154  /// Use Pred parameter as the predecessor state.
155  void dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc,
156                        const WorkListUnit& WU);
157
158  // Functions for external checking of whether we have unfinished work
159  bool wasBlockAborted() const { return !blocksAborted.empty(); }
160  bool wasBlocksExhausted() const { return !blocksExhausted.empty(); }
161  bool hasWorkRemaining() const { return wasBlocksExhausted() ||
162                                         WList->hasWork() ||
163                                         wasBlockAborted(); }
164
165  /// Inform the CoreEngine that a basic block was aborted because
166  /// it could not be completely analyzed.
167  void addAbortedBlock(const ExplodedNode *node, const CFGBlock *block) {
168    blocksAborted.push_back(std::make_pair(block, node));
169  }
170
171  WorkList *getWorkList() const { return WList.get(); }
172
173  BlocksExhausted::const_iterator blocks_exhausted_begin() const {
174    return blocksExhausted.begin();
175  }
176
177  BlocksExhausted::const_iterator blocks_exhausted_end() const {
178    return blocksExhausted.end();
179  }
180
181  BlocksAborted::const_iterator blocks_aborted_begin() const {
182    return blocksAborted.begin();
183  }
184
185  BlocksAborted::const_iterator blocks_aborted_end() const {
186    return blocksAborted.end();
187  }
188
189  /// Enqueue the given set of nodes onto the work list.
190  void enqueue(ExplodedNodeSet &Set);
191
192  /// Enqueue nodes that were created as a result of processing
193  /// a statement onto the work list.
194  void enqueue(ExplodedNodeSet &Set, const CFGBlock *Block, unsigned Idx);
195
196  /// enqueue the nodes corresponding to the end of function onto the
197  /// end of path / work list.
198  void enqueueEndOfFunction(ExplodedNodeSet &Set, const ReturnStmt *RS);
199
200  /// Enqueue a single node created as a result of statement processing.
201  void enqueueStmtNode(ExplodedNode *N, const CFGBlock *Block, unsigned Idx);
202
203  NoteTag::Factory &getNoteTags() { return NoteTags; }
204};
205
206// TODO: Turn into a class.
207struct NodeBuilderContext {
208  const CoreEngine &Eng;
209  const CFGBlock *Block;
210  const LocationContext *LC;
211
212  NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
213      : Eng(E), Block(B), LC(N->getLocationContext()) { assert(B); }
214
215  /// Return the CFGBlock associated with this builder.
216  const CFGBlock *getBlock() const { return Block; }
217
218  /// Returns the number of times the current basic block has been
219  /// visited on the exploded graph path.
220  unsigned blockCount() const {
221    return Eng.WList->getBlockCounter().getNumVisited(
222                    LC->getStackFrame(),
223                    Block->getBlockID());
224  }
225};
226
227/// \class NodeBuilder
228/// This is the simplest builder which generates nodes in the
229/// ExplodedGraph.
230///
231/// The main benefit of the builder is that it automatically tracks the
232/// frontier nodes (or destination set). This is the set of nodes which should
233/// be propagated to the next step / builder. They are the nodes which have been
234/// added to the builder (either as the input node set or as the newly
235/// constructed nodes) but did not have any outgoing transitions added.
236class NodeBuilder {
237  virtual void anchor();
238
239protected:
240  const NodeBuilderContext &C;
241
242  /// Specifies if the builder results have been finalized. For example, if it
243  /// is set to false, autotransitions are yet to be generated.
244  bool Finalized;
245
246  bool HasGeneratedNodes = false;
247
248  /// The frontier set - a set of nodes which need to be propagated after
249  /// the builder dies.
250  ExplodedNodeSet &Frontier;
251
252  /// Checks if the results are ready.
253  virtual bool checkResults() {
254    return Finalized;
255  }
256
257  bool hasNoSinksInFrontier() {
258    for (const auto  I : Frontier)
259      if (I->isSink())
260        return false;
261    return true;
262  }
263
264  /// Allow subclasses to finalize results before result_begin() is executed.
265  virtual void finalizeResults() {}
266
267  ExplodedNode *generateNodeImpl(const ProgramPoint &PP,
268                                 ProgramStateRef State,
269                                 ExplodedNode *Pred,
270                                 bool MarkAsSink = false);
271
272public:
273  NodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
274              const NodeBuilderContext &Ctx, bool F = true)
275      : C(Ctx), Finalized(F), Frontier(DstSet) {
276    Frontier.Add(SrcNode);
277  }
278
279  NodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
280              const NodeBuilderContext &Ctx, bool F = true)
281      : C(Ctx), Finalized(F), Frontier(DstSet) {
282    Frontier.insert(SrcSet);
283    assert(hasNoSinksInFrontier());
284  }
285
286  virtual ~NodeBuilder() = default;
287
288  /// Generates a node in the ExplodedGraph.
289  ExplodedNode *generateNode(const ProgramPoint &PP,
290                             ProgramStateRef State,
291                             ExplodedNode *Pred) {
292    return generateNodeImpl(PP, State, Pred, false);
293  }
294
295  /// Generates a sink in the ExplodedGraph.
296  ///
297  /// When a node is marked as sink, the exploration from the node is stopped -
298  /// the node becomes the last node on the path and certain kinds of bugs are
299  /// suppressed.
300  ExplodedNode *generateSink(const ProgramPoint &PP,
301                             ProgramStateRef State,
302                             ExplodedNode *Pred) {
303    return generateNodeImpl(PP, State, Pred, true);
304  }
305
306  const ExplodedNodeSet &getResults() {
307    finalizeResults();
308    assert(checkResults());
309    return Frontier;
310  }
311
312  using iterator = ExplodedNodeSet::iterator;
313
314  /// Iterators through the results frontier.
315  iterator begin() {
316    finalizeResults();
317    assert(checkResults());
318    return Frontier.begin();
319  }
320
321  iterator end() {
322    finalizeResults();
323    return Frontier.end();
324  }
325
326  const NodeBuilderContext &getContext() { return C; }
327  bool hasGeneratedNodes() { return HasGeneratedNodes; }
328
329  void takeNodes(const ExplodedNodeSet &S) {
330    for (const auto I : S)
331      Frontier.erase(I);
332  }
333
334  void takeNodes(ExplodedNode *N) { Frontier.erase(N); }
335  void addNodes(const ExplodedNodeSet &S) { Frontier.insert(S); }
336  void addNodes(ExplodedNode *N) { Frontier.Add(N); }
337};
338
339/// \class NodeBuilderWithSinks
340/// This node builder keeps track of the generated sink nodes.
341class NodeBuilderWithSinks: public NodeBuilder {
342  void anchor() override;
343
344protected:
345  SmallVector<ExplodedNode*, 2> sinksGenerated;
346  ProgramPoint &Location;
347
348public:
349  NodeBuilderWithSinks(ExplodedNode *Pred, ExplodedNodeSet &DstSet,
350                       const NodeBuilderContext &Ctx, ProgramPoint &L)
351      : NodeBuilder(Pred, DstSet, Ctx), Location(L) {}
352
353  ExplodedNode *generateNode(ProgramStateRef State,
354                             ExplodedNode *Pred,
355                             const ProgramPointTag *Tag = nullptr) {
356    const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
357    return NodeBuilder::generateNode(LocalLoc, State, Pred);
358  }
359
360  ExplodedNode *generateSink(ProgramStateRef State, ExplodedNode *Pred,
361                             const ProgramPointTag *Tag = nullptr) {
362    const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
363    ExplodedNode *N = NodeBuilder::generateSink(LocalLoc, State, Pred);
364    if (N && N->isSink())
365      sinksGenerated.push_back(N);
366    return N;
367  }
368
369  const SmallVectorImpl<ExplodedNode*> &getSinks() const {
370    return sinksGenerated;
371  }
372};
373
374/// \class StmtNodeBuilder
375/// This builder class is useful for generating nodes that resulted from
376/// visiting a statement. The main difference from its parent NodeBuilder is
377/// that it creates a statement specific ProgramPoint.
378class StmtNodeBuilder: public NodeBuilder {
379  NodeBuilder *EnclosingBldr;
380
381public:
382  /// Constructs a StmtNodeBuilder. If the builder is going to process
383  /// nodes currently owned by another builder(with larger scope), use
384  /// Enclosing builder to transfer ownership.
385  StmtNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
386                  const NodeBuilderContext &Ctx,
387                  NodeBuilder *Enclosing = nullptr)
388      : NodeBuilder(SrcNode, DstSet, Ctx), EnclosingBldr(Enclosing) {
389    if (EnclosingBldr)
390      EnclosingBldr->takeNodes(SrcNode);
391  }
392
393  StmtNodeBuilder(ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
394                  const NodeBuilderContext &Ctx,
395                  NodeBuilder *Enclosing = nullptr)
396      : NodeBuilder(SrcSet, DstSet, Ctx), EnclosingBldr(Enclosing) {
397    if (EnclosingBldr)
398      for (const auto I : SrcSet)
399        EnclosingBldr->takeNodes(I);
400  }
401
402  ~StmtNodeBuilder() override;
403
404  using NodeBuilder::generateNode;
405  using NodeBuilder::generateSink;
406
407  ExplodedNode *generateNode(const Stmt *S,
408                             ExplodedNode *Pred,
409                             ProgramStateRef St,
410                             const ProgramPointTag *tag = nullptr,
411                             ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
412    const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
413                                  Pred->getLocationContext(), tag);
414    return NodeBuilder::generateNode(L, St, Pred);
415  }
416
417  ExplodedNode *generateSink(const Stmt *S,
418                             ExplodedNode *Pred,
419                             ProgramStateRef St,
420                             const ProgramPointTag *tag = nullptr,
421                             ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
422    const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
423                                  Pred->getLocationContext(), tag);
424    return NodeBuilder::generateSink(L, St, Pred);
425  }
426};
427
428/// BranchNodeBuilder is responsible for constructing the nodes
429/// corresponding to the two branches of the if statement - true and false.
430class BranchNodeBuilder: public NodeBuilder {
431  const CFGBlock *DstT;
432  const CFGBlock *DstF;
433
434  bool InFeasibleTrue;
435  bool InFeasibleFalse;
436
437  void anchor() override;
438
439public:
440  BranchNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
441                    const NodeBuilderContext &C,
442                    const CFGBlock *dstT, const CFGBlock *dstF)
443      : NodeBuilder(SrcNode, DstSet, C), DstT(dstT), DstF(dstF),
444        InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
445    // The branch node builder does not generate autotransitions.
446    // If there are no successors it means that both branches are infeasible.
447    takeNodes(SrcNode);
448  }
449
450  BranchNodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
451                    const NodeBuilderContext &C,
452                    const CFGBlock *dstT, const CFGBlock *dstF)
453      : NodeBuilder(SrcSet, DstSet, C), DstT(dstT), DstF(dstF),
454        InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
455    takeNodes(SrcSet);
456  }
457
458  ExplodedNode *generateNode(ProgramStateRef State, bool branch,
459                             ExplodedNode *Pred);
460
461  const CFGBlock *getTargetBlock(bool branch) const {
462    return branch ? DstT : DstF;
463  }
464
465  void markInfeasible(bool branch) {
466    if (branch)
467      InFeasibleTrue = true;
468    else
469      InFeasibleFalse = true;
470  }
471
472  bool isFeasible(bool branch) {
473    return branch ? !InFeasibleTrue : !InFeasibleFalse;
474  }
475};
476
477class IndirectGotoNodeBuilder {
478  CoreEngine& Eng;
479  const CFGBlock *Src;
480  const CFGBlock &DispatchBlock;
481  const Expr *E;
482  ExplodedNode *Pred;
483
484public:
485  IndirectGotoNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
486                    const Expr *e, const CFGBlock *dispatch, CoreEngine* eng)
487      : Eng(*eng), Src(src), DispatchBlock(*dispatch), E(e), Pred(pred) {}
488
489  class iterator {
490    friend class IndirectGotoNodeBuilder;
491
492    CFGBlock::const_succ_iterator I;
493
494    iterator(CFGBlock::const_succ_iterator i) : I(i) {}
495
496  public:
497    iterator &operator++() { ++I; return *this; }
498    bool operator!=(const iterator &X) const { return I != X.I; }
499
500    const LabelDecl *getLabel() const {
501      return cast<LabelStmt>((*I)->getLabel())->getDecl();
502    }
503
504    const CFGBlock *getBlock() const {
505      return *I;
506    }
507  };
508
509  iterator begin() { return iterator(DispatchBlock.succ_begin()); }
510  iterator end() { return iterator(DispatchBlock.succ_end()); }
511
512  ExplodedNode *generateNode(const iterator &I,
513                             ProgramStateRef State,
514                             bool isSink = false);
515
516  const Expr *getTarget() const { return E; }
517
518  ProgramStateRef getState() const { return Pred->State; }
519
520  const LocationContext *getLocationContext() const {
521    return Pred->getLocationContext();
522  }
523};
524
525class SwitchNodeBuilder {
526  CoreEngine& Eng;
527  const CFGBlock *Src;
528  const Expr *Condition;
529  ExplodedNode *Pred;
530
531public:
532  SwitchNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
533                    const Expr *condition, CoreEngine* eng)
534      : Eng(*eng), Src(src), Condition(condition), Pred(pred) {}
535
536  class iterator {
537    friend class SwitchNodeBuilder;
538
539    CFGBlock::const_succ_reverse_iterator I;
540
541    iterator(CFGBlock::const_succ_reverse_iterator i) : I(i) {}
542
543  public:
544    iterator &operator++() { ++I; return *this; }
545    bool operator!=(const iterator &X) const { return I != X.I; }
546    bool operator==(const iterator &X) const { return I == X.I; }
547
548    const CaseStmt *getCase() const {
549      return cast<CaseStmt>((*I)->getLabel());
550    }
551
552    const CFGBlock *getBlock() const {
553      return *I;
554    }
555  };
556
557  iterator begin() { return iterator(Src->succ_rbegin()+1); }
558  iterator end() { return iterator(Src->succ_rend()); }
559
560  const SwitchStmt *getSwitch() const {
561    return cast<SwitchStmt>(Src->getTerminator());
562  }
563
564  ExplodedNode *generateCaseStmtNode(const iterator &I,
565                                     ProgramStateRef State);
566
567  ExplodedNode *generateDefaultCaseNode(ProgramStateRef State,
568                                        bool isSink = false);
569
570  const Expr *getCondition() const { return Condition; }
571
572  ProgramStateRef getState() const { return Pred->State; }
573
574  const LocationContext *getLocationContext() const {
575    return Pred->getLocationContext();
576  }
577};
578
579} // namespace ento
580
581} // namespace clang
582
583#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
584