SymbolManager.h revision 341825
1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines SymbolManager, a class that manages symbolic values
11//  created for use by ExprEngine and related classes.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
16#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
17
18#include "clang/AST/Expr.h"
19#include "clang/AST/Type.h"
20#include "clang/Analysis/AnalysisDeclContext.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
25#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/DenseSet.h"
27#include "llvm/ADT/FoldingSet.h"
28#include "llvm/Support/Allocator.h"
29#include <cassert>
30
31namespace clang {
32
33class ASTContext;
34class Stmt;
35
36namespace ento {
37
38class BasicValueFactory;
39class StoreManager;
40
41///A symbol representing the value stored at a MemRegion.
42class SymbolRegionValue : public SymbolData {
43  const TypedValueRegion *R;
44
45public:
46  SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
47      : SymbolData(SymbolRegionValueKind, sym), R(r) {
48    assert(r);
49    assert(isValidTypeForSymbol(r->getValueType()));
50  }
51
52  const TypedValueRegion* getRegion() const { return R; }
53
54  static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
55    profile.AddInteger((unsigned) SymbolRegionValueKind);
56    profile.AddPointer(R);
57  }
58
59  void Profile(llvm::FoldingSetNodeID& profile) override {
60    Profile(profile, R);
61  }
62
63  void dumpToStream(raw_ostream &os) const override;
64  const MemRegion *getOriginRegion() const override { return getRegion(); }
65
66  QualType getType() const override;
67
68  // Implement isa<T> support.
69  static bool classof(const SymExpr *SE) {
70    return SE->getKind() == SymbolRegionValueKind;
71  }
72};
73
74/// A symbol representing the result of an expression in the case when we do
75/// not know anything about what the expression is.
76class SymbolConjured : public SymbolData {
77  const Stmt *S;
78  QualType T;
79  unsigned Count;
80  const LocationContext *LCtx;
81  const void *SymbolTag;
82
83public:
84  SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
85                 QualType t, unsigned count, const void *symbolTag)
86      : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
87        LCtx(lctx), SymbolTag(symbolTag) {
88    // FIXME: 's' might be a nullptr if we're conducting invalidation
89    // that was caused by a destructor call on a temporary object,
90    // which has no statement associated with it.
91    // Due to this, we might be creating the same invalidation symbol for
92    // two different invalidation passes (for two different temporaries).
93    assert(lctx);
94    assert(isValidTypeForSymbol(t));
95  }
96
97  const Stmt *getStmt() const { return S; }
98  unsigned getCount() const { return Count; }
99  const void *getTag() const { return SymbolTag; }
100
101  QualType getType() const override;
102
103  void dumpToStream(raw_ostream &os) const override;
104
105  static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
106                      QualType T, unsigned Count, const LocationContext *LCtx,
107                      const void *SymbolTag) {
108    profile.AddInteger((unsigned) SymbolConjuredKind);
109    profile.AddPointer(S);
110    profile.AddPointer(LCtx);
111    profile.Add(T);
112    profile.AddInteger(Count);
113    profile.AddPointer(SymbolTag);
114  }
115
116  void Profile(llvm::FoldingSetNodeID& profile) override {
117    Profile(profile, S, T, Count, LCtx, SymbolTag);
118  }
119
120  // Implement isa<T> support.
121  static bool classof(const SymExpr *SE) {
122    return SE->getKind() == SymbolConjuredKind;
123  }
124};
125
126/// A symbol representing the value of a MemRegion whose parent region has
127/// symbolic value.
128class SymbolDerived : public SymbolData {
129  SymbolRef parentSymbol;
130  const TypedValueRegion *R;
131
132public:
133  SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
134      : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
135    assert(parent);
136    assert(r);
137    assert(isValidTypeForSymbol(r->getValueType()));
138  }
139
140  SymbolRef getParentSymbol() const { return parentSymbol; }
141  const TypedValueRegion *getRegion() const { return R; }
142
143  QualType getType() const override;
144
145  void dumpToStream(raw_ostream &os) const override;
146  const MemRegion *getOriginRegion() const override { return getRegion(); }
147
148  static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
149                      const TypedValueRegion *r) {
150    profile.AddInteger((unsigned) SymbolDerivedKind);
151    profile.AddPointer(r);
152    profile.AddPointer(parent);
153  }
154
155  void Profile(llvm::FoldingSetNodeID& profile) override {
156    Profile(profile, parentSymbol, R);
157  }
158
159  // Implement isa<T> support.
160  static bool classof(const SymExpr *SE) {
161    return SE->getKind() == SymbolDerivedKind;
162  }
163};
164
165/// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
166///  Clients should not ask the SymbolManager for a region's extent. Always use
167///  SubRegion::getExtent instead -- the value returned may not be a symbol.
168class SymbolExtent : public SymbolData {
169  const SubRegion *R;
170
171public:
172  SymbolExtent(SymbolID sym, const SubRegion *r)
173      : SymbolData(SymbolExtentKind, sym), R(r) {
174    assert(r);
175  }
176
177  const SubRegion *getRegion() const { return R; }
178
179  QualType getType() const override;
180
181  void dumpToStream(raw_ostream &os) const override;
182
183  static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
184    profile.AddInteger((unsigned) SymbolExtentKind);
185    profile.AddPointer(R);
186  }
187
188  void Profile(llvm::FoldingSetNodeID& profile) override {
189    Profile(profile, R);
190  }
191
192  // Implement isa<T> support.
193  static bool classof(const SymExpr *SE) {
194    return SE->getKind() == SymbolExtentKind;
195  }
196};
197
198/// SymbolMetadata - Represents path-dependent metadata about a specific region.
199///  Metadata symbols remain live as long as they are marked as in use before
200///  dead-symbol sweeping AND their associated regions are still alive.
201///  Intended for use by checkers.
202class SymbolMetadata : public SymbolData {
203  const MemRegion* R;
204  const Stmt *S;
205  QualType T;
206  const LocationContext *LCtx;
207  unsigned Count;
208  const void *Tag;
209
210public:
211  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
212                 const LocationContext *LCtx, unsigned count, const void *tag)
213      : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
214        Count(count), Tag(tag) {
215      assert(r);
216      assert(s);
217      assert(isValidTypeForSymbol(t));
218      assert(LCtx);
219      assert(tag);
220    }
221
222  const MemRegion *getRegion() const { return R; }
223  const Stmt *getStmt() const { return S; }
224  const LocationContext *getLocationContext() const { return LCtx; }
225  unsigned getCount() const { return Count; }
226  const void *getTag() const { return Tag; }
227
228  QualType getType() const override;
229
230  void dumpToStream(raw_ostream &os) const override;
231
232  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
233                      const Stmt *S, QualType T, const LocationContext *LCtx,
234                      unsigned Count, const void *Tag) {
235    profile.AddInteger((unsigned) SymbolMetadataKind);
236    profile.AddPointer(R);
237    profile.AddPointer(S);
238    profile.Add(T);
239    profile.AddPointer(LCtx);
240    profile.AddInteger(Count);
241    profile.AddPointer(Tag);
242  }
243
244  void Profile(llvm::FoldingSetNodeID& profile) override {
245    Profile(profile, R, S, T, LCtx, Count, Tag);
246  }
247
248  // Implement isa<T> support.
249  static bool classof(const SymExpr *SE) {
250    return SE->getKind() == SymbolMetadataKind;
251  }
252};
253
254/// Represents a cast expression.
255class SymbolCast : public SymExpr {
256  const SymExpr *Operand;
257
258  /// Type of the operand.
259  QualType FromTy;
260
261  /// The type of the result.
262  QualType ToTy;
263
264public:
265  SymbolCast(const SymExpr *In, QualType From, QualType To)
266      : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
267    assert(In);
268    assert(isValidTypeForSymbol(From));
269    // FIXME: GenericTaintChecker creates symbols of void type.
270    // Otherwise, 'To' should also be a valid type.
271  }
272
273  unsigned computeComplexity() const override {
274    if (Complexity == 0)
275      Complexity = 1 + Operand->computeComplexity();
276    return Complexity;
277  }
278
279  QualType getType() const override { return ToTy; }
280
281  const SymExpr *getOperand() const { return Operand; }
282
283  void dumpToStream(raw_ostream &os) const override;
284
285  static void Profile(llvm::FoldingSetNodeID& ID,
286                      const SymExpr *In, QualType From, QualType To) {
287    ID.AddInteger((unsigned) SymbolCastKind);
288    ID.AddPointer(In);
289    ID.Add(From);
290    ID.Add(To);
291  }
292
293  void Profile(llvm::FoldingSetNodeID& ID) override {
294    Profile(ID, Operand, FromTy, ToTy);
295  }
296
297  // Implement isa<T> support.
298  static bool classof(const SymExpr *SE) {
299    return SE->getKind() == SymbolCastKind;
300  }
301};
302
303/// Represents a symbolic expression involving a binary operator
304class BinarySymExpr : public SymExpr {
305  BinaryOperator::Opcode Op;
306  QualType T;
307
308protected:
309  BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
310      : SymExpr(k), Op(op), T(t) {
311    assert(classof(this));
312    assert(isValidTypeForSymbol(t));
313  }
314
315public:
316  // FIXME: We probably need to make this out-of-line to avoid redundant
317  // generation of virtual functions.
318  QualType getType() const override { return T; }
319
320  BinaryOperator::Opcode getOpcode() const { return Op; }
321
322  // Implement isa<T> support.
323  static bool classof(const SymExpr *SE) {
324    Kind k = SE->getKind();
325    return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
326  }
327};
328
329/// Represents a symbolic expression like 'x' + 3.
330class SymIntExpr : public BinarySymExpr {
331  const SymExpr *LHS;
332  const llvm::APSInt& RHS;
333
334public:
335  SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
336             const llvm::APSInt &rhs, QualType t)
337      : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {
338    assert(lhs);
339  }
340
341  void dumpToStream(raw_ostream &os) const override;
342
343  const SymExpr *getLHS() const { return LHS; }
344  const llvm::APSInt &getRHS() const { return RHS; }
345
346  unsigned computeComplexity() const override {
347    if (Complexity == 0)
348      Complexity = 1 + LHS->computeComplexity();
349    return Complexity;
350  }
351
352  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
353                      BinaryOperator::Opcode op, const llvm::APSInt& rhs,
354                      QualType t) {
355    ID.AddInteger((unsigned) SymIntExprKind);
356    ID.AddPointer(lhs);
357    ID.AddInteger(op);
358    ID.AddPointer(&rhs);
359    ID.Add(t);
360  }
361
362  void Profile(llvm::FoldingSetNodeID& ID) override {
363    Profile(ID, LHS, getOpcode(), RHS, getType());
364  }
365
366  // Implement isa<T> support.
367  static bool classof(const SymExpr *SE) {
368    return SE->getKind() == SymIntExprKind;
369  }
370};
371
372/// Represents a symbolic expression like 3 - 'x'.
373class IntSymExpr : public BinarySymExpr {
374  const llvm::APSInt& LHS;
375  const SymExpr *RHS;
376
377public:
378  IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op,
379             const SymExpr *rhs, QualType t)
380      : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {
381    assert(rhs);
382  }
383
384  void dumpToStream(raw_ostream &os) const override;
385
386  const SymExpr *getRHS() const { return RHS; }
387  const llvm::APSInt &getLHS() const { return LHS; }
388
389  unsigned computeComplexity() const override {
390    if (Complexity == 0)
391      Complexity = 1 + RHS->computeComplexity();
392    return Complexity;
393  }
394
395  static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
396                      BinaryOperator::Opcode op, const SymExpr *rhs,
397                      QualType t) {
398    ID.AddInteger((unsigned) IntSymExprKind);
399    ID.AddPointer(&lhs);
400    ID.AddInteger(op);
401    ID.AddPointer(rhs);
402    ID.Add(t);
403  }
404
405  void Profile(llvm::FoldingSetNodeID& ID) override {
406    Profile(ID, LHS, getOpcode(), RHS, getType());
407  }
408
409  // Implement isa<T> support.
410  static bool classof(const SymExpr *SE) {
411    return SE->getKind() == IntSymExprKind;
412  }
413};
414
415/// Represents a symbolic expression like 'x' + 'y'.
416class SymSymExpr : public BinarySymExpr {
417  const SymExpr *LHS;
418  const SymExpr *RHS;
419
420public:
421  SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
422             QualType t)
423      : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {
424    assert(lhs);
425    assert(rhs);
426  }
427
428  const SymExpr *getLHS() const { return LHS; }
429  const SymExpr *getRHS() const { return RHS; }
430
431  void dumpToStream(raw_ostream &os) const override;
432
433  unsigned computeComplexity() const override {
434    if (Complexity == 0)
435      Complexity = RHS->computeComplexity() + LHS->computeComplexity();
436    return Complexity;
437  }
438
439  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
440                    BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
441    ID.AddInteger((unsigned) SymSymExprKind);
442    ID.AddPointer(lhs);
443    ID.AddInteger(op);
444    ID.AddPointer(rhs);
445    ID.Add(t);
446  }
447
448  void Profile(llvm::FoldingSetNodeID& ID) override {
449    Profile(ID, LHS, getOpcode(), RHS, getType());
450  }
451
452  // Implement isa<T> support.
453  static bool classof(const SymExpr *SE) {
454    return SE->getKind() == SymSymExprKind;
455  }
456};
457
458class SymbolManager {
459  using DataSetTy = llvm::FoldingSet<SymExpr>;
460  using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>;
461
462  DataSetTy DataSet;
463
464  /// Stores the extra dependencies between symbols: the data should be kept
465  /// alive as long as the key is live.
466  SymbolDependTy SymbolDependencies;
467
468  unsigned SymbolCounter = 0;
469  llvm::BumpPtrAllocator& BPAlloc;
470  BasicValueFactory &BV;
471  ASTContext &Ctx;
472
473public:
474  SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
475                llvm::BumpPtrAllocator& bpalloc)
476      : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
477  ~SymbolManager();
478
479  static bool canSymbolicate(QualType T);
480
481  /// Make a unique symbol for MemRegion R according to its kind.
482  const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
483
484  const SymbolConjured* conjureSymbol(const Stmt *E,
485                                      const LocationContext *LCtx,
486                                      QualType T,
487                                      unsigned VisitCount,
488                                      const void *SymbolTag = nullptr);
489
490  const SymbolConjured* conjureSymbol(const Expr *E,
491                                      const LocationContext *LCtx,
492                                      unsigned VisitCount,
493                                      const void *SymbolTag = nullptr) {
494    return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
495  }
496
497  const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
498                                        const TypedValueRegion *R);
499
500  const SymbolExtent *getExtentSymbol(const SubRegion *R);
501
502  /// Creates a metadata symbol associated with a specific region.
503  ///
504  /// VisitCount can be used to differentiate regions corresponding to
505  /// different loop iterations, thus, making the symbol path-dependent.
506  const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
507                                          QualType T,
508                                          const LocationContext *LCtx,
509                                          unsigned VisitCount,
510                                          const void *SymbolTag = nullptr);
511
512  const SymbolCast* getCastSymbol(const SymExpr *Operand,
513                                  QualType From, QualType To);
514
515  const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
516                                  const llvm::APSInt& rhs, QualType t);
517
518  const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
519                                  const llvm::APSInt& rhs, QualType t) {
520    return getSymIntExpr(&lhs, op, rhs, t);
521  }
522
523  const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
524                                  BinaryOperator::Opcode op,
525                                  const SymExpr *rhs, QualType t);
526
527  const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
528                                  const SymExpr *rhs, QualType t);
529
530  QualType getType(const SymExpr *SE) const {
531    return SE->getType();
532  }
533
534  /// Add artificial symbol dependency.
535  ///
536  /// The dependent symbol should stay alive as long as the primary is alive.
537  void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
538
539  const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
540
541  ASTContext &getContext() { return Ctx; }
542  BasicValueFactory &getBasicVals() { return BV; }
543};
544
545/// A class responsible for cleaning up unused symbols.
546class SymbolReaper {
547  enum SymbolStatus {
548    NotProcessed,
549    HaveMarkedDependents
550  };
551
552  using SymbolSetTy = llvm::DenseSet<SymbolRef>;
553  using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
554  using RegionSetTy = llvm::DenseSet<const MemRegion *>;
555
556  SymbolMapTy TheLiving;
557  SymbolSetTy MetadataInUse;
558  SymbolSetTy TheDead;
559
560  RegionSetTy RegionRoots;
561
562  const StackFrameContext *LCtx;
563  const Stmt *Loc;
564  SymbolManager& SymMgr;
565  StoreRef reapedStore;
566  llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
567
568public:
569  /// Construct a reaper object, which removes everything which is not
570  /// live before we execute statement s in the given location context.
571  ///
572  /// If the statement is NULL, everything is this and parent contexts is
573  /// considered live.
574  /// If the stack frame context is NULL, everything on stack is considered
575  /// dead.
576  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
577               SymbolManager &symmgr, StoreManager &storeMgr)
578      : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
579
580  const LocationContext *getLocationContext() const { return LCtx; }
581
582  bool isLive(SymbolRef sym);
583  bool isLiveRegion(const MemRegion *region);
584  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
585  bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
586
587  /// Unconditionally marks a symbol as live.
588  ///
589  /// This should never be
590  /// used by checkers, only by the state infrastructure such as the store and
591  /// environment. Checkers should instead use metadata symbols and markInUse.
592  void markLive(SymbolRef sym);
593
594  /// Marks a symbol as important to a checker.
595  ///
596  /// For metadata symbols,
597  /// this will keep the symbol alive as long as its associated region is also
598  /// live. For other symbols, this has no effect; checkers are not permitted
599  /// to influence the life of other symbols. This should be used before any
600  /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
601  void markInUse(SymbolRef sym);
602
603  /// If a symbol is known to be live, marks the symbol as live.
604  ///
605  ///  Otherwise, if the symbol cannot be proven live, it is marked as dead.
606  ///  Returns true if the symbol is dead, false if live.
607  bool maybeDead(SymbolRef sym);
608
609  using dead_iterator = SymbolSetTy::const_iterator;
610
611  dead_iterator dead_begin() const { return TheDead.begin(); }
612  dead_iterator dead_end() const { return TheDead.end(); }
613
614  bool hasDeadSymbols() const {
615    return !TheDead.empty();
616  }
617
618  using region_iterator = RegionSetTy::const_iterator;
619
620  region_iterator region_begin() const { return RegionRoots.begin(); }
621  region_iterator region_end() const { return RegionRoots.end(); }
622
623  /// Returns whether or not a symbol has been confirmed dead.
624  ///
625  /// This should only be called once all marking of dead symbols has completed.
626  /// (For checkers, this means only in the evalDeadSymbols callback.)
627  bool isDead(SymbolRef sym) const {
628    return TheDead.count(sym);
629  }
630
631  void markLive(const MemRegion *region);
632  void markElementIndicesLive(const MemRegion *region);
633
634  /// Set to the value of the symbolic store after
635  /// StoreManager::removeDeadBindings has been called.
636  void setReapedStore(StoreRef st) { reapedStore = st; }
637
638private:
639  /// Mark the symbols dependent on the input symbol as live.
640  void markDependentsLive(SymbolRef sym);
641};
642
643class SymbolVisitor {
644protected:
645  ~SymbolVisitor() = default;
646
647public:
648  SymbolVisitor() = default;
649  SymbolVisitor(const SymbolVisitor &) = default;
650  SymbolVisitor(SymbolVisitor &&) {}
651
652  /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
653  ///
654  /// The method returns \c true if symbols should continue be scanned and \c
655  /// false otherwise.
656  virtual bool VisitSymbol(SymbolRef sym) = 0;
657  virtual bool VisitMemRegion(const MemRegion *region) { return true; }
658};
659
660} // namespace ento
661
662} // namespace clang
663
664#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
665