SymbolManager.h revision 344779
1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines SymbolManager, a class that manages symbolic values
11//  created for use by ExprEngine and related classes.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
16#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
17
18#include "clang/AST/Expr.h"
19#include "clang/AST/Type.h"
20#include "clang/Analysis/AnalysisDeclContext.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
25#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/DenseSet.h"
27#include "llvm/ADT/FoldingSet.h"
28#include "llvm/Support/Allocator.h"
29#include <cassert>
30
31namespace clang {
32
33class ASTContext;
34class Stmt;
35
36namespace ento {
37
38class BasicValueFactory;
39class StoreManager;
40
41///A symbol representing the value stored at a MemRegion.
42class SymbolRegionValue : public SymbolData {
43  const TypedValueRegion *R;
44
45public:
46  SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
47      : SymbolData(SymbolRegionValueKind, sym), R(r) {
48    assert(r);
49    assert(isValidTypeForSymbol(r->getValueType()));
50  }
51
52  const TypedValueRegion* getRegion() const { return R; }
53
54  static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
55    profile.AddInteger((unsigned) SymbolRegionValueKind);
56    profile.AddPointer(R);
57  }
58
59  void Profile(llvm::FoldingSetNodeID& profile) override {
60    Profile(profile, R);
61  }
62
63  void dumpToStream(raw_ostream &os) const override;
64  const MemRegion *getOriginRegion() const override { return getRegion(); }
65
66  QualType getType() const override;
67
68  // Implement isa<T> support.
69  static bool classof(const SymExpr *SE) {
70    return SE->getKind() == SymbolRegionValueKind;
71  }
72};
73
74/// A symbol representing the result of an expression in the case when we do
75/// not know anything about what the expression is.
76class SymbolConjured : public SymbolData {
77  const Stmt *S;
78  QualType T;
79  unsigned Count;
80  const LocationContext *LCtx;
81  const void *SymbolTag;
82
83public:
84  SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
85                 QualType t, unsigned count, const void *symbolTag)
86      : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
87        LCtx(lctx), SymbolTag(symbolTag) {
88    // FIXME: 's' might be a nullptr if we're conducting invalidation
89    // that was caused by a destructor call on a temporary object,
90    // which has no statement associated with it.
91    // Due to this, we might be creating the same invalidation symbol for
92    // two different invalidation passes (for two different temporaries).
93    assert(lctx);
94    assert(isValidTypeForSymbol(t));
95  }
96
97  const Stmt *getStmt() const { return S; }
98  unsigned getCount() const { return Count; }
99  const void *getTag() const { return SymbolTag; }
100
101  QualType getType() const override;
102
103  void dumpToStream(raw_ostream &os) const override;
104
105  static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
106                      QualType T, unsigned Count, const LocationContext *LCtx,
107                      const void *SymbolTag) {
108    profile.AddInteger((unsigned) SymbolConjuredKind);
109    profile.AddPointer(S);
110    profile.AddPointer(LCtx);
111    profile.Add(T);
112    profile.AddInteger(Count);
113    profile.AddPointer(SymbolTag);
114  }
115
116  void Profile(llvm::FoldingSetNodeID& profile) override {
117    Profile(profile, S, T, Count, LCtx, SymbolTag);
118  }
119
120  // Implement isa<T> support.
121  static bool classof(const SymExpr *SE) {
122    return SE->getKind() == SymbolConjuredKind;
123  }
124};
125
126/// A symbol representing the value of a MemRegion whose parent region has
127/// symbolic value.
128class SymbolDerived : public SymbolData {
129  SymbolRef parentSymbol;
130  const TypedValueRegion *R;
131
132public:
133  SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
134      : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
135    assert(parent);
136    assert(r);
137    assert(isValidTypeForSymbol(r->getValueType()));
138  }
139
140  SymbolRef getParentSymbol() const { return parentSymbol; }
141  const TypedValueRegion *getRegion() const { return R; }
142
143  QualType getType() const override;
144
145  void dumpToStream(raw_ostream &os) const override;
146  const MemRegion *getOriginRegion() const override { return getRegion(); }
147
148  static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
149                      const TypedValueRegion *r) {
150    profile.AddInteger((unsigned) SymbolDerivedKind);
151    profile.AddPointer(r);
152    profile.AddPointer(parent);
153  }
154
155  void Profile(llvm::FoldingSetNodeID& profile) override {
156    Profile(profile, parentSymbol, R);
157  }
158
159  // Implement isa<T> support.
160  static bool classof(const SymExpr *SE) {
161    return SE->getKind() == SymbolDerivedKind;
162  }
163};
164
165/// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
166///  Clients should not ask the SymbolManager for a region's extent. Always use
167///  SubRegion::getExtent instead -- the value returned may not be a symbol.
168class SymbolExtent : public SymbolData {
169  const SubRegion *R;
170
171public:
172  SymbolExtent(SymbolID sym, const SubRegion *r)
173      : SymbolData(SymbolExtentKind, sym), R(r) {
174    assert(r);
175  }
176
177  const SubRegion *getRegion() const { return R; }
178
179  QualType getType() const override;
180
181  void dumpToStream(raw_ostream &os) const override;
182
183  static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
184    profile.AddInteger((unsigned) SymbolExtentKind);
185    profile.AddPointer(R);
186  }
187
188  void Profile(llvm::FoldingSetNodeID& profile) override {
189    Profile(profile, R);
190  }
191
192  // Implement isa<T> support.
193  static bool classof(const SymExpr *SE) {
194    return SE->getKind() == SymbolExtentKind;
195  }
196};
197
198/// SymbolMetadata - Represents path-dependent metadata about a specific region.
199///  Metadata symbols remain live as long as they are marked as in use before
200///  dead-symbol sweeping AND their associated regions are still alive.
201///  Intended for use by checkers.
202class SymbolMetadata : public SymbolData {
203  const MemRegion* R;
204  const Stmt *S;
205  QualType T;
206  const LocationContext *LCtx;
207  unsigned Count;
208  const void *Tag;
209
210public:
211  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
212                 const LocationContext *LCtx, unsigned count, const void *tag)
213      : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
214        Count(count), Tag(tag) {
215      assert(r);
216      assert(s);
217      assert(isValidTypeForSymbol(t));
218      assert(LCtx);
219      assert(tag);
220    }
221
222  const MemRegion *getRegion() const { return R; }
223  const Stmt *getStmt() const { return S; }
224  const LocationContext *getLocationContext() const { return LCtx; }
225  unsigned getCount() const { return Count; }
226  const void *getTag() const { return Tag; }
227
228  QualType getType() const override;
229
230  void dumpToStream(raw_ostream &os) const override;
231
232  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
233                      const Stmt *S, QualType T, const LocationContext *LCtx,
234                      unsigned Count, const void *Tag) {
235    profile.AddInteger((unsigned) SymbolMetadataKind);
236    profile.AddPointer(R);
237    profile.AddPointer(S);
238    profile.Add(T);
239    profile.AddPointer(LCtx);
240    profile.AddInteger(Count);
241    profile.AddPointer(Tag);
242  }
243
244  void Profile(llvm::FoldingSetNodeID& profile) override {
245    Profile(profile, R, S, T, LCtx, Count, Tag);
246  }
247
248  // Implement isa<T> support.
249  static bool classof(const SymExpr *SE) {
250    return SE->getKind() == SymbolMetadataKind;
251  }
252};
253
254/// Represents a cast expression.
255class SymbolCast : public SymExpr {
256  const SymExpr *Operand;
257
258  /// Type of the operand.
259  QualType FromTy;
260
261  /// The type of the result.
262  QualType ToTy;
263
264public:
265  SymbolCast(const SymExpr *In, QualType From, QualType To)
266      : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
267    assert(In);
268    assert(isValidTypeForSymbol(From));
269    // FIXME: GenericTaintChecker creates symbols of void type.
270    // Otherwise, 'To' should also be a valid type.
271  }
272
273  unsigned computeComplexity() const override {
274    if (Complexity == 0)
275      Complexity = 1 + Operand->computeComplexity();
276    return Complexity;
277  }
278
279  QualType getType() const override { return ToTy; }
280
281  const SymExpr *getOperand() const { return Operand; }
282
283  void dumpToStream(raw_ostream &os) const override;
284
285  static void Profile(llvm::FoldingSetNodeID& ID,
286                      const SymExpr *In, QualType From, QualType To) {
287    ID.AddInteger((unsigned) SymbolCastKind);
288    ID.AddPointer(In);
289    ID.Add(From);
290    ID.Add(To);
291  }
292
293  void Profile(llvm::FoldingSetNodeID& ID) override {
294    Profile(ID, Operand, FromTy, ToTy);
295  }
296
297  // Implement isa<T> support.
298  static bool classof(const SymExpr *SE) {
299    return SE->getKind() == SymbolCastKind;
300  }
301};
302
303/// Represents a symbolic expression involving a binary operator
304class BinarySymExpr : public SymExpr {
305  BinaryOperator::Opcode Op;
306  QualType T;
307
308protected:
309  BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
310      : SymExpr(k), Op(op), T(t) {
311    assert(classof(this));
312    // Binary expressions are results of arithmetic. Pointer arithmetic is not
313    // handled by binary expressions, but it is instead handled by applying
314    // sub-regions to regions.
315    assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
316  }
317
318public:
319  // FIXME: We probably need to make this out-of-line to avoid redundant
320  // generation of virtual functions.
321  QualType getType() const override { return T; }
322
323  BinaryOperator::Opcode getOpcode() const { return Op; }
324
325  // Implement isa<T> support.
326  static bool classof(const SymExpr *SE) {
327    Kind k = SE->getKind();
328    return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
329  }
330};
331
332/// Represents a symbolic expression like 'x' + 3.
333class SymIntExpr : public BinarySymExpr {
334  const SymExpr *LHS;
335  const llvm::APSInt& RHS;
336
337public:
338  SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
339             const llvm::APSInt &rhs, QualType t)
340      : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {
341    assert(lhs);
342  }
343
344  void dumpToStream(raw_ostream &os) const override;
345
346  const SymExpr *getLHS() const { return LHS; }
347  const llvm::APSInt &getRHS() const { return RHS; }
348
349  unsigned computeComplexity() const override {
350    if (Complexity == 0)
351      Complexity = 1 + LHS->computeComplexity();
352    return Complexity;
353  }
354
355  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
356                      BinaryOperator::Opcode op, const llvm::APSInt& rhs,
357                      QualType t) {
358    ID.AddInteger((unsigned) SymIntExprKind);
359    ID.AddPointer(lhs);
360    ID.AddInteger(op);
361    ID.AddPointer(&rhs);
362    ID.Add(t);
363  }
364
365  void Profile(llvm::FoldingSetNodeID& ID) override {
366    Profile(ID, LHS, getOpcode(), RHS, getType());
367  }
368
369  // Implement isa<T> support.
370  static bool classof(const SymExpr *SE) {
371    return SE->getKind() == SymIntExprKind;
372  }
373};
374
375/// Represents a symbolic expression like 3 - 'x'.
376class IntSymExpr : public BinarySymExpr {
377  const llvm::APSInt& LHS;
378  const SymExpr *RHS;
379
380public:
381  IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op,
382             const SymExpr *rhs, QualType t)
383      : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {
384    assert(rhs);
385  }
386
387  void dumpToStream(raw_ostream &os) const override;
388
389  const SymExpr *getRHS() const { return RHS; }
390  const llvm::APSInt &getLHS() const { return LHS; }
391
392  unsigned computeComplexity() const override {
393    if (Complexity == 0)
394      Complexity = 1 + RHS->computeComplexity();
395    return Complexity;
396  }
397
398  static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
399                      BinaryOperator::Opcode op, const SymExpr *rhs,
400                      QualType t) {
401    ID.AddInteger((unsigned) IntSymExprKind);
402    ID.AddPointer(&lhs);
403    ID.AddInteger(op);
404    ID.AddPointer(rhs);
405    ID.Add(t);
406  }
407
408  void Profile(llvm::FoldingSetNodeID& ID) override {
409    Profile(ID, LHS, getOpcode(), RHS, getType());
410  }
411
412  // Implement isa<T> support.
413  static bool classof(const SymExpr *SE) {
414    return SE->getKind() == IntSymExprKind;
415  }
416};
417
418/// Represents a symbolic expression like 'x' + 'y'.
419class SymSymExpr : public BinarySymExpr {
420  const SymExpr *LHS;
421  const SymExpr *RHS;
422
423public:
424  SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
425             QualType t)
426      : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {
427    assert(lhs);
428    assert(rhs);
429  }
430
431  const SymExpr *getLHS() const { return LHS; }
432  const SymExpr *getRHS() const { return RHS; }
433
434  void dumpToStream(raw_ostream &os) const override;
435
436  unsigned computeComplexity() const override {
437    if (Complexity == 0)
438      Complexity = RHS->computeComplexity() + LHS->computeComplexity();
439    return Complexity;
440  }
441
442  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
443                    BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
444    ID.AddInteger((unsigned) SymSymExprKind);
445    ID.AddPointer(lhs);
446    ID.AddInteger(op);
447    ID.AddPointer(rhs);
448    ID.Add(t);
449  }
450
451  void Profile(llvm::FoldingSetNodeID& ID) override {
452    Profile(ID, LHS, getOpcode(), RHS, getType());
453  }
454
455  // Implement isa<T> support.
456  static bool classof(const SymExpr *SE) {
457    return SE->getKind() == SymSymExprKind;
458  }
459};
460
461class SymbolManager {
462  using DataSetTy = llvm::FoldingSet<SymExpr>;
463  using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>;
464
465  DataSetTy DataSet;
466
467  /// Stores the extra dependencies between symbols: the data should be kept
468  /// alive as long as the key is live.
469  SymbolDependTy SymbolDependencies;
470
471  unsigned SymbolCounter = 0;
472  llvm::BumpPtrAllocator& BPAlloc;
473  BasicValueFactory &BV;
474  ASTContext &Ctx;
475
476public:
477  SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
478                llvm::BumpPtrAllocator& bpalloc)
479      : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
480  ~SymbolManager();
481
482  static bool canSymbolicate(QualType T);
483
484  /// Make a unique symbol for MemRegion R according to its kind.
485  const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
486
487  const SymbolConjured* conjureSymbol(const Stmt *E,
488                                      const LocationContext *LCtx,
489                                      QualType T,
490                                      unsigned VisitCount,
491                                      const void *SymbolTag = nullptr);
492
493  const SymbolConjured* conjureSymbol(const Expr *E,
494                                      const LocationContext *LCtx,
495                                      unsigned VisitCount,
496                                      const void *SymbolTag = nullptr) {
497    return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
498  }
499
500  const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
501                                        const TypedValueRegion *R);
502
503  const SymbolExtent *getExtentSymbol(const SubRegion *R);
504
505  /// Creates a metadata symbol associated with a specific region.
506  ///
507  /// VisitCount can be used to differentiate regions corresponding to
508  /// different loop iterations, thus, making the symbol path-dependent.
509  const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
510                                          QualType T,
511                                          const LocationContext *LCtx,
512                                          unsigned VisitCount,
513                                          const void *SymbolTag = nullptr);
514
515  const SymbolCast* getCastSymbol(const SymExpr *Operand,
516                                  QualType From, QualType To);
517
518  const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
519                                  const llvm::APSInt& rhs, QualType t);
520
521  const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
522                                  const llvm::APSInt& rhs, QualType t) {
523    return getSymIntExpr(&lhs, op, rhs, t);
524  }
525
526  const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
527                                  BinaryOperator::Opcode op,
528                                  const SymExpr *rhs, QualType t);
529
530  const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
531                                  const SymExpr *rhs, QualType t);
532
533  QualType getType(const SymExpr *SE) const {
534    return SE->getType();
535  }
536
537  /// Add artificial symbol dependency.
538  ///
539  /// The dependent symbol should stay alive as long as the primary is alive.
540  void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
541
542  const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
543
544  ASTContext &getContext() { return Ctx; }
545  BasicValueFactory &getBasicVals() { return BV; }
546};
547
548/// A class responsible for cleaning up unused symbols.
549class SymbolReaper {
550  enum SymbolStatus {
551    NotProcessed,
552    HaveMarkedDependents
553  };
554
555  using SymbolSetTy = llvm::DenseSet<SymbolRef>;
556  using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
557  using RegionSetTy = llvm::DenseSet<const MemRegion *>;
558
559  SymbolMapTy TheLiving;
560  SymbolSetTy MetadataInUse;
561
562  RegionSetTy RegionRoots;
563
564  const StackFrameContext *LCtx;
565  const Stmt *Loc;
566  SymbolManager& SymMgr;
567  StoreRef reapedStore;
568  llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
569
570public:
571  /// Construct a reaper object, which removes everything which is not
572  /// live before we execute statement s in the given location context.
573  ///
574  /// If the statement is NULL, everything is this and parent contexts is
575  /// considered live.
576  /// If the stack frame context is NULL, everything on stack is considered
577  /// dead.
578  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
579               SymbolManager &symmgr, StoreManager &storeMgr)
580      : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
581
582  const LocationContext *getLocationContext() const { return LCtx; }
583
584  bool isLive(SymbolRef sym);
585  bool isLiveRegion(const MemRegion *region);
586  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
587  bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
588
589  /// Unconditionally marks a symbol as live.
590  ///
591  /// This should never be
592  /// used by checkers, only by the state infrastructure such as the store and
593  /// environment. Checkers should instead use metadata symbols and markInUse.
594  void markLive(SymbolRef sym);
595
596  /// Marks a symbol as important to a checker.
597  ///
598  /// For metadata symbols,
599  /// this will keep the symbol alive as long as its associated region is also
600  /// live. For other symbols, this has no effect; checkers are not permitted
601  /// to influence the life of other symbols. This should be used before any
602  /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
603  void markInUse(SymbolRef sym);
604
605  using region_iterator = RegionSetTy::const_iterator;
606
607  region_iterator region_begin() const { return RegionRoots.begin(); }
608  region_iterator region_end() const { return RegionRoots.end(); }
609
610  /// Returns whether or not a symbol has been confirmed dead.
611  ///
612  /// This should only be called once all marking of dead symbols has completed.
613  /// (For checkers, this means only in the checkDeadSymbols callback.)
614  bool isDead(SymbolRef sym) {
615    return !isLive(sym);
616  }
617
618  void markLive(const MemRegion *region);
619  void markElementIndicesLive(const MemRegion *region);
620
621  /// Set to the value of the symbolic store after
622  /// StoreManager::removeDeadBindings has been called.
623  void setReapedStore(StoreRef st) { reapedStore = st; }
624
625private:
626  /// Mark the symbols dependent on the input symbol as live.
627  void markDependentsLive(SymbolRef sym);
628};
629
630class SymbolVisitor {
631protected:
632  ~SymbolVisitor() = default;
633
634public:
635  SymbolVisitor() = default;
636  SymbolVisitor(const SymbolVisitor &) = default;
637  SymbolVisitor(SymbolVisitor &&) {}
638
639  /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
640  ///
641  /// The method returns \c true if symbols should continue be scanned and \c
642  /// false otherwise.
643  virtual bool VisitSymbol(SymbolRef sym) = 0;
644  virtual bool VisitMemRegion(const MemRegion *) { return true; }
645};
646
647} // namespace ento
648
649} // namespace clang
650
651#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
652