SymbolManager.h revision 249423
1//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines SymbolManager, a class that manages symbolic values
11//  created for use by ExprEngine and related classes.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_GR_SYMMGR_H
16#define LLVM_CLANG_GR_SYMMGR_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
20#include "clang/Analysis/AnalysisContext.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/DenseSet.h"
25#include "llvm/ADT/FoldingSet.h"
26#include "llvm/Support/DataTypes.h"
27
28namespace llvm {
29class BumpPtrAllocator;
30}
31
32namespace clang {
33  class ASTContext;
34  class StackFrameContext;
35
36namespace ento {
37  class BasicValueFactory;
38  class MemRegion;
39  class SubRegion;
40  class TypedValueRegion;
41  class VarRegion;
42
43/// \brief Symbolic value. These values used to capture symbolic execution of
44/// the program.
45class SymExpr : public llvm::FoldingSetNode {
46  virtual void anchor();
47public:
48  enum Kind { RegionValueKind, ConjuredKind, DerivedKind, ExtentKind,
49              MetadataKind,
50              BEGIN_SYMBOLS = RegionValueKind,
51              END_SYMBOLS = MetadataKind,
52              SymIntKind, IntSymKind, SymSymKind, CastSymbolKind };
53private:
54  Kind K;
55
56protected:
57  SymExpr(Kind k) : K(k) {}
58
59public:
60  virtual ~SymExpr() {}
61
62  Kind getKind() const { return K; }
63
64  virtual void dump() const;
65
66  virtual void dumpToStream(raw_ostream &os) const {}
67
68  virtual QualType getType() const = 0;
69  virtual void Profile(llvm::FoldingSetNodeID& profile) = 0;
70
71  /// \brief Iterator over symbols that the current symbol depends on.
72  ///
73  /// For SymbolData, it's the symbol itself; for expressions, it's the
74  /// expression symbol and all the operands in it. Note, SymbolDerived is
75  /// treated as SymbolData - the iterator will NOT visit the parent region.
76  class symbol_iterator {
77    SmallVector<const SymExpr*, 5> itr;
78    void expand();
79  public:
80    symbol_iterator() {}
81    symbol_iterator(const SymExpr *SE);
82
83    symbol_iterator &operator++();
84    const SymExpr* operator*();
85
86    bool operator==(const symbol_iterator &X) const;
87    bool operator!=(const symbol_iterator &X) const;
88  };
89
90  symbol_iterator symbol_begin() const {
91    return symbol_iterator(this);
92  }
93  static symbol_iterator symbol_end() { return symbol_iterator(); }
94
95  unsigned computeComplexity() const;
96};
97
98typedef const SymExpr* SymbolRef;
99typedef SmallVector<SymbolRef, 2> SymbolRefSmallVectorTy;
100
101typedef unsigned SymbolID;
102/// \brief A symbol representing data which can be stored in a memory location
103/// (region).
104class SymbolData : public SymExpr {
105  virtual void anchor();
106  const SymbolID Sym;
107
108protected:
109  SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {}
110
111public:
112  virtual ~SymbolData() {}
113
114  SymbolID getSymbolID() const { return Sym; }
115
116  // Implement isa<T> support.
117  static inline bool classof(const SymExpr *SE) {
118    Kind k = SE->getKind();
119    return k >= BEGIN_SYMBOLS && k <= END_SYMBOLS;
120  }
121};
122
123///\brief A symbol representing the value stored at a MemRegion.
124class SymbolRegionValue : public SymbolData {
125  const TypedValueRegion *R;
126
127public:
128  SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
129    : SymbolData(RegionValueKind, sym), R(r) {}
130
131  const TypedValueRegion* getRegion() const { return R; }
132
133  static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
134    profile.AddInteger((unsigned) RegionValueKind);
135    profile.AddPointer(R);
136  }
137
138  virtual void Profile(llvm::FoldingSetNodeID& profile) {
139    Profile(profile, R);
140  }
141
142  virtual void dumpToStream(raw_ostream &os) const;
143
144  QualType getType() const;
145
146  // Implement isa<T> support.
147  static inline bool classof(const SymExpr *SE) {
148    return SE->getKind() == RegionValueKind;
149  }
150};
151
152/// A symbol representing the result of an expression in the case when we do
153/// not know anything about what the expression is.
154class SymbolConjured : public SymbolData {
155  const Stmt *S;
156  QualType T;
157  unsigned Count;
158  const LocationContext *LCtx;
159  const void *SymbolTag;
160
161public:
162  SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
163		 QualType t, unsigned count,
164                 const void *symbolTag)
165    : SymbolData(ConjuredKind, sym), S(s), T(t), Count(count),
166      LCtx(lctx),
167      SymbolTag(symbolTag) {}
168
169  const Stmt *getStmt() const { return S; }
170  unsigned getCount() const { return Count; }
171  const void *getTag() const { return SymbolTag; }
172
173  QualType getType() const;
174
175  virtual void dumpToStream(raw_ostream &os) const;
176
177  static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
178                      QualType T, unsigned Count, const LocationContext *LCtx,
179                      const void *SymbolTag) {
180    profile.AddInteger((unsigned) ConjuredKind);
181    profile.AddPointer(S);
182    profile.AddPointer(LCtx);
183    profile.Add(T);
184    profile.AddInteger(Count);
185    profile.AddPointer(SymbolTag);
186  }
187
188  virtual void Profile(llvm::FoldingSetNodeID& profile) {
189    Profile(profile, S, T, Count, LCtx, SymbolTag);
190  }
191
192  // Implement isa<T> support.
193  static inline bool classof(const SymExpr *SE) {
194    return SE->getKind() == ConjuredKind;
195  }
196};
197
198/// A symbol representing the value of a MemRegion whose parent region has
199/// symbolic value.
200class SymbolDerived : public SymbolData {
201  SymbolRef parentSymbol;
202  const TypedValueRegion *R;
203
204public:
205  SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
206    : SymbolData(DerivedKind, sym), parentSymbol(parent), R(r) {}
207
208  SymbolRef getParentSymbol() const { return parentSymbol; }
209  const TypedValueRegion *getRegion() const { return R; }
210
211  QualType getType() const;
212
213  virtual void dumpToStream(raw_ostream &os) const;
214
215  static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
216                      const TypedValueRegion *r) {
217    profile.AddInteger((unsigned) DerivedKind);
218    profile.AddPointer(r);
219    profile.AddPointer(parent);
220  }
221
222  virtual void Profile(llvm::FoldingSetNodeID& profile) {
223    Profile(profile, parentSymbol, R);
224  }
225
226  // Implement isa<T> support.
227  static inline bool classof(const SymExpr *SE) {
228    return SE->getKind() == DerivedKind;
229  }
230};
231
232/// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
233///  Clients should not ask the SymbolManager for a region's extent. Always use
234///  SubRegion::getExtent instead -- the value returned may not be a symbol.
235class SymbolExtent : public SymbolData {
236  const SubRegion *R;
237
238public:
239  SymbolExtent(SymbolID sym, const SubRegion *r)
240  : SymbolData(ExtentKind, sym), R(r) {}
241
242  const SubRegion *getRegion() const { return R; }
243
244  QualType getType() const;
245
246  virtual void dumpToStream(raw_ostream &os) const;
247
248  static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
249    profile.AddInteger((unsigned) ExtentKind);
250    profile.AddPointer(R);
251  }
252
253  virtual void Profile(llvm::FoldingSetNodeID& profile) {
254    Profile(profile, R);
255  }
256
257  // Implement isa<T> support.
258  static inline bool classof(const SymExpr *SE) {
259    return SE->getKind() == ExtentKind;
260  }
261};
262
263/// SymbolMetadata - Represents path-dependent metadata about a specific region.
264///  Metadata symbols remain live as long as they are marked as in use before
265///  dead-symbol sweeping AND their associated regions are still alive.
266///  Intended for use by checkers.
267class SymbolMetadata : public SymbolData {
268  const MemRegion* R;
269  const Stmt *S;
270  QualType T;
271  unsigned Count;
272  const void *Tag;
273public:
274  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
275                 unsigned count, const void *tag)
276  : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {}
277
278  const MemRegion *getRegion() const { return R; }
279  const Stmt *getStmt() const { return S; }
280  unsigned getCount() const { return Count; }
281  const void *getTag() const { return Tag; }
282
283  QualType getType() const;
284
285  virtual void dumpToStream(raw_ostream &os) const;
286
287  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
288                      const Stmt *S, QualType T, unsigned Count,
289                      const void *Tag) {
290    profile.AddInteger((unsigned) MetadataKind);
291    profile.AddPointer(R);
292    profile.AddPointer(S);
293    profile.Add(T);
294    profile.AddInteger(Count);
295    profile.AddPointer(Tag);
296  }
297
298  virtual void Profile(llvm::FoldingSetNodeID& profile) {
299    Profile(profile, R, S, T, Count, Tag);
300  }
301
302  // Implement isa<T> support.
303  static inline bool classof(const SymExpr *SE) {
304    return SE->getKind() == MetadataKind;
305  }
306};
307
308/// \brief Represents a cast expression.
309class SymbolCast : public SymExpr {
310  const SymExpr *Operand;
311  /// Type of the operand.
312  QualType FromTy;
313  /// The type of the result.
314  QualType ToTy;
315
316public:
317  SymbolCast(const SymExpr *In, QualType From, QualType To) :
318    SymExpr(CastSymbolKind), Operand(In), FromTy(From), ToTy(To) { }
319
320  QualType getType() const { return ToTy; }
321
322  const SymExpr *getOperand() const { return Operand; }
323
324  virtual void dumpToStream(raw_ostream &os) const;
325
326  static void Profile(llvm::FoldingSetNodeID& ID,
327                      const SymExpr *In, QualType From, QualType To) {
328    ID.AddInteger((unsigned) CastSymbolKind);
329    ID.AddPointer(In);
330    ID.Add(From);
331    ID.Add(To);
332  }
333
334  void Profile(llvm::FoldingSetNodeID& ID) {
335    Profile(ID, Operand, FromTy, ToTy);
336  }
337
338  // Implement isa<T> support.
339  static inline bool classof(const SymExpr *SE) {
340    return SE->getKind() == CastSymbolKind;
341  }
342};
343
344/// SymIntExpr - Represents symbolic expression like 'x' + 3.
345class SymIntExpr : public SymExpr {
346  const SymExpr *LHS;
347  BinaryOperator::Opcode Op;
348  const llvm::APSInt& RHS;
349  QualType T;
350
351public:
352  SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
353             const llvm::APSInt& rhs, QualType t)
354    : SymExpr(SymIntKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
355
356  // FIXME: We probably need to make this out-of-line to avoid redundant
357  // generation of virtual functions.
358  QualType getType() const { return T; }
359
360  BinaryOperator::Opcode getOpcode() const { return Op; }
361
362  virtual void dumpToStream(raw_ostream &os) const;
363
364  const SymExpr *getLHS() const { return LHS; }
365  const llvm::APSInt &getRHS() const { return RHS; }
366
367  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
368                      BinaryOperator::Opcode op, const llvm::APSInt& rhs,
369                      QualType t) {
370    ID.AddInteger((unsigned) SymIntKind);
371    ID.AddPointer(lhs);
372    ID.AddInteger(op);
373    ID.AddPointer(&rhs);
374    ID.Add(t);
375  }
376
377  void Profile(llvm::FoldingSetNodeID& ID) {
378    Profile(ID, LHS, Op, RHS, T);
379  }
380
381  // Implement isa<T> support.
382  static inline bool classof(const SymExpr *SE) {
383    return SE->getKind() == SymIntKind;
384  }
385};
386
387/// IntSymExpr - Represents symbolic expression like 3 - 'x'.
388class IntSymExpr : public SymExpr {
389  const llvm::APSInt& LHS;
390  BinaryOperator::Opcode Op;
391  const SymExpr *RHS;
392  QualType T;
393
394public:
395  IntSymExpr(const llvm::APSInt& lhs, BinaryOperator::Opcode op,
396             const SymExpr *rhs, QualType t)
397    : SymExpr(IntSymKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
398
399  QualType getType() const { return T; }
400
401  BinaryOperator::Opcode getOpcode() const { return Op; }
402
403  virtual void dumpToStream(raw_ostream &os) const;
404
405  const SymExpr *getRHS() const { return RHS; }
406  const llvm::APSInt &getLHS() const { return LHS; }
407
408  static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
409                      BinaryOperator::Opcode op, const SymExpr *rhs,
410                      QualType t) {
411    ID.AddInteger((unsigned) IntSymKind);
412    ID.AddPointer(&lhs);
413    ID.AddInteger(op);
414    ID.AddPointer(rhs);
415    ID.Add(t);
416  }
417
418  void Profile(llvm::FoldingSetNodeID& ID) {
419    Profile(ID, LHS, Op, RHS, T);
420  }
421
422  // Implement isa<T> support.
423  static inline bool classof(const SymExpr *SE) {
424    return SE->getKind() == IntSymKind;
425  }
426};
427
428/// SymSymExpr - Represents symbolic expression like 'x' + 'y'.
429class SymSymExpr : public SymExpr {
430  const SymExpr *LHS;
431  BinaryOperator::Opcode Op;
432  const SymExpr *RHS;
433  QualType T;
434
435public:
436  SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
437             QualType t)
438    : SymExpr(SymSymKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
439
440  BinaryOperator::Opcode getOpcode() const { return Op; }
441  const SymExpr *getLHS() const { return LHS; }
442  const SymExpr *getRHS() const { return RHS; }
443
444  // FIXME: We probably need to make this out-of-line to avoid redundant
445  // generation of virtual functions.
446  QualType getType() const { return T; }
447
448  virtual void dumpToStream(raw_ostream &os) const;
449
450  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
451                    BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
452    ID.AddInteger((unsigned) SymSymKind);
453    ID.AddPointer(lhs);
454    ID.AddInteger(op);
455    ID.AddPointer(rhs);
456    ID.Add(t);
457  }
458
459  void Profile(llvm::FoldingSetNodeID& ID) {
460    Profile(ID, LHS, Op, RHS, T);
461  }
462
463  // Implement isa<T> support.
464  static inline bool classof(const SymExpr *SE) {
465    return SE->getKind() == SymSymKind;
466  }
467};
468
469class SymbolManager {
470  typedef llvm::FoldingSet<SymExpr> DataSetTy;
471  typedef llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy*> SymbolDependTy;
472
473  DataSetTy DataSet;
474  /// Stores the extra dependencies between symbols: the data should be kept
475  /// alive as long as the key is live.
476  SymbolDependTy SymbolDependencies;
477  unsigned SymbolCounter;
478  llvm::BumpPtrAllocator& BPAlloc;
479  BasicValueFactory &BV;
480  ASTContext &Ctx;
481
482public:
483  SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
484                llvm::BumpPtrAllocator& bpalloc)
485    : SymbolDependencies(16), SymbolCounter(0),
486      BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
487
488  ~SymbolManager();
489
490  static bool canSymbolicate(QualType T);
491
492  /// \brief Make a unique symbol for MemRegion R according to its kind.
493  const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
494
495  const SymbolConjured* conjureSymbol(const Stmt *E,
496                                      const LocationContext *LCtx,
497                                      QualType T,
498                                      unsigned VisitCount,
499                                      const void *SymbolTag = 0);
500
501  const SymbolConjured* conjureSymbol(const Expr *E,
502                                      const LocationContext *LCtx,
503                                      unsigned VisitCount,
504                                      const void *SymbolTag = 0) {
505    return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
506  }
507
508  const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
509                                        const TypedValueRegion *R);
510
511  const SymbolExtent *getExtentSymbol(const SubRegion *R);
512
513  /// \brief Creates a metadata symbol associated with a specific region.
514  ///
515  /// VisitCount can be used to differentiate regions corresponding to
516  /// different loop iterations, thus, making the symbol path-dependent.
517  const SymbolMetadata* getMetadataSymbol(const MemRegion* R, const Stmt *S,
518                                          QualType T, unsigned VisitCount,
519                                          const void *SymbolTag = 0);
520
521  const SymbolCast* getCastSymbol(const SymExpr *Operand,
522                                  QualType From, QualType To);
523
524  const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
525                                  const llvm::APSInt& rhs, QualType t);
526
527  const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
528                                  const llvm::APSInt& rhs, QualType t) {
529    return getSymIntExpr(&lhs, op, rhs, t);
530  }
531
532  const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
533                                  BinaryOperator::Opcode op,
534                                  const SymExpr *rhs, QualType t);
535
536  const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
537                                  const SymExpr *rhs, QualType t);
538
539  QualType getType(const SymExpr *SE) const {
540    return SE->getType();
541  }
542
543  /// \brief Add artificial symbol dependency.
544  ///
545  /// The dependent symbol should stay alive as long as the primary is alive.
546  void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
547
548  const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
549
550  ASTContext &getContext() { return Ctx; }
551  BasicValueFactory &getBasicVals() { return BV; }
552};
553
554/// \brief A class responsible for cleaning up unused symbols.
555class SymbolReaper {
556  enum SymbolStatus {
557    NotProcessed,
558    HaveMarkedDependents
559  };
560
561  typedef llvm::DenseSet<SymbolRef> SymbolSetTy;
562  typedef llvm::DenseMap<SymbolRef, SymbolStatus> SymbolMapTy;
563  typedef llvm::DenseSet<const MemRegion *> RegionSetTy;
564
565  SymbolMapTy TheLiving;
566  SymbolSetTy MetadataInUse;
567  SymbolSetTy TheDead;
568
569  RegionSetTy RegionRoots;
570
571  const StackFrameContext *LCtx;
572  const Stmt *Loc;
573  SymbolManager& SymMgr;
574  StoreRef reapedStore;
575  llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
576
577public:
578  /// \brief Construct a reaper object, which removes everything which is not
579  /// live before we execute statement s in the given location context.
580  ///
581  /// If the statement is NULL, everything is this and parent contexts is
582  /// considered live.
583  /// If the stack frame context is NULL, everything on stack is considered
584  /// dead.
585  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, SymbolManager& symmgr,
586               StoreManager &storeMgr)
587   : LCtx(Ctx), Loc(s), SymMgr(symmgr),
588     reapedStore(0, storeMgr) {}
589
590  ~SymbolReaper() {}
591
592  const LocationContext *getLocationContext() const { return LCtx; }
593
594  bool isLive(SymbolRef sym);
595  bool isLiveRegion(const MemRegion *region);
596  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
597  bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
598
599  /// \brief Unconditionally marks a symbol as live.
600  ///
601  /// This should never be
602  /// used by checkers, only by the state infrastructure such as the store and
603  /// environment. Checkers should instead use metadata symbols and markInUse.
604  void markLive(SymbolRef sym);
605
606  /// \brief Marks a symbol as important to a checker.
607  ///
608  /// For metadata symbols,
609  /// this will keep the symbol alive as long as its associated region is also
610  /// live. For other symbols, this has no effect; checkers are not permitted
611  /// to influence the life of other symbols. This should be used before any
612  /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
613  void markInUse(SymbolRef sym);
614
615  /// \brief If a symbol is known to be live, marks the symbol as live.
616  ///
617  ///  Otherwise, if the symbol cannot be proven live, it is marked as dead.
618  ///  Returns true if the symbol is dead, false if live.
619  bool maybeDead(SymbolRef sym);
620
621  typedef SymbolSetTy::const_iterator dead_iterator;
622  dead_iterator dead_begin() const { return TheDead.begin(); }
623  dead_iterator dead_end() const { return TheDead.end(); }
624
625  bool hasDeadSymbols() const {
626    return !TheDead.empty();
627  }
628
629  typedef RegionSetTy::const_iterator region_iterator;
630  region_iterator region_begin() const { return RegionRoots.begin(); }
631  region_iterator region_end() const { return RegionRoots.end(); }
632
633  /// \brief Returns whether or not a symbol has been confirmed dead.
634  ///
635  /// This should only be called once all marking of dead symbols has completed.
636  /// (For checkers, this means only in the evalDeadSymbols callback.)
637  bool isDead(SymbolRef sym) const {
638    return TheDead.count(sym);
639  }
640
641  void markLive(const MemRegion *region);
642
643  /// \brief Set to the value of the symbolic store after
644  /// StoreManager::removeDeadBindings has been called.
645  void setReapedStore(StoreRef st) { reapedStore = st; }
646
647private:
648  /// Mark the symbols dependent on the input symbol as live.
649  void markDependentsLive(SymbolRef sym);
650};
651
652class SymbolVisitor {
653public:
654  /// \brief A visitor method invoked by ProgramStateManager::scanReachableSymbols.
655  ///
656  /// The method returns \c true if symbols should continue be scanned and \c
657  /// false otherwise.
658  virtual bool VisitSymbol(SymbolRef sym) = 0;
659  virtual bool VisitMemRegion(const MemRegion *region) { return true; }
660  virtual ~SymbolVisitor();
661};
662
663} // end GR namespace
664
665} // end clang namespace
666
667namespace llvm {
668static inline raw_ostream &operator<<(raw_ostream &os,
669                                      const clang::ento::SymExpr *SE) {
670  SE->dumpToStream(os);
671  return os;
672}
673} // end llvm namespace
674#endif
675