// SymbolManager.h revision 235633
//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines SymbolManager, a class that manages symbolic values
//  created for use by ExprEngine and related classes.
//
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_GR_SYMMGR_H
16#define LLVM_CLANG_GR_SYMMGR_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
20#include "clang/Analysis/AnalysisContext.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23#include "llvm/Support/DataTypes.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/DenseMap.h"
27
// Forward declaration only: this header refers to the allocator solely through
// a reference (SymbolManager::BPAlloc), so the full definition is not needed.
namespace llvm {
class BumpPtrAllocator;
}
31
32namespace clang {
33  class ASTContext;
34  class StackFrameContext;
35
36namespace ento {
37  class BasicValueFactory;
38  class MemRegion;
39  class SubRegion;
40  class TypedValueRegion;
41  class VarRegion;
42
43/// \brief Symbolic value. These values used to capture symbolic execution of
44/// the program.
45class SymExpr : public llvm::FoldingSetNode {
46  virtual void anchor();
47public:
48  enum Kind { RegionValueKind, ConjuredKind, DerivedKind, ExtentKind,
49              MetadataKind,
50              BEGIN_SYMBOLS = RegionValueKind,
51              END_SYMBOLS = MetadataKind,
52              SymIntKind, IntSymKind, SymSymKind, CastSymbolKind };
53private:
54  Kind K;
55
56protected:
57  SymExpr(Kind k) : K(k) {}
58
59public:
60  virtual ~SymExpr() {}
61
62  Kind getKind() const { return K; }
63
64  virtual void dump() const;
65
66  virtual void dumpToStream(raw_ostream &os) const {}
67
68  virtual QualType getType(ASTContext&) const = 0;
69  virtual void Profile(llvm::FoldingSetNodeID& profile) = 0;
70
71  // Implement isa<T> support.
72  static inline bool classof(const SymExpr*) { return true; }
73
74  /// \brief Iterator over symbols that the current symbol depends on.
75  ///
76  /// For SymbolData, it's the symbol itself; for expressions, it's the
77  /// expression symbol and all the operands in it. Note, SymbolDerived is
78  /// treated as SymbolData - the iterator will NOT visit the parent region.
79  class symbol_iterator {
80    SmallVector<const SymExpr*, 5> itr;
81    void expand();
82  public:
83    symbol_iterator() {}
84    symbol_iterator(const SymExpr *SE);
85
86    symbol_iterator &operator++();
87    const SymExpr* operator*();
88
89    bool operator==(const symbol_iterator &X) const;
90    bool operator!=(const symbol_iterator &X) const;
91  };
92
93  symbol_iterator symbol_begin() const {
94    return symbol_iterator(this);
95  }
96  static symbol_iterator symbol_end() { return symbol_iterator(); }
97};
98
99typedef const SymExpr* SymbolRef;
100typedef llvm::SmallVector<SymbolRef, 2> SymbolRefSmallVectorTy;
101
102typedef unsigned SymbolID;
103/// \brief A symbol representing data which can be stored in a memory location
104/// (region).
105class SymbolData : public SymExpr {
106  virtual void anchor();
107  const SymbolID Sym;
108
109protected:
110  SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {}
111
112public:
113  virtual ~SymbolData() {}
114
115  SymbolID getSymbolID() const { return Sym; }
116
117  // Implement isa<T> support.
118  static inline bool classof(const SymExpr *SE) {
119    Kind k = SE->getKind();
120    return k >= BEGIN_SYMBOLS && k <= END_SYMBOLS;
121  }
122};
123
124///\brief A symbol representing the value stored at a MemRegion.
125class SymbolRegionValue : public SymbolData {
126  const TypedValueRegion *R;
127
128public:
129  SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
130    : SymbolData(RegionValueKind, sym), R(r) {}
131
132  const TypedValueRegion* getRegion() const { return R; }
133
134  static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
135    profile.AddInteger((unsigned) RegionValueKind);
136    profile.AddPointer(R);
137  }
138
139  virtual void Profile(llvm::FoldingSetNodeID& profile) {
140    Profile(profile, R);
141  }
142
143  virtual void dumpToStream(raw_ostream &os) const;
144
145  QualType getType(ASTContext&) const;
146
147  // Implement isa<T> support.
148  static inline bool classof(const SymExpr *SE) {
149    return SE->getKind() == RegionValueKind;
150  }
151};
152
153/// A symbol representing the result of an expression in the case when we do
154/// not know anything about what the expression is.
155class SymbolConjured : public SymbolData {
156  const Stmt *S;
157  QualType T;
158  unsigned Count;
159  const LocationContext *LCtx;
160  const void *SymbolTag;
161
162public:
163  SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
164		 QualType t, unsigned count,
165                 const void *symbolTag)
166    : SymbolData(ConjuredKind, sym), S(s), T(t), Count(count),
167      LCtx(lctx),
168      SymbolTag(symbolTag) {}
169
170  const Stmt *getStmt() const { return S; }
171  unsigned getCount() const { return Count; }
172  const void *getTag() const { return SymbolTag; }
173
174  QualType getType(ASTContext&) const;
175
176  virtual void dumpToStream(raw_ostream &os) const;
177
178  static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
179                      QualType T, unsigned Count, const LocationContext *LCtx,
180                      const void *SymbolTag) {
181    profile.AddInteger((unsigned) ConjuredKind);
182    profile.AddPointer(S);
183    profile.AddPointer(LCtx);
184    profile.Add(T);
185    profile.AddInteger(Count);
186    profile.AddPointer(SymbolTag);
187  }
188
189  virtual void Profile(llvm::FoldingSetNodeID& profile) {
190    Profile(profile, S, T, Count, LCtx, SymbolTag);
191  }
192
193  // Implement isa<T> support.
194  static inline bool classof(const SymExpr *SE) {
195    return SE->getKind() == ConjuredKind;
196  }
197};
198
199/// A symbol representing the value of a MemRegion whose parent region has
200/// symbolic value.
201class SymbolDerived : public SymbolData {
202  SymbolRef parentSymbol;
203  const TypedValueRegion *R;
204
205public:
206  SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
207    : SymbolData(DerivedKind, sym), parentSymbol(parent), R(r) {}
208
209  SymbolRef getParentSymbol() const { return parentSymbol; }
210  const TypedValueRegion *getRegion() const { return R; }
211
212  QualType getType(ASTContext&) const;
213
214  virtual void dumpToStream(raw_ostream &os) const;
215
216  static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
217                      const TypedValueRegion *r) {
218    profile.AddInteger((unsigned) DerivedKind);
219    profile.AddPointer(r);
220    profile.AddPointer(parent);
221  }
222
223  virtual void Profile(llvm::FoldingSetNodeID& profile) {
224    Profile(profile, parentSymbol, R);
225  }
226
227  // Implement isa<T> support.
228  static inline bool classof(const SymExpr *SE) {
229    return SE->getKind() == DerivedKind;
230  }
231};
232
233/// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
234///  Clients should not ask the SymbolManager for a region's extent. Always use
235///  SubRegion::getExtent instead -- the value returned may not be a symbol.
236class SymbolExtent : public SymbolData {
237  const SubRegion *R;
238
239public:
240  SymbolExtent(SymbolID sym, const SubRegion *r)
241  : SymbolData(ExtentKind, sym), R(r) {}
242
243  const SubRegion *getRegion() const { return R; }
244
245  QualType getType(ASTContext&) const;
246
247  virtual void dumpToStream(raw_ostream &os) const;
248
249  static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
250    profile.AddInteger((unsigned) ExtentKind);
251    profile.AddPointer(R);
252  }
253
254  virtual void Profile(llvm::FoldingSetNodeID& profile) {
255    Profile(profile, R);
256  }
257
258  // Implement isa<T> support.
259  static inline bool classof(const SymExpr *SE) {
260    return SE->getKind() == ExtentKind;
261  }
262};
263
264/// SymbolMetadata - Represents path-dependent metadata about a specific region.
265///  Metadata symbols remain live as long as they are marked as in use before
266///  dead-symbol sweeping AND their associated regions are still alive.
267///  Intended for use by checkers.
268class SymbolMetadata : public SymbolData {
269  const MemRegion* R;
270  const Stmt *S;
271  QualType T;
272  unsigned Count;
273  const void *Tag;
274public:
275  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
276                 unsigned count, const void *tag)
277  : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {}
278
279  const MemRegion *getRegion() const { return R; }
280  const Stmt *getStmt() const { return S; }
281  unsigned getCount() const { return Count; }
282  const void *getTag() const { return Tag; }
283
284  QualType getType(ASTContext&) const;
285
286  virtual void dumpToStream(raw_ostream &os) const;
287
288  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
289                      const Stmt *S, QualType T, unsigned Count,
290                      const void *Tag) {
291    profile.AddInteger((unsigned) MetadataKind);
292    profile.AddPointer(R);
293    profile.AddPointer(S);
294    profile.Add(T);
295    profile.AddInteger(Count);
296    profile.AddPointer(Tag);
297  }
298
299  virtual void Profile(llvm::FoldingSetNodeID& profile) {
300    Profile(profile, R, S, T, Count, Tag);
301  }
302
303  // Implement isa<T> support.
304  static inline bool classof(const SymExpr *SE) {
305    return SE->getKind() == MetadataKind;
306  }
307};
308
309/// \brief Represents a cast expression.
310class SymbolCast : public SymExpr {
311  const SymExpr *Operand;
312  /// Type of the operand.
313  QualType FromTy;
314  /// The type of the result.
315  QualType ToTy;
316
317public:
318  SymbolCast(const SymExpr *In, QualType From, QualType To) :
319    SymExpr(CastSymbolKind), Operand(In), FromTy(From), ToTy(To) { }
320
321  QualType getType(ASTContext &C) const { return ToTy; }
322
323  const SymExpr *getOperand() const { return Operand; }
324
325  virtual void dumpToStream(raw_ostream &os) const;
326
327  static void Profile(llvm::FoldingSetNodeID& ID,
328                      const SymExpr *In, QualType From, QualType To) {
329    ID.AddInteger((unsigned) CastSymbolKind);
330    ID.AddPointer(In);
331    ID.Add(From);
332    ID.Add(To);
333  }
334
335  void Profile(llvm::FoldingSetNodeID& ID) {
336    Profile(ID, Operand, FromTy, ToTy);
337  }
338
339  // Implement isa<T> support.
340  static inline bool classof(const SymExpr *SE) {
341    return SE->getKind() == CastSymbolKind;
342  }
343};
344
345/// SymIntExpr - Represents symbolic expression like 'x' + 3.
346class SymIntExpr : public SymExpr {
347  const SymExpr *LHS;
348  BinaryOperator::Opcode Op;
349  const llvm::APSInt& RHS;
350  QualType T;
351
352public:
353  SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
354             const llvm::APSInt& rhs, QualType t)
355    : SymExpr(SymIntKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
356
357  // FIXME: We probably need to make this out-of-line to avoid redundant
358  // generation of virtual functions.
359  QualType getType(ASTContext &C) const { return T; }
360
361  BinaryOperator::Opcode getOpcode() const { return Op; }
362
363  virtual void dumpToStream(raw_ostream &os) const;
364
365  const SymExpr *getLHS() const { return LHS; }
366  const llvm::APSInt &getRHS() const { return RHS; }
367
368  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
369                      BinaryOperator::Opcode op, const llvm::APSInt& rhs,
370                      QualType t) {
371    ID.AddInteger((unsigned) SymIntKind);
372    ID.AddPointer(lhs);
373    ID.AddInteger(op);
374    ID.AddPointer(&rhs);
375    ID.Add(t);
376  }
377
378  void Profile(llvm::FoldingSetNodeID& ID) {
379    Profile(ID, LHS, Op, RHS, T);
380  }
381
382  // Implement isa<T> support.
383  static inline bool classof(const SymExpr *SE) {
384    return SE->getKind() == SymIntKind;
385  }
386};
387
388/// IntSymExpr - Represents symbolic expression like 3 - 'x'.
389class IntSymExpr : public SymExpr {
390  const llvm::APSInt& LHS;
391  BinaryOperator::Opcode Op;
392  const SymExpr *RHS;
393  QualType T;
394
395public:
396  IntSymExpr(const llvm::APSInt& lhs, BinaryOperator::Opcode op,
397             const SymExpr *rhs, QualType t)
398    : SymExpr(IntSymKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
399
400  QualType getType(ASTContext &C) const { return T; }
401
402  BinaryOperator::Opcode getOpcode() const { return Op; }
403
404  virtual void dumpToStream(raw_ostream &os) const;
405
406  const SymExpr *getRHS() const { return RHS; }
407  const llvm::APSInt &getLHS() const { return LHS; }
408
409  static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
410                      BinaryOperator::Opcode op, const SymExpr *rhs,
411                      QualType t) {
412    ID.AddInteger((unsigned) IntSymKind);
413    ID.AddPointer(&lhs);
414    ID.AddInteger(op);
415    ID.AddPointer(rhs);
416    ID.Add(t);
417  }
418
419  void Profile(llvm::FoldingSetNodeID& ID) {
420    Profile(ID, LHS, Op, RHS, T);
421  }
422
423  // Implement isa<T> support.
424  static inline bool classof(const SymExpr *SE) {
425    return SE->getKind() == IntSymKind;
426  }
427};
428
429/// SymSymExpr - Represents symbolic expression like 'x' + 'y'.
430class SymSymExpr : public SymExpr {
431  const SymExpr *LHS;
432  BinaryOperator::Opcode Op;
433  const SymExpr *RHS;
434  QualType T;
435
436public:
437  SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
438             QualType t)
439    : SymExpr(SymSymKind), LHS(lhs), Op(op), RHS(rhs), T(t) {}
440
441  BinaryOperator::Opcode getOpcode() const { return Op; }
442  const SymExpr *getLHS() const { return LHS; }
443  const SymExpr *getRHS() const { return RHS; }
444
445  // FIXME: We probably need to make this out-of-line to avoid redundant
446  // generation of virtual functions.
447  QualType getType(ASTContext &C) const { return T; }
448
449  virtual void dumpToStream(raw_ostream &os) const;
450
451  static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
452                    BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
453    ID.AddInteger((unsigned) SymSymKind);
454    ID.AddPointer(lhs);
455    ID.AddInteger(op);
456    ID.AddPointer(rhs);
457    ID.Add(t);
458  }
459
460  void Profile(llvm::FoldingSetNodeID& ID) {
461    Profile(ID, LHS, Op, RHS, T);
462  }
463
464  // Implement isa<T> support.
465  static inline bool classof(const SymExpr *SE) {
466    return SE->getKind() == SymSymKind;
467  }
468};
469
470class SymbolManager {
471  typedef llvm::FoldingSet<SymExpr> DataSetTy;
472  typedef llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy*> SymbolDependTy;
473
474  DataSetTy DataSet;
475  /// Stores the extra dependencies between symbols: the data should be kept
476  /// alive as long as the key is live.
477  SymbolDependTy SymbolDependencies;
478  unsigned SymbolCounter;
479  llvm::BumpPtrAllocator& BPAlloc;
480  BasicValueFactory &BV;
481  ASTContext &Ctx;
482
483public:
484  SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
485                llvm::BumpPtrAllocator& bpalloc)
486    : SymbolDependencies(16), SymbolCounter(0),
487      BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
488
489  ~SymbolManager();
490
491  static bool canSymbolicate(QualType T);
492
493  /// \brief Make a unique symbol for MemRegion R according to its kind.
494  const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
495
496  const SymbolConjured* getConjuredSymbol(const Stmt *E,
497					  const LocationContext *LCtx,
498					  QualType T,
499                                          unsigned VisitCount,
500                                          const void *SymbolTag = 0);
501
502  const SymbolConjured* getConjuredSymbol(const Expr *E,
503					  const LocationContext *LCtx,
504					  unsigned VisitCount,
505                                          const void *SymbolTag = 0) {
506    return getConjuredSymbol(E, LCtx, E->getType(),
507			     VisitCount, SymbolTag);
508  }
509
510  const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
511                                        const TypedValueRegion *R);
512
513  const SymbolExtent *getExtentSymbol(const SubRegion *R);
514
515  /// \brief Creates a metadata symbol associated with a specific region.
516  ///
517  /// VisitCount can be used to differentiate regions corresponding to
518  /// different loop iterations, thus, making the symbol path-dependent.
519  const SymbolMetadata* getMetadataSymbol(const MemRegion* R, const Stmt *S,
520                                          QualType T, unsigned VisitCount,
521                                          const void *SymbolTag = 0);
522
523  const SymbolCast* getCastSymbol(const SymExpr *Operand,
524                                  QualType From, QualType To);
525
526  const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
527                                  const llvm::APSInt& rhs, QualType t);
528
529  const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
530                                  const llvm::APSInt& rhs, QualType t) {
531    return getSymIntExpr(&lhs, op, rhs, t);
532  }
533
534  const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
535                                  BinaryOperator::Opcode op,
536                                  const SymExpr *rhs, QualType t);
537
538  const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
539                                  const SymExpr *rhs, QualType t);
540
541  QualType getType(const SymExpr *SE) const {
542    return SE->getType(Ctx);
543  }
544
545  /// \brief Add artificial symbol dependency.
546  ///
547  /// The dependent symbol should stay alive as long as the primary is alive.
548  void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
549
550  const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
551
552  ASTContext &getContext() { return Ctx; }
553  BasicValueFactory &getBasicVals() { return BV; }
554};
555
556class SymbolReaper {
557  enum SymbolStatus {
558    NotProcessed,
559    HaveMarkedDependents
560  };
561
562  typedef llvm::DenseSet<SymbolRef> SymbolSetTy;
563  typedef llvm::DenseMap<SymbolRef, SymbolStatus> SymbolMapTy;
564  typedef llvm::DenseSet<const MemRegion *> RegionSetTy;
565
566  SymbolMapTy TheLiving;
567  SymbolSetTy MetadataInUse;
568  SymbolSetTy TheDead;
569
570  RegionSetTy RegionRoots;
571
572  const LocationContext *LCtx;
573  const Stmt *Loc;
574  SymbolManager& SymMgr;
575  StoreRef reapedStore;
576  llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
577
578public:
579  SymbolReaper(const LocationContext *ctx, const Stmt *s, SymbolManager& symmgr,
580               StoreManager &storeMgr)
581   : LCtx(ctx), Loc(s), SymMgr(symmgr), reapedStore(0, storeMgr) {}
582
583  ~SymbolReaper() {}
584
585  const LocationContext *getLocationContext() const { return LCtx; }
586  const Stmt *getCurrentStatement() const { return Loc; }
587
588  bool isLive(SymbolRef sym);
589  bool isLiveRegion(const MemRegion *region);
590  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
591  bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
592
593  /// \brief Unconditionally marks a symbol as live.
594  ///
595  /// This should never be
596  /// used by checkers, only by the state infrastructure such as the store and
597  /// environment. Checkers should instead use metadata symbols and markInUse.
598  void markLive(SymbolRef sym);
599
600  /// \brief Marks a symbol as important to a checker.
601  ///
602  /// For metadata symbols,
603  /// this will keep the symbol alive as long as its associated region is also
604  /// live. For other symbols, this has no effect; checkers are not permitted
605  /// to influence the life of other symbols. This should be used before any
606  /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
607  void markInUse(SymbolRef sym);
608
609  /// \brief If a symbol is known to be live, marks the symbol as live.
610  ///
611  ///  Otherwise, if the symbol cannot be proven live, it is marked as dead.
612  ///  Returns true if the symbol is dead, false if live.
613  bool maybeDead(SymbolRef sym);
614
615  typedef SymbolSetTy::const_iterator dead_iterator;
616  dead_iterator dead_begin() const { return TheDead.begin(); }
617  dead_iterator dead_end() const { return TheDead.end(); }
618
619  bool hasDeadSymbols() const {
620    return !TheDead.empty();
621  }
622
623  typedef RegionSetTy::const_iterator region_iterator;
624  region_iterator region_begin() const { return RegionRoots.begin(); }
625  region_iterator region_end() const { return RegionRoots.end(); }
626
627  /// \brief Returns whether or not a symbol has been confirmed dead.
628  ///
629  /// This should only be called once all marking of dead symbols has completed.
630  /// (For checkers, this means only in the evalDeadSymbols callback.)
631  bool isDead(SymbolRef sym) const {
632    return TheDead.count(sym);
633  }
634
635  void markLive(const MemRegion *region);
636
637  /// \brief Set to the value of the symbolic store after
638  /// StoreManager::removeDeadBindings has been called.
639  void setReapedStore(StoreRef st) { reapedStore = st; }
640
641private:
642  /// Mark the symbols dependent on the input symbol as live.
643  void markDependentsLive(SymbolRef sym);
644};
645
646class SymbolVisitor {
647public:
648  /// \brief A visitor method invoked by ProgramStateManager::scanReachableSymbols.
649  ///
650  /// The method returns \c true if symbols should continue be scanned and \c
651  /// false otherwise.
652  virtual bool VisitSymbol(SymbolRef sym) = 0;
653  virtual bool VisitMemRegion(const MemRegion *region) { return true; }
654  virtual ~SymbolVisitor();
655};
656
} // end ento namespace
658
659} // end clang namespace
660
661namespace llvm {
662static inline raw_ostream &operator<<(raw_ostream &os,
663                                      const clang::ento::SymExpr *SE) {
664  SE->dumpToStream(os);
665  return os;
666}
667} // end llvm namespace
668#endif
669