SymbolManager.h revision 341825
1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines SymbolManager, a class that manages symbolic values 11// created for use by ExprEngine and related classes. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 16#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 17 18#include "clang/AST/Expr.h" 19#include "clang/AST/Type.h" 20#include "clang/Analysis/AnalysisDeclContext.h" 21#include "clang/Basic/LLVM.h" 22#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 25#include "llvm/ADT/DenseMap.h" 26#include "llvm/ADT/DenseSet.h" 27#include "llvm/ADT/FoldingSet.h" 28#include "llvm/Support/Allocator.h" 29#include <cassert> 30 31namespace clang { 32 33class ASTContext; 34class Stmt; 35 36namespace ento { 37 38class BasicValueFactory; 39class StoreManager; 40 41///A symbol representing the value stored at a MemRegion. 42class SymbolRegionValue : public SymbolData { 43 const TypedValueRegion *R; 44 45public: 46 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) 47 : SymbolData(SymbolRegionValueKind, sym), R(r) { 48 assert(r); 49 assert(isValidTypeForSymbol(r->getValueType())); 50 } 51 52 const TypedValueRegion* getRegion() const { return R; } 53 54 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { 55 profile.AddInteger((unsigned) SymbolRegionValueKind); 56 profile.AddPointer(R); 57 } 58 59 void Profile(llvm::FoldingSetNodeID& profile) override { 60 Profile(profile, R); 61 } 62 63 void dumpToStream(raw_ostream &os) const override; 64 const MemRegion *getOriginRegion() const override { return getRegion(); } 65 66 QualType getType() const override; 67 68 // Implement isa<T> support. 69 static bool classof(const SymExpr *SE) { 70 return SE->getKind() == SymbolRegionValueKind; 71 } 72}; 73 74/// A symbol representing the result of an expression in the case when we do 75/// not know anything about what the expression is. 76class SymbolConjured : public SymbolData { 77 const Stmt *S; 78 QualType T; 79 unsigned Count; 80 const LocationContext *LCtx; 81 const void *SymbolTag; 82 83public: 84 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, 85 QualType t, unsigned count, const void *symbolTag) 86 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), 87 LCtx(lctx), SymbolTag(symbolTag) { 88 // FIXME: 's' might be a nullptr if we're conducting invalidation 89 // that was caused by a destructor call on a temporary object, 90 // which has no statement associated with it. 91 // Due to this, we might be creating the same invalidation symbol for 92 // two different invalidation passes (for two different temporaries). 93 assert(lctx); 94 assert(isValidTypeForSymbol(t)); 95 } 96 97 const Stmt *getStmt() const { return S; } 98 unsigned getCount() const { return Count; } 99 const void *getTag() const { return SymbolTag; } 100 101 QualType getType() const override; 102 103 void dumpToStream(raw_ostream &os) const override; 104 105 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, 106 QualType T, unsigned Count, const LocationContext *LCtx, 107 const void *SymbolTag) { 108 profile.AddInteger((unsigned) SymbolConjuredKind); 109 profile.AddPointer(S); 110 profile.AddPointer(LCtx); 111 profile.Add(T); 112 profile.AddInteger(Count); 113 profile.AddPointer(SymbolTag); 114 } 115 116 void Profile(llvm::FoldingSetNodeID& profile) override { 117 Profile(profile, S, T, Count, LCtx, SymbolTag); 118 } 119 120 // Implement isa<T> support. 121 static bool classof(const SymExpr *SE) { 122 return SE->getKind() == SymbolConjuredKind; 123 } 124}; 125 126/// A symbol representing the value of a MemRegion whose parent region has 127/// symbolic value. 128class SymbolDerived : public SymbolData { 129 SymbolRef parentSymbol; 130 const TypedValueRegion *R; 131 132public: 133 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) 134 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { 135 assert(parent); 136 assert(r); 137 assert(isValidTypeForSymbol(r->getValueType())); 138 } 139 140 SymbolRef getParentSymbol() const { return parentSymbol; } 141 const TypedValueRegion *getRegion() const { return R; } 142 143 QualType getType() const override; 144 145 void dumpToStream(raw_ostream &os) const override; 146 const MemRegion *getOriginRegion() const override { return getRegion(); } 147 148 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, 149 const TypedValueRegion *r) { 150 profile.AddInteger((unsigned) SymbolDerivedKind); 151 profile.AddPointer(r); 152 profile.AddPointer(parent); 153 } 154 155 void Profile(llvm::FoldingSetNodeID& profile) override { 156 Profile(profile, parentSymbol, R); 157 } 158 159 // Implement isa<T> support. 160 static bool classof(const SymExpr *SE) { 161 return SE->getKind() == SymbolDerivedKind; 162 } 163}; 164 165/// SymbolExtent - Represents the extent (size in bytes) of a bounded region. 166/// Clients should not ask the SymbolManager for a region's extent. Always use 167/// SubRegion::getExtent instead -- the value returned may not be a symbol. 168class SymbolExtent : public SymbolData { 169 const SubRegion *R; 170 171public: 172 SymbolExtent(SymbolID sym, const SubRegion *r) 173 : SymbolData(SymbolExtentKind, sym), R(r) { 174 assert(r); 175 } 176 177 const SubRegion *getRegion() const { return R; } 178 179 QualType getType() const override; 180 181 void dumpToStream(raw_ostream &os) const override; 182 183 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { 184 profile.AddInteger((unsigned) SymbolExtentKind); 185 profile.AddPointer(R); 186 } 187 188 void Profile(llvm::FoldingSetNodeID& profile) override { 189 Profile(profile, R); 190 } 191 192 // Implement isa<T> support. 193 static bool classof(const SymExpr *SE) { 194 return SE->getKind() == SymbolExtentKind; 195 } 196}; 197 198/// SymbolMetadata - Represents path-dependent metadata about a specific region. 199/// Metadata symbols remain live as long as they are marked as in use before 200/// dead-symbol sweeping AND their associated regions are still alive. 201/// Intended for use by checkers. 202class SymbolMetadata : public SymbolData { 203 const MemRegion* R; 204 const Stmt *S; 205 QualType T; 206 const LocationContext *LCtx; 207 unsigned Count; 208 const void *Tag; 209 210public: 211 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, 212 const LocationContext *LCtx, unsigned count, const void *tag) 213 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), 214 Count(count), Tag(tag) { 215 assert(r); 216 assert(s); 217 assert(isValidTypeForSymbol(t)); 218 assert(LCtx); 219 assert(tag); 220 } 221 222 const MemRegion *getRegion() const { return R; } 223 const Stmt *getStmt() const { return S; } 224 const LocationContext *getLocationContext() const { return LCtx; } 225 unsigned getCount() const { return Count; } 226 const void *getTag() const { return Tag; } 227 228 QualType getType() const override; 229 230 void dumpToStream(raw_ostream &os) const override; 231 232 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, 233 const Stmt *S, QualType T, const LocationContext *LCtx, 234 unsigned Count, const void *Tag) { 235 profile.AddInteger((unsigned) SymbolMetadataKind); 236 profile.AddPointer(R); 237 profile.AddPointer(S); 238 profile.Add(T); 239 profile.AddPointer(LCtx); 240 profile.AddInteger(Count); 241 profile.AddPointer(Tag); 242 } 243 244 void Profile(llvm::FoldingSetNodeID& profile) override { 245 Profile(profile, R, S, T, LCtx, Count, Tag); 246 } 247 248 // Implement isa<T> support. 249 static bool classof(const SymExpr *SE) { 250 return SE->getKind() == SymbolMetadataKind; 251 } 252}; 253 254/// Represents a cast expression. 255class SymbolCast : public SymExpr { 256 const SymExpr *Operand; 257 258 /// Type of the operand. 259 QualType FromTy; 260 261 /// The type of the result. 262 QualType ToTy; 263 264public: 265 SymbolCast(const SymExpr *In, QualType From, QualType To) 266 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { 267 assert(In); 268 assert(isValidTypeForSymbol(From)); 269 // FIXME: GenericTaintChecker creates symbols of void type. 270 // Otherwise, 'To' should also be a valid type. 271 } 272 273 unsigned computeComplexity() const override { 274 if (Complexity == 0) 275 Complexity = 1 + Operand->computeComplexity(); 276 return Complexity; 277 } 278 279 QualType getType() const override { return ToTy; } 280 281 const SymExpr *getOperand() const { return Operand; } 282 283 void dumpToStream(raw_ostream &os) const override; 284 285 static void Profile(llvm::FoldingSetNodeID& ID, 286 const SymExpr *In, QualType From, QualType To) { 287 ID.AddInteger((unsigned) SymbolCastKind); 288 ID.AddPointer(In); 289 ID.Add(From); 290 ID.Add(To); 291 } 292 293 void Profile(llvm::FoldingSetNodeID& ID) override { 294 Profile(ID, Operand, FromTy, ToTy); 295 } 296 297 // Implement isa<T> support. 298 static bool classof(const SymExpr *SE) { 299 return SE->getKind() == SymbolCastKind; 300 } 301}; 302 303/// Represents a symbolic expression involving a binary operator 304class BinarySymExpr : public SymExpr { 305 BinaryOperator::Opcode Op; 306 QualType T; 307 308protected: 309 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t) 310 : SymExpr(k), Op(op), T(t) { 311 assert(classof(this)); 312 assert(isValidTypeForSymbol(t)); 313 } 314 315public: 316 // FIXME: We probably need to make this out-of-line to avoid redundant 317 // generation of virtual functions. 318 QualType getType() const override { return T; } 319 320 BinaryOperator::Opcode getOpcode() const { return Op; } 321 322 // Implement isa<T> support. 323 static bool classof(const SymExpr *SE) { 324 Kind k = SE->getKind(); 325 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS; 326 } 327}; 328 329/// Represents a symbolic expression like 'x' + 3. 330class SymIntExpr : public BinarySymExpr { 331 const SymExpr *LHS; 332 const llvm::APSInt& RHS; 333 334public: 335 SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 336 const llvm::APSInt &rhs, QualType t) 337 : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) { 338 assert(lhs); 339 } 340 341 void dumpToStream(raw_ostream &os) const override; 342 343 const SymExpr *getLHS() const { return LHS; } 344 const llvm::APSInt &getRHS() const { return RHS; } 345 346 unsigned computeComplexity() const override { 347 if (Complexity == 0) 348 Complexity = 1 + LHS->computeComplexity(); 349 return Complexity; 350 } 351 352 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 353 BinaryOperator::Opcode op, const llvm::APSInt& rhs, 354 QualType t) { 355 ID.AddInteger((unsigned) SymIntExprKind); 356 ID.AddPointer(lhs); 357 ID.AddInteger(op); 358 ID.AddPointer(&rhs); 359 ID.Add(t); 360 } 361 362 void Profile(llvm::FoldingSetNodeID& ID) override { 363 Profile(ID, LHS, getOpcode(), RHS, getType()); 364 } 365 366 // Implement isa<T> support. 367 static bool classof(const SymExpr *SE) { 368 return SE->getKind() == SymIntExprKind; 369 } 370}; 371 372/// Represents a symbolic expression like 3 - 'x'. 373class IntSymExpr : public BinarySymExpr { 374 const llvm::APSInt& LHS; 375 const SymExpr *RHS; 376 377public: 378 IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op, 379 const SymExpr *rhs, QualType t) 380 : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) { 381 assert(rhs); 382 } 383 384 void dumpToStream(raw_ostream &os) const override; 385 386 const SymExpr *getRHS() const { return RHS; } 387 const llvm::APSInt &getLHS() const { return LHS; } 388 389 unsigned computeComplexity() const override { 390 if (Complexity == 0) 391 Complexity = 1 + RHS->computeComplexity(); 392 return Complexity; 393 } 394 395 static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs, 396 BinaryOperator::Opcode op, const SymExpr *rhs, 397 QualType t) { 398 ID.AddInteger((unsigned) IntSymExprKind); 399 ID.AddPointer(&lhs); 400 ID.AddInteger(op); 401 ID.AddPointer(rhs); 402 ID.Add(t); 403 } 404 405 void Profile(llvm::FoldingSetNodeID& ID) override { 406 Profile(ID, LHS, getOpcode(), RHS, getType()); 407 } 408 409 // Implement isa<T> support. 410 static bool classof(const SymExpr *SE) { 411 return SE->getKind() == IntSymExprKind; 412 } 413}; 414 415/// Represents a symbolic expression like 'x' + 'y'. 416class SymSymExpr : public BinarySymExpr { 417 const SymExpr *LHS; 418 const SymExpr *RHS; 419 420public: 421 SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, 422 QualType t) 423 : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) { 424 assert(lhs); 425 assert(rhs); 426 } 427 428 const SymExpr *getLHS() const { return LHS; } 429 const SymExpr *getRHS() const { return RHS; } 430 431 void dumpToStream(raw_ostream &os) const override; 432 433 unsigned computeComplexity() const override { 434 if (Complexity == 0) 435 Complexity = RHS->computeComplexity() + LHS->computeComplexity(); 436 return Complexity; 437 } 438 439 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 440 BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { 441 ID.AddInteger((unsigned) SymSymExprKind); 442 ID.AddPointer(lhs); 443 ID.AddInteger(op); 444 ID.AddPointer(rhs); 445 ID.Add(t); 446 } 447 448 void Profile(llvm::FoldingSetNodeID& ID) override { 449 Profile(ID, LHS, getOpcode(), RHS, getType()); 450 } 451 452 // Implement isa<T> support. 453 static bool classof(const SymExpr *SE) { 454 return SE->getKind() == SymSymExprKind; 455 } 456}; 457 458class SymbolManager { 459 using DataSetTy = llvm::FoldingSet<SymExpr>; 460 using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>; 461 462 DataSetTy DataSet; 463 464 /// Stores the extra dependencies between symbols: the data should be kept 465 /// alive as long as the key is live. 466 SymbolDependTy SymbolDependencies; 467 468 unsigned SymbolCounter = 0; 469 llvm::BumpPtrAllocator& BPAlloc; 470 BasicValueFactory &BV; 471 ASTContext &Ctx; 472 473public: 474 SymbolManager(ASTContext &ctx, BasicValueFactory &bv, 475 llvm::BumpPtrAllocator& bpalloc) 476 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} 477 ~SymbolManager(); 478 479 static bool canSymbolicate(QualType T); 480 481 /// Make a unique symbol for MemRegion R according to its kind. 482 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R); 483 484 const SymbolConjured* conjureSymbol(const Stmt *E, 485 const LocationContext *LCtx, 486 QualType T, 487 unsigned VisitCount, 488 const void *SymbolTag = nullptr); 489 490 const SymbolConjured* conjureSymbol(const Expr *E, 491 const LocationContext *LCtx, 492 unsigned VisitCount, 493 const void *SymbolTag = nullptr) { 494 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag); 495 } 496 497 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol, 498 const TypedValueRegion *R); 499 500 const SymbolExtent *getExtentSymbol(const SubRegion *R); 501 502 /// Creates a metadata symbol associated with a specific region. 503 /// 504 /// VisitCount can be used to differentiate regions corresponding to 505 /// different loop iterations, thus, making the symbol path-dependent. 506 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S, 507 QualType T, 508 const LocationContext *LCtx, 509 unsigned VisitCount, 510 const void *SymbolTag = nullptr); 511 512 const SymbolCast* getCastSymbol(const SymExpr *Operand, 513 QualType From, QualType To); 514 515 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 516 const llvm::APSInt& rhs, QualType t); 517 518 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op, 519 const llvm::APSInt& rhs, QualType t) { 520 return getSymIntExpr(&lhs, op, rhs, t); 521 } 522 523 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs, 524 BinaryOperator::Opcode op, 525 const SymExpr *rhs, QualType t); 526 527 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 528 const SymExpr *rhs, QualType t); 529 530 QualType getType(const SymExpr *SE) const { 531 return SE->getType(); 532 } 533 534 /// Add artificial symbol dependency. 535 /// 536 /// The dependent symbol should stay alive as long as the primary is alive. 537 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent); 538 539 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary); 540 541 ASTContext &getContext() { return Ctx; } 542 BasicValueFactory &getBasicVals() { return BV; } 543}; 544 545/// A class responsible for cleaning up unused symbols. 546class SymbolReaper { 547 enum SymbolStatus { 548 NotProcessed, 549 HaveMarkedDependents 550 }; 551 552 using SymbolSetTy = llvm::DenseSet<SymbolRef>; 553 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>; 554 using RegionSetTy = llvm::DenseSet<const MemRegion *>; 555 556 SymbolMapTy TheLiving; 557 SymbolSetTy MetadataInUse; 558 SymbolSetTy TheDead; 559 560 RegionSetTy RegionRoots; 561 562 const StackFrameContext *LCtx; 563 const Stmt *Loc; 564 SymbolManager& SymMgr; 565 StoreRef reapedStore; 566 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache; 567 568public: 569 /// Construct a reaper object, which removes everything which is not 570 /// live before we execute statement s in the given location context. 571 /// 572 /// If the statement is NULL, everything is this and parent contexts is 573 /// considered live. 574 /// If the stack frame context is NULL, everything on stack is considered 575 /// dead. 576 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, 577 SymbolManager &symmgr, StoreManager &storeMgr) 578 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {} 579 580 const LocationContext *getLocationContext() const { return LCtx; } 581 582 bool isLive(SymbolRef sym); 583 bool isLiveRegion(const MemRegion *region); 584 bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const; 585 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const; 586 587 /// Unconditionally marks a symbol as live. 588 /// 589 /// This should never be 590 /// used by checkers, only by the state infrastructure such as the store and 591 /// environment. Checkers should instead use metadata symbols and markInUse. 592 void markLive(SymbolRef sym); 593 594 /// Marks a symbol as important to a checker. 595 /// 596 /// For metadata symbols, 597 /// this will keep the symbol alive as long as its associated region is also 598 /// live. For other symbols, this has no effect; checkers are not permitted 599 /// to influence the life of other symbols. This should be used before any 600 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback. 601 void markInUse(SymbolRef sym); 602 603 /// If a symbol is known to be live, marks the symbol as live. 604 /// 605 /// Otherwise, if the symbol cannot be proven live, it is marked as dead. 606 /// Returns true if the symbol is dead, false if live. 607 bool maybeDead(SymbolRef sym); 608 609 using dead_iterator = SymbolSetTy::const_iterator; 610 611 dead_iterator dead_begin() const { return TheDead.begin(); } 612 dead_iterator dead_end() const { return TheDead.end(); } 613 614 bool hasDeadSymbols() const { 615 return !TheDead.empty(); 616 } 617 618 using region_iterator = RegionSetTy::const_iterator; 619 620 region_iterator region_begin() const { return RegionRoots.begin(); } 621 region_iterator region_end() const { return RegionRoots.end(); } 622 623 /// Returns whether or not a symbol has been confirmed dead. 624 /// 625 /// This should only be called once all marking of dead symbols has completed. 626 /// (For checkers, this means only in the evalDeadSymbols callback.) 627 bool isDead(SymbolRef sym) const { 628 return TheDead.count(sym); 629 } 630 631 void markLive(const MemRegion *region); 632 void markElementIndicesLive(const MemRegion *region); 633 634 /// Set to the value of the symbolic store after 635 /// StoreManager::removeDeadBindings has been called. 636 void setReapedStore(StoreRef st) { reapedStore = st; } 637 638private: 639 /// Mark the symbols dependent on the input symbol as live. 640 void markDependentsLive(SymbolRef sym); 641}; 642 643class SymbolVisitor { 644protected: 645 ~SymbolVisitor() = default; 646 647public: 648 SymbolVisitor() = default; 649 SymbolVisitor(const SymbolVisitor &) = default; 650 SymbolVisitor(SymbolVisitor &&) {} 651 652 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols. 653 /// 654 /// The method returns \c true if symbols should continue be scanned and \c 655 /// false otherwise. 656 virtual bool VisitSymbol(SymbolRef sym) = 0; 657 virtual bool VisitMemRegion(const MemRegion *region) { return true; } 658}; 659 660} // namespace ento 661 662} // namespace clang 663 664#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 665