SymbolManager.h revision 344779
1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines SymbolManager, a class that manages symbolic values 11// created for use by ExprEngine and related classes. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 16#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 17 18#include "clang/AST/Expr.h" 19#include "clang/AST/Type.h" 20#include "clang/Analysis/AnalysisDeclContext.h" 21#include "clang/Basic/LLVM.h" 22#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 25#include "llvm/ADT/DenseMap.h" 26#include "llvm/ADT/DenseSet.h" 27#include "llvm/ADT/FoldingSet.h" 28#include "llvm/Support/Allocator.h" 29#include <cassert> 30 31namespace clang { 32 33class ASTContext; 34class Stmt; 35 36namespace ento { 37 38class BasicValueFactory; 39class StoreManager; 40 41///A symbol representing the value stored at a MemRegion. 42class SymbolRegionValue : public SymbolData { 43 const TypedValueRegion *R; 44 45public: 46 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) 47 : SymbolData(SymbolRegionValueKind, sym), R(r) { 48 assert(r); 49 assert(isValidTypeForSymbol(r->getValueType())); 50 } 51 52 const TypedValueRegion* getRegion() const { return R; } 53 54 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { 55 profile.AddInteger((unsigned) SymbolRegionValueKind); 56 profile.AddPointer(R); 57 } 58 59 void Profile(llvm::FoldingSetNodeID& profile) override { 60 Profile(profile, R); 61 } 62 63 void dumpToStream(raw_ostream &os) const override; 64 const MemRegion *getOriginRegion() const override { return getRegion(); } 65 66 QualType getType() const override; 67 68 // Implement isa<T> support. 69 static bool classof(const SymExpr *SE) { 70 return SE->getKind() == SymbolRegionValueKind; 71 } 72}; 73 74/// A symbol representing the result of an expression in the case when we do 75/// not know anything about what the expression is. 76class SymbolConjured : public SymbolData { 77 const Stmt *S; 78 QualType T; 79 unsigned Count; 80 const LocationContext *LCtx; 81 const void *SymbolTag; 82 83public: 84 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, 85 QualType t, unsigned count, const void *symbolTag) 86 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), 87 LCtx(lctx), SymbolTag(symbolTag) { 88 // FIXME: 's' might be a nullptr if we're conducting invalidation 89 // that was caused by a destructor call on a temporary object, 90 // which has no statement associated with it. 91 // Due to this, we might be creating the same invalidation symbol for 92 // two different invalidation passes (for two different temporaries). 93 assert(lctx); 94 assert(isValidTypeForSymbol(t)); 95 } 96 97 const Stmt *getStmt() const { return S; } 98 unsigned getCount() const { return Count; } 99 const void *getTag() const { return SymbolTag; } 100 101 QualType getType() const override; 102 103 void dumpToStream(raw_ostream &os) const override; 104 105 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, 106 QualType T, unsigned Count, const LocationContext *LCtx, 107 const void *SymbolTag) { 108 profile.AddInteger((unsigned) SymbolConjuredKind); 109 profile.AddPointer(S); 110 profile.AddPointer(LCtx); 111 profile.Add(T); 112 profile.AddInteger(Count); 113 profile.AddPointer(SymbolTag); 114 } 115 116 void Profile(llvm::FoldingSetNodeID& profile) override { 117 Profile(profile, S, T, Count, LCtx, SymbolTag); 118 } 119 120 // Implement isa<T> support. 121 static bool classof(const SymExpr *SE) { 122 return SE->getKind() == SymbolConjuredKind; 123 } 124}; 125 126/// A symbol representing the value of a MemRegion whose parent region has 127/// symbolic value. 128class SymbolDerived : public SymbolData { 129 SymbolRef parentSymbol; 130 const TypedValueRegion *R; 131 132public: 133 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) 134 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { 135 assert(parent); 136 assert(r); 137 assert(isValidTypeForSymbol(r->getValueType())); 138 } 139 140 SymbolRef getParentSymbol() const { return parentSymbol; } 141 const TypedValueRegion *getRegion() const { return R; } 142 143 QualType getType() const override; 144 145 void dumpToStream(raw_ostream &os) const override; 146 const MemRegion *getOriginRegion() const override { return getRegion(); } 147 148 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, 149 const TypedValueRegion *r) { 150 profile.AddInteger((unsigned) SymbolDerivedKind); 151 profile.AddPointer(r); 152 profile.AddPointer(parent); 153 } 154 155 void Profile(llvm::FoldingSetNodeID& profile) override { 156 Profile(profile, parentSymbol, R); 157 } 158 159 // Implement isa<T> support. 160 static bool classof(const SymExpr *SE) { 161 return SE->getKind() == SymbolDerivedKind; 162 } 163}; 164 165/// SymbolExtent - Represents the extent (size in bytes) of a bounded region. 166/// Clients should not ask the SymbolManager for a region's extent. Always use 167/// SubRegion::getExtent instead -- the value returned may not be a symbol. 168class SymbolExtent : public SymbolData { 169 const SubRegion *R; 170 171public: 172 SymbolExtent(SymbolID sym, const SubRegion *r) 173 : SymbolData(SymbolExtentKind, sym), R(r) { 174 assert(r); 175 } 176 177 const SubRegion *getRegion() const { return R; } 178 179 QualType getType() const override; 180 181 void dumpToStream(raw_ostream &os) const override; 182 183 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { 184 profile.AddInteger((unsigned) SymbolExtentKind); 185 profile.AddPointer(R); 186 } 187 188 void Profile(llvm::FoldingSetNodeID& profile) override { 189 Profile(profile, R); 190 } 191 192 // Implement isa<T> support. 193 static bool classof(const SymExpr *SE) { 194 return SE->getKind() == SymbolExtentKind; 195 } 196}; 197 198/// SymbolMetadata - Represents path-dependent metadata about a specific region. 199/// Metadata symbols remain live as long as they are marked as in use before 200/// dead-symbol sweeping AND their associated regions are still alive. 201/// Intended for use by checkers. 202class SymbolMetadata : public SymbolData { 203 const MemRegion* R; 204 const Stmt *S; 205 QualType T; 206 const LocationContext *LCtx; 207 unsigned Count; 208 const void *Tag; 209 210public: 211 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, 212 const LocationContext *LCtx, unsigned count, const void *tag) 213 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), 214 Count(count), Tag(tag) { 215 assert(r); 216 assert(s); 217 assert(isValidTypeForSymbol(t)); 218 assert(LCtx); 219 assert(tag); 220 } 221 222 const MemRegion *getRegion() const { return R; } 223 const Stmt *getStmt() const { return S; } 224 const LocationContext *getLocationContext() const { return LCtx; } 225 unsigned getCount() const { return Count; } 226 const void *getTag() const { return Tag; } 227 228 QualType getType() const override; 229 230 void dumpToStream(raw_ostream &os) const override; 231 232 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, 233 const Stmt *S, QualType T, const LocationContext *LCtx, 234 unsigned Count, const void *Tag) { 235 profile.AddInteger((unsigned) SymbolMetadataKind); 236 profile.AddPointer(R); 237 profile.AddPointer(S); 238 profile.Add(T); 239 profile.AddPointer(LCtx); 240 profile.AddInteger(Count); 241 profile.AddPointer(Tag); 242 } 243 244 void Profile(llvm::FoldingSetNodeID& profile) override { 245 Profile(profile, R, S, T, LCtx, Count, Tag); 246 } 247 248 // Implement isa<T> support. 249 static bool classof(const SymExpr *SE) { 250 return SE->getKind() == SymbolMetadataKind; 251 } 252}; 253 254/// Represents a cast expression. 255class SymbolCast : public SymExpr { 256 const SymExpr *Operand; 257 258 /// Type of the operand. 259 QualType FromTy; 260 261 /// The type of the result. 262 QualType ToTy; 263 264public: 265 SymbolCast(const SymExpr *In, QualType From, QualType To) 266 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { 267 assert(In); 268 assert(isValidTypeForSymbol(From)); 269 // FIXME: GenericTaintChecker creates symbols of void type. 270 // Otherwise, 'To' should also be a valid type. 271 } 272 273 unsigned computeComplexity() const override { 274 if (Complexity == 0) 275 Complexity = 1 + Operand->computeComplexity(); 276 return Complexity; 277 } 278 279 QualType getType() const override { return ToTy; } 280 281 const SymExpr *getOperand() const { return Operand; } 282 283 void dumpToStream(raw_ostream &os) const override; 284 285 static void Profile(llvm::FoldingSetNodeID& ID, 286 const SymExpr *In, QualType From, QualType To) { 287 ID.AddInteger((unsigned) SymbolCastKind); 288 ID.AddPointer(In); 289 ID.Add(From); 290 ID.Add(To); 291 } 292 293 void Profile(llvm::FoldingSetNodeID& ID) override { 294 Profile(ID, Operand, FromTy, ToTy); 295 } 296 297 // Implement isa<T> support. 298 static bool classof(const SymExpr *SE) { 299 return SE->getKind() == SymbolCastKind; 300 } 301}; 302 303/// Represents a symbolic expression involving a binary operator 304class BinarySymExpr : public SymExpr { 305 BinaryOperator::Opcode Op; 306 QualType T; 307 308protected: 309 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t) 310 : SymExpr(k), Op(op), T(t) { 311 assert(classof(this)); 312 // Binary expressions are results of arithmetic. Pointer arithmetic is not 313 // handled by binary expressions, but it is instead handled by applying 314 // sub-regions to regions. 315 assert(isValidTypeForSymbol(t) && !Loc::isLocType(t)); 316 } 317 318public: 319 // FIXME: We probably need to make this out-of-line to avoid redundant 320 // generation of virtual functions. 321 QualType getType() const override { return T; } 322 323 BinaryOperator::Opcode getOpcode() const { return Op; } 324 325 // Implement isa<T> support. 326 static bool classof(const SymExpr *SE) { 327 Kind k = SE->getKind(); 328 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS; 329 } 330}; 331 332/// Represents a symbolic expression like 'x' + 3. 333class SymIntExpr : public BinarySymExpr { 334 const SymExpr *LHS; 335 const llvm::APSInt& RHS; 336 337public: 338 SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 339 const llvm::APSInt &rhs, QualType t) 340 : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) { 341 assert(lhs); 342 } 343 344 void dumpToStream(raw_ostream &os) const override; 345 346 const SymExpr *getLHS() const { return LHS; } 347 const llvm::APSInt &getRHS() const { return RHS; } 348 349 unsigned computeComplexity() const override { 350 if (Complexity == 0) 351 Complexity = 1 + LHS->computeComplexity(); 352 return Complexity; 353 } 354 355 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 356 BinaryOperator::Opcode op, const llvm::APSInt& rhs, 357 QualType t) { 358 ID.AddInteger((unsigned) SymIntExprKind); 359 ID.AddPointer(lhs); 360 ID.AddInteger(op); 361 ID.AddPointer(&rhs); 362 ID.Add(t); 363 } 364 365 void Profile(llvm::FoldingSetNodeID& ID) override { 366 Profile(ID, LHS, getOpcode(), RHS, getType()); 367 } 368 369 // Implement isa<T> support. 370 static bool classof(const SymExpr *SE) { 371 return SE->getKind() == SymIntExprKind; 372 } 373}; 374 375/// Represents a symbolic expression like 3 - 'x'. 376class IntSymExpr : public BinarySymExpr { 377 const llvm::APSInt& LHS; 378 const SymExpr *RHS; 379 380public: 381 IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op, 382 const SymExpr *rhs, QualType t) 383 : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) { 384 assert(rhs); 385 } 386 387 void dumpToStream(raw_ostream &os) const override; 388 389 const SymExpr *getRHS() const { return RHS; } 390 const llvm::APSInt &getLHS() const { return LHS; } 391 392 unsigned computeComplexity() const override { 393 if (Complexity == 0) 394 Complexity = 1 + RHS->computeComplexity(); 395 return Complexity; 396 } 397 398 static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs, 399 BinaryOperator::Opcode op, const SymExpr *rhs, 400 QualType t) { 401 ID.AddInteger((unsigned) IntSymExprKind); 402 ID.AddPointer(&lhs); 403 ID.AddInteger(op); 404 ID.AddPointer(rhs); 405 ID.Add(t); 406 } 407 408 void Profile(llvm::FoldingSetNodeID& ID) override { 409 Profile(ID, LHS, getOpcode(), RHS, getType()); 410 } 411 412 // Implement isa<T> support. 413 static bool classof(const SymExpr *SE) { 414 return SE->getKind() == IntSymExprKind; 415 } 416}; 417 418/// Represents a symbolic expression like 'x' + 'y'. 419class SymSymExpr : public BinarySymExpr { 420 const SymExpr *LHS; 421 const SymExpr *RHS; 422 423public: 424 SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, 425 QualType t) 426 : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) { 427 assert(lhs); 428 assert(rhs); 429 } 430 431 const SymExpr *getLHS() const { return LHS; } 432 const SymExpr *getRHS() const { return RHS; } 433 434 void dumpToStream(raw_ostream &os) const override; 435 436 unsigned computeComplexity() const override { 437 if (Complexity == 0) 438 Complexity = RHS->computeComplexity() + LHS->computeComplexity(); 439 return Complexity; 440 } 441 442 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 443 BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { 444 ID.AddInteger((unsigned) SymSymExprKind); 445 ID.AddPointer(lhs); 446 ID.AddInteger(op); 447 ID.AddPointer(rhs); 448 ID.Add(t); 449 } 450 451 void Profile(llvm::FoldingSetNodeID& ID) override { 452 Profile(ID, LHS, getOpcode(), RHS, getType()); 453 } 454 455 // Implement isa<T> support. 456 static bool classof(const SymExpr *SE) { 457 return SE->getKind() == SymSymExprKind; 458 } 459}; 460 461class SymbolManager { 462 using DataSetTy = llvm::FoldingSet<SymExpr>; 463 using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>; 464 465 DataSetTy DataSet; 466 467 /// Stores the extra dependencies between symbols: the data should be kept 468 /// alive as long as the key is live. 469 SymbolDependTy SymbolDependencies; 470 471 unsigned SymbolCounter = 0; 472 llvm::BumpPtrAllocator& BPAlloc; 473 BasicValueFactory &BV; 474 ASTContext &Ctx; 475 476public: 477 SymbolManager(ASTContext &ctx, BasicValueFactory &bv, 478 llvm::BumpPtrAllocator& bpalloc) 479 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} 480 ~SymbolManager(); 481 482 static bool canSymbolicate(QualType T); 483 484 /// Make a unique symbol for MemRegion R according to its kind. 485 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R); 486 487 const SymbolConjured* conjureSymbol(const Stmt *E, 488 const LocationContext *LCtx, 489 QualType T, 490 unsigned VisitCount, 491 const void *SymbolTag = nullptr); 492 493 const SymbolConjured* conjureSymbol(const Expr *E, 494 const LocationContext *LCtx, 495 unsigned VisitCount, 496 const void *SymbolTag = nullptr) { 497 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag); 498 } 499 500 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol, 501 const TypedValueRegion *R); 502 503 const SymbolExtent *getExtentSymbol(const SubRegion *R); 504 505 /// Creates a metadata symbol associated with a specific region. 506 /// 507 /// VisitCount can be used to differentiate regions corresponding to 508 /// different loop iterations, thus, making the symbol path-dependent. 509 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S, 510 QualType T, 511 const LocationContext *LCtx, 512 unsigned VisitCount, 513 const void *SymbolTag = nullptr); 514 515 const SymbolCast* getCastSymbol(const SymExpr *Operand, 516 QualType From, QualType To); 517 518 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 519 const llvm::APSInt& rhs, QualType t); 520 521 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op, 522 const llvm::APSInt& rhs, QualType t) { 523 return getSymIntExpr(&lhs, op, rhs, t); 524 } 525 526 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs, 527 BinaryOperator::Opcode op, 528 const SymExpr *rhs, QualType t); 529 530 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 531 const SymExpr *rhs, QualType t); 532 533 QualType getType(const SymExpr *SE) const { 534 return SE->getType(); 535 } 536 537 /// Add artificial symbol dependency. 538 /// 539 /// The dependent symbol should stay alive as long as the primary is alive. 540 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent); 541 542 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary); 543 544 ASTContext &getContext() { return Ctx; } 545 BasicValueFactory &getBasicVals() { return BV; } 546}; 547 548/// A class responsible for cleaning up unused symbols. 549class SymbolReaper { 550 enum SymbolStatus { 551 NotProcessed, 552 HaveMarkedDependents 553 }; 554 555 using SymbolSetTy = llvm::DenseSet<SymbolRef>; 556 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>; 557 using RegionSetTy = llvm::DenseSet<const MemRegion *>; 558 559 SymbolMapTy TheLiving; 560 SymbolSetTy MetadataInUse; 561 562 RegionSetTy RegionRoots; 563 564 const StackFrameContext *LCtx; 565 const Stmt *Loc; 566 SymbolManager& SymMgr; 567 StoreRef reapedStore; 568 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache; 569 570public: 571 /// Construct a reaper object, which removes everything which is not 572 /// live before we execute statement s in the given location context. 573 /// 574 /// If the statement is NULL, everything is this and parent contexts is 575 /// considered live. 576 /// If the stack frame context is NULL, everything on stack is considered 577 /// dead. 578 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, 579 SymbolManager &symmgr, StoreManager &storeMgr) 580 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {} 581 582 const LocationContext *getLocationContext() const { return LCtx; } 583 584 bool isLive(SymbolRef sym); 585 bool isLiveRegion(const MemRegion *region); 586 bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const; 587 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const; 588 589 /// Unconditionally marks a symbol as live. 590 /// 591 /// This should never be 592 /// used by checkers, only by the state infrastructure such as the store and 593 /// environment. Checkers should instead use metadata symbols and markInUse. 594 void markLive(SymbolRef sym); 595 596 /// Marks a symbol as important to a checker. 597 /// 598 /// For metadata symbols, 599 /// this will keep the symbol alive as long as its associated region is also 600 /// live. For other symbols, this has no effect; checkers are not permitted 601 /// to influence the life of other symbols. This should be used before any 602 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback. 603 void markInUse(SymbolRef sym); 604 605 using region_iterator = RegionSetTy::const_iterator; 606 607 region_iterator region_begin() const { return RegionRoots.begin(); } 608 region_iterator region_end() const { return RegionRoots.end(); } 609 610 /// Returns whether or not a symbol has been confirmed dead. 611 /// 612 /// This should only be called once all marking of dead symbols has completed. 613 /// (For checkers, this means only in the checkDeadSymbols callback.) 614 bool isDead(SymbolRef sym) { 615 return !isLive(sym); 616 } 617 618 void markLive(const MemRegion *region); 619 void markElementIndicesLive(const MemRegion *region); 620 621 /// Set to the value of the symbolic store after 622 /// StoreManager::removeDeadBindings has been called. 623 void setReapedStore(StoreRef st) { reapedStore = st; } 624 625private: 626 /// Mark the symbols dependent on the input symbol as live. 627 void markDependentsLive(SymbolRef sym); 628}; 629 630class SymbolVisitor { 631protected: 632 ~SymbolVisitor() = default; 633 634public: 635 SymbolVisitor() = default; 636 SymbolVisitor(const SymbolVisitor &) = default; 637 SymbolVisitor(SymbolVisitor &&) {} 638 639 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols. 640 /// 641 /// The method returns \c true if symbols should continue be scanned and \c 642 /// false otherwise. 643 virtual bool VisitSymbol(SymbolRef sym) = 0; 644 virtual bool VisitMemRegion(const MemRegion *) { return true; } 645}; 646 647} // namespace ento 648 649} // namespace clang 650 651#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 652