1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines SymbolManager, a class that manages symbolic values 10// created for use by ExprEngine and related classes. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 15#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 16 17#include "clang/AST/Expr.h" 18#include "clang/AST/Type.h" 19#include "clang/Analysis/AnalysisDeclContext.h" 20#include "clang/Basic/LLVM.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" 22#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 24#include "llvm/ADT/DenseMap.h" 25#include "llvm/ADT/DenseSet.h" 26#include "llvm/ADT/FoldingSet.h" 27#include "llvm/Support/Allocator.h" 28#include <cassert> 29 30namespace clang { 31 32class ASTContext; 33class Stmt; 34 35namespace ento { 36 37class BasicValueFactory; 38class StoreManager; 39 40///A symbol representing the value stored at a MemRegion. 
41class SymbolRegionValue : public SymbolData { 42 const TypedValueRegion *R; 43 44public: 45 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) 46 : SymbolData(SymbolRegionValueKind, sym), R(r) { 47 assert(r); 48 assert(isValidTypeForSymbol(r->getValueType())); 49 } 50 51 const TypedValueRegion* getRegion() const { return R; } 52 53 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { 54 profile.AddInteger((unsigned) SymbolRegionValueKind); 55 profile.AddPointer(R); 56 } 57 58 void Profile(llvm::FoldingSetNodeID& profile) override { 59 Profile(profile, R); 60 } 61 62 void dumpToStream(raw_ostream &os) const override; 63 const MemRegion *getOriginRegion() const override { return getRegion(); } 64 65 QualType getType() const override; 66 67 // Implement isa<T> support. 68 static bool classof(const SymExpr *SE) { 69 return SE->getKind() == SymbolRegionValueKind; 70 } 71}; 72 73/// A symbol representing the result of an expression in the case when we do 74/// not know anything about what the expression is. 75class SymbolConjured : public SymbolData { 76 const Stmt *S; 77 QualType T; 78 unsigned Count; 79 const LocationContext *LCtx; 80 const void *SymbolTag; 81 82public: 83 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, 84 QualType t, unsigned count, const void *symbolTag) 85 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), 86 LCtx(lctx), SymbolTag(symbolTag) { 87 // FIXME: 's' might be a nullptr if we're conducting invalidation 88 // that was caused by a destructor call on a temporary object, 89 // which has no statement associated with it. 90 // Due to this, we might be creating the same invalidation symbol for 91 // two different invalidation passes (for two different temporaries). 
92 assert(lctx); 93 assert(isValidTypeForSymbol(t)); 94 } 95 96 const Stmt *getStmt() const { return S; } 97 unsigned getCount() const { return Count; } 98 const void *getTag() const { return SymbolTag; } 99 100 QualType getType() const override; 101 102 void dumpToStream(raw_ostream &os) const override; 103 104 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, 105 QualType T, unsigned Count, const LocationContext *LCtx, 106 const void *SymbolTag) { 107 profile.AddInteger((unsigned) SymbolConjuredKind); 108 profile.AddPointer(S); 109 profile.AddPointer(LCtx); 110 profile.Add(T); 111 profile.AddInteger(Count); 112 profile.AddPointer(SymbolTag); 113 } 114 115 void Profile(llvm::FoldingSetNodeID& profile) override { 116 Profile(profile, S, T, Count, LCtx, SymbolTag); 117 } 118 119 // Implement isa<T> support. 120 static bool classof(const SymExpr *SE) { 121 return SE->getKind() == SymbolConjuredKind; 122 } 123}; 124 125/// A symbol representing the value of a MemRegion whose parent region has 126/// symbolic value. 
127class SymbolDerived : public SymbolData { 128 SymbolRef parentSymbol; 129 const TypedValueRegion *R; 130 131public: 132 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) 133 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { 134 assert(parent); 135 assert(r); 136 assert(isValidTypeForSymbol(r->getValueType())); 137 } 138 139 SymbolRef getParentSymbol() const { return parentSymbol; } 140 const TypedValueRegion *getRegion() const { return R; } 141 142 QualType getType() const override; 143 144 void dumpToStream(raw_ostream &os) const override; 145 const MemRegion *getOriginRegion() const override { return getRegion(); } 146 147 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, 148 const TypedValueRegion *r) { 149 profile.AddInteger((unsigned) SymbolDerivedKind); 150 profile.AddPointer(r); 151 profile.AddPointer(parent); 152 } 153 154 void Profile(llvm::FoldingSetNodeID& profile) override { 155 Profile(profile, parentSymbol, R); 156 } 157 158 // Implement isa<T> support. 159 static bool classof(const SymExpr *SE) { 160 return SE->getKind() == SymbolDerivedKind; 161 } 162}; 163 164/// SymbolExtent - Represents the extent (size in bytes) of a bounded region. 165/// Clients should not ask the SymbolManager for a region's extent. Always use 166/// SubRegion::getExtent instead -- the value returned may not be a symbol. 
167class SymbolExtent : public SymbolData { 168 const SubRegion *R; 169 170public: 171 SymbolExtent(SymbolID sym, const SubRegion *r) 172 : SymbolData(SymbolExtentKind, sym), R(r) { 173 assert(r); 174 } 175 176 const SubRegion *getRegion() const { return R; } 177 178 QualType getType() const override; 179 180 void dumpToStream(raw_ostream &os) const override; 181 182 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { 183 profile.AddInteger((unsigned) SymbolExtentKind); 184 profile.AddPointer(R); 185 } 186 187 void Profile(llvm::FoldingSetNodeID& profile) override { 188 Profile(profile, R); 189 } 190 191 // Implement isa<T> support. 192 static bool classof(const SymExpr *SE) { 193 return SE->getKind() == SymbolExtentKind; 194 } 195}; 196 197/// SymbolMetadata - Represents path-dependent metadata about a specific region. 198/// Metadata symbols remain live as long as they are marked as in use before 199/// dead-symbol sweeping AND their associated regions are still alive. 200/// Intended for use by checkers. 
class SymbolMetadata : public SymbolData {
  // All of the fields below take part in the symbol's profile (see Profile()).
  const MemRegion* R;
  const Stmt *S;
  QualType T;
  const LocationContext *LCtx;
  unsigned Count;
  const void *Tag;

public:
  /// Creates metadata symbol \p sym.
  ///
  /// \p r, \p s, \p LCtx and \p tag must all be non-null, and \p t must
  /// satisfy isValidTypeForSymbol(); each of these is asserted below.
  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
                 const LocationContext *LCtx, unsigned count, const void *tag)
      : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
        Count(count), Tag(tag) {
      assert(r);
      assert(s);
      assert(isValidTypeForSymbol(t));
      assert(LCtx);
      assert(tag);
    }

  const MemRegion *getRegion() const { return R; }
  const Stmt *getStmt() const { return S; }
  const LocationContext *getLocationContext() const { return LCtx; }
  unsigned getCount() const { return Count; }
  const void *getTag() const { return Tag; }

  QualType getType() const override;

  void dumpToStream(raw_ostream &os) const override;

  /// Adds the identity of a metadata symbol to \p profile: kind, region,
  /// statement, type, location context, count and tag, in that order.
  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
                      const Stmt *S, QualType T, const LocationContext *LCtx,
                      unsigned Count, const void *Tag) {
    profile.AddInteger((unsigned) SymbolMetadataKind);
    profile.AddPointer(R);
    profile.AddPointer(S);
    profile.Add(T);
    profile.AddPointer(LCtx);
    profile.AddInteger(Count);
    profile.AddPointer(Tag);
  }

  /// Profiles this symbol via the static overload.
  void Profile(llvm::FoldingSetNodeID& profile) override {
    Profile(profile, R, S, T, LCtx, Count, Tag);
  }

  // Implement isa<T> support.
  static bool classof(const SymExpr *SE) {
    return SE->getKind() == SymbolMetadataKind;
  }
};

/// Represents a cast expression.
class SymbolCast : public SymExpr {
  /// The expression being cast. Checked non-null on construction.
  const SymExpr *Operand;

  /// Type of the operand.
  QualType FromTy;

  /// The type of the result.
  QualType ToTy;

public:
  /// Creates a cast of \p In from type \p From to type \p To.
  ///
  /// \p In must be non-null and \p From must satisfy isValidTypeForSymbol().
  /// \p To is deliberately not validated (see the FIXME below).
  SymbolCast(const SymExpr *In, QualType From, QualType To)
      : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
    assert(In);
    assert(isValidTypeForSymbol(From));
    // FIXME: GenericTaintChecker creates symbols of void type.
    // Otherwise, 'To' should also be a valid type.
  }

  /// \returns one more than the complexity of the operand.
  ///
  /// The result is stored in Complexity on first use; a stored value of zero
  /// is treated as "not computed yet".
  unsigned computeComplexity() const override {
    if (Complexity == 0)
      Complexity = 1 + Operand->computeComplexity();
    return Complexity;
  }

  /// The type of a cast expression is its destination type.
  QualType getType() const override { return ToTy; }

  const SymExpr *getOperand() const { return Operand; }

  void dumpToStream(raw_ostream &os) const override;

  /// Adds the identity of a cast expression to \p ID: kind, operand pointer,
  /// source type and destination type, in that order.
  static void Profile(llvm::FoldingSetNodeID& ID,
                      const SymExpr *In, QualType From, QualType To) {
    ID.AddInteger((unsigned) SymbolCastKind);
    ID.AddPointer(In);
    ID.Add(From);
    ID.Add(To);
  }

  /// Profiles this expression via the static overload.
  void Profile(llvm::FoldingSetNodeID& ID) override {
    Profile(ID, Operand, FromTy, ToTy);
  }

  // Implement isa<T> support.
  static bool classof(const SymExpr *SE) {
    return SE->getKind() == SymbolCastKind;
  }
};

/// Represents a symbolic expression involving a binary operator
class BinarySymExpr : public SymExpr {
  BinaryOperator::Opcode Op;
  QualType T;

protected:
  /// Only derived classes may construct a BinarySymExpr. \p k must be one of
  /// the binary expression kinds accepted by classof(), and \p t must be a
  /// valid symbol type that is not a location type.
  BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
      : SymExpr(k), Op(op), T(t) {
    assert(classof(this));
    // Binary expressions are results of arithmetic. Pointer arithmetic is not
    // handled by binary expressions, but it is instead handled by applying
    // sub-regions to regions.
    assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
  }

public:
  // FIXME: We probably need to make this out-of-line to avoid redundant
  // generation of virtual functions.
  QualType getType() const override { return T; }

  BinaryOperator::Opcode getOpcode() const { return Op; }

  // Implement isa<T> support.
  // Matches every kind in the closed range
  // [BEGIN_BINARYSYMEXPRS, END_BINARYSYMEXPRS].
  static bool classof(const SymExpr *SE) {
    Kind k = SE->getKind();
    return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
  }

protected:
  // The overload pairs below let BinarySymExprImpl treat its two operand
  // types (a symbolic expression pointer or an integer constant reference)
  // uniformly.

  /// A symbolic operand contributes its own complexity.
  static unsigned computeOperandComplexity(const SymExpr *Value) {
    return Value->computeComplexity();
  }
  /// An integer constant operand always contributes a complexity of 1.
  static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
    return 1;
  }

  /// For an integer constant, yields the address of the referenced object.
  static const llvm::APSInt *getPointer(const llvm::APSInt &Value) {
    return &Value;
  }
  /// For a symbolic expression, yields the pointer unchanged.
  static const SymExpr *getPointer(const SymExpr *Value) { return Value; }

  static void dumpToStreamImpl(raw_ostream &os, const SymExpr *Value);
  static void dumpToStreamImpl(raw_ostream &os, const llvm::APSInt &Value);
  static void dumpToStreamImpl(raw_ostream &os, BinaryOperator::Opcode op);
};

/// Template implementation for all binary symbolic expressions
template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassKind>
class BinarySymExprImpl : public BinarySymExpr {
  // With the aliases declared after this class, each operand is either a
  // 'const SymExpr *' or a 'const llvm::APSInt &'. In the latter case the
  // member is a reference, so the referenced integer has to outlive this
  // expression.
  LHSTYPE LHS;
  RHSTYPE RHS;

public:
  /// Creates the expression `lhs op rhs` of type \p t. A pointer operand must
  /// be non-null.
  BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
                    QualType t)
      : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
    assert(getPointer(lhs));
    assert(getPointer(rhs));
  }

  /// Prints the left operand, the opcode and the right operand, in that order.
  void dumpToStream(raw_ostream &os) const override {
    dumpToStreamImpl(os, LHS);
    dumpToStreamImpl(os, getOpcode());
    dumpToStreamImpl(os, RHS);
  }

  LHSTYPE getLHS() const { return LHS; }
  RHSTYPE getRHS() const { return RHS; }

  /// \returns the sum of the complexities of both operands.
  ///
  /// The result is stored in Complexity on first use; a stored value of zero
  /// is treated as "not computed yet".
  unsigned computeComplexity() const override {
    if (Complexity == 0)
      Complexity =
          computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
    return Complexity;
  }

  /// Adds the identity of a binary expression to \p ID: kind, left operand,
  /// opcode, right operand and type, in that order.
  ///
  /// Operands are profiled by pointer. For an integer constant this is the
  /// address of the referenced APSInt rather than its numeric value.
  /// NOTE(review): this presumably relies on integer constants being uniqued
  /// elsewhere (e.g. by BasicValueFactory) -- confirm against the callers.
  static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
                      BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
    ID.AddInteger((unsigned)ClassKind);
    ID.AddPointer(getPointer(lhs));
    ID.AddInteger(op);
    ID.AddPointer(getPointer(rhs));
    ID.Add(t);
  }

  /// Profiles this expression via the static overload.
  void Profile(llvm::FoldingSetNodeID &ID) override {
    Profile(ID, LHS, getOpcode(), RHS, getType());
  }

  // Implement isa<T> support.
  static bool classof(const SymExpr *SE) { return SE->getKind() == ClassKind; }
};

/// Represents a symbolic expression like 'x' + 3.
using SymIntExpr = BinarySymExprImpl<const SymExpr *, const llvm::APSInt &,
                                     SymExpr::Kind::SymIntExprKind>;

/// Represents a symbolic expression like 3 - 'x'.
using IntSymExpr = BinarySymExprImpl<const llvm::APSInt &, const SymExpr *,
                                     SymExpr::Kind::IntSymExprKind>;

/// Represents a symbolic expression like 'x' + 'y'.
using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
                                     SymExpr::Kind::SymSymExprKind>;

/// Creates and hands out the symbolic expressions declared in this header.
/// The factory member functions are defined out of line.
class SymbolManager {
  using DataSetTy = llvm::FoldingSet<SymExpr>;
  using SymbolDependTy =
      llvm::DenseMap<SymbolRef, std::unique_ptr<SymbolRefSmallVectorTy>>;

  /// Folding set of symbolic expressions.
  DataSetTy DataSet;

  /// Stores the extra dependencies between symbols: the data should be kept
  /// alive as long as the key is live.
  SymbolDependTy SymbolDependencies;

  unsigned SymbolCounter = 0;
  // The allocator, value factory and AST context are held by reference and
  // are not owned by the manager.
  llvm::BumpPtrAllocator& BPAlloc;
  BasicValueFactory &BV;
  ASTContext &Ctx;

public:
  /// Binds the manager to the given AST context, value factory and allocator.
  /// The dependency map is constructed with an initial size argument of 16.
  SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
                llvm::BumpPtrAllocator& bpalloc)
      : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}

  static bool canSymbolicate(QualType T);

  /// Make a unique symbol for MemRegion R according to its kind.
  const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);

  const SymbolConjured* conjureSymbol(const Stmt *E,
                                      const LocationContext *LCtx,
                                      QualType T,
                                      unsigned VisitCount,
                                      const void *SymbolTag = nullptr);

  /// Convenience overload that uses the type of the expression \p E as the
  /// type of the conjured symbol. \p E is dereferenced and must not be null.
  const SymbolConjured* conjureSymbol(const Expr *E,
                                      const LocationContext *LCtx,
                                      unsigned VisitCount,
                                      const void *SymbolTag = nullptr) {
    return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
  }

  const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
                                        const TypedValueRegion *R);

  const SymbolExtent *getExtentSymbol(const SubRegion *R);

  /// Creates a metadata symbol associated with a specific region.
  ///
  /// VisitCount can be used to differentiate regions corresponding to
  /// different loop iterations, thus, making the symbol path-dependent.
  ///
  /// NOTE(review): \p SymbolTag defaults to nullptr here, while the
  /// SymbolMetadata constructor asserts that its tag is non-null. If the tag
  /// is forwarded unchanged, relying on the default trips that assertion --
  /// confirm against the out-of-line definition.
  const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
                                          QualType T,
                                          const LocationContext *LCtx,
                                          unsigned VisitCount,
                                          const void *SymbolTag = nullptr);

  const SymbolCast* getCastSymbol(const SymExpr *Operand,
                                  QualType From, QualType To);

  const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
                                  const llvm::APSInt& rhs, QualType t);

  /// Convenience overload taking the left operand by reference; forwards its
  /// address to the pointer overload.
  const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
                                  const llvm::APSInt& rhs, QualType t) {
    return getSymIntExpr(&lhs, op, rhs, t);
  }

  const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
                                  BinaryOperator::Opcode op,
                                  const SymExpr *rhs, QualType t);

  const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
                                  const SymExpr *rhs, QualType t);

  /// \returns the type of \p SE, which is dereferenced and must not be null.
  QualType getType(const SymExpr *SE) const {
    return SE->getType();
  }

  /// Add artificial symbol dependency.
  ///
  /// The dependent symbol should stay alive as long as the primary is alive.
  void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);

  const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);

  ASTContext &getContext() { return Ctx; }
  BasicValueFactory &getBasicVals() { return BV; }
};

/// A class responsible for cleaning up unused symbols.
class SymbolReaper {
  /// Per-symbol value stored in TheLiving.
  enum SymbolStatus {
    NotProcessed,
    HaveMarkedDependents
  };

  using SymbolSetTy = llvm::DenseSet<SymbolRef>;
  using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
  using RegionSetTy = llvm::DenseSet<const MemRegion *>;

  SymbolMapTy TheLiving;
  SymbolSetTy MetadataInUse;

  RegionSetTy RegionRoots;

  const StackFrameContext *LCtx;
  const Stmt *Loc;
  SymbolManager& SymMgr;
  StoreRef reapedStore;
  llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;

public:
  /// Construct a reaper object, which removes everything which is not
  /// live before we execute statement s in the given location context.
  ///
  /// If the statement is NULL, everything in this and parent contexts is
  /// considered live.
  /// If the stack frame context is NULL, everything on stack is considered
  /// dead.
  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
               SymbolManager &symmgr, StoreManager &storeMgr)
      : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}

  /// \returns the stack frame context passed to the constructor, which may be
  /// null.
  const LocationContext *getLocationContext() const { return LCtx; }

  bool isLive(SymbolRef sym);
  bool isLiveRegion(const MemRegion *region);
  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
  bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;

  /// Unconditionally marks a symbol as live.
  ///
  /// This should never be
  /// used by checkers, only by the state infrastructure such as the store and
  /// environment. Checkers should instead use metadata symbols and markInUse.
  void markLive(SymbolRef sym);

  /// Marks a symbol as important to a checker.
  ///
  /// For metadata symbols,
  /// this will keep the symbol alive as long as its associated region is also
  /// live. For other symbols, this has no effect; checkers are not permitted
  /// to influence the life of other symbols. This should be used before any
  /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
  void markInUse(SymbolRef sym);

  using region_iterator = RegionSetTy::const_iterator;

  // Iteration over the RegionRoots set.
  region_iterator region_begin() const { return RegionRoots.begin(); }
  region_iterator region_end() const { return RegionRoots.end(); }

  /// Returns whether or not a symbol has been confirmed dead.
  ///
  /// This should only be called once all marking of dead symbols has completed.
  /// (For checkers, this means only in the checkDeadSymbols callback.)
  bool isDead(SymbolRef sym) {
    return !isLive(sym);
  }

  void markLive(const MemRegion *region);
  void markElementIndicesLive(const MemRegion *region);

  /// Set to the value of the symbolic store after
  /// StoreManager::removeDeadBindings has been called.
  void setReapedStore(StoreRef st) { reapedStore = st; }

private:
  /// Mark the symbols dependent on the input symbol as live.
  void markDependentsLive(SymbolRef sym);
};

/// Interface for visitors over symbols and memory regions; VisitSymbol must be
/// implemented by subclasses.
class SymbolVisitor {
protected:
  // The destructor is protected and non-virtual, so a visitor cannot be
  // destroyed through a SymbolVisitor pointer from outside the hierarchy.
  ~SymbolVisitor() = default;

public:
  SymbolVisitor() = default;
  SymbolVisitor(const SymbolVisitor &) = default;
  // User-provided move constructor with an empty body.
  SymbolVisitor(SymbolVisitor &&) {}

  /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
  ///
  /// The method returns \c true if symbols should continue to be scanned and
  /// \c false otherwise.
  virtual bool VisitSymbol(SymbolRef sym) = 0;

  /// Visits a memory region. The default implementation returns \c true.
  virtual bool VisitMemRegion(const MemRegion *) { return true; }
};

} // namespace ento

} // namespace clang

#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H