// GenericTaintChecker.cpp revision 321369
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/AST/Attr.h" 19#include "clang/Basic/Builtins.h" 20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21#include "clang/StaticAnalyzer/Core/Checker.h" 22#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25#include <climits> 26 27using namespace clang; 28using namespace ento; 29 30namespace { 31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable std::unique_ptr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 
53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, return the value it points to. 69 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 70 71 /// Functions defining the attack surface. 72 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 73 CheckerContext &C) const; 74 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 75 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 77 78 /// Taint the scanned input if the file is tainted. 79 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 80 81 /// Check for CWE-134: Uncontrolled Format String. 82 static const char MsgUncontrolledFormatString[]; 83 bool checkUncontrolledFormatString(const CallExpr *CE, 84 CheckerContext &C) const; 85 86 /// Check for: 87 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 88 /// CWE-78, "Failure to Sanitize Data into an OS Command" 89 static const char MsgSanitizeSystemArgs[]; 90 bool checkSystemCall(const CallExpr *CE, StringRef Name, 91 CheckerContext &C) const; 92 93 /// Check if tainted data is used as a buffer size ins strn.. functions, 94 /// and allocators. 
95 static const char MsgTaintedBufferSize[]; 96 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 97 CheckerContext &C) const; 98 99 /// Generate a report if the expression is tainted or points to tainted data. 100 bool generateReportIfTainted(const Expr *E, const char Msg[], 101 CheckerContext &C) const; 102 103 /// The bug visitor prints a diagnostic message at the location where a given 104 /// variable was tainted. 105 class TaintBugVisitor 106 : public BugReporterVisitorImpl<TaintBugVisitor> { 107 private: 108 const SVal V; 109 110 public: 111 TaintBugVisitor(const SVal V) : V(V) {} 112 void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } 113 114 std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N, 115 const ExplodedNode *PrevN, 116 BugReporterContext &BRC, 117 BugReport &BR) override; 118 }; 119 120 typedef SmallVector<unsigned, 2> ArgVector; 121 122 /// \brief A struct used to specify taint propagation rules for a function. 123 /// 124 /// If any of the possible taint source arguments is tainted, all of the 125 /// destination arguments should also be tainted. Use InvalidArgIndex in the 126 /// src list to specify that all of the arguments can introduce taint. Use 127 /// InvalidArgIndex in the dst arguments to signify that all the non-const 128 /// pointer and reference arguments might be tainted on return. If 129 /// ReturnValueIndex is added to the dst list, the return value will be 130 /// tainted. 131 struct TaintPropagationRule { 132 /// List of arguments which can be taint sources and should be checked. 133 ArgVector SrcArgs; 134 /// List of arguments which should be tainted on function return. 135 ArgVector DstArgs; 136 // TODO: Check if using other data structures would be more optimal. 
137 138 TaintPropagationRule() {} 139 140 TaintPropagationRule(unsigned SArg, 141 unsigned DArg, bool TaintRet = false) { 142 SrcArgs.push_back(SArg); 143 DstArgs.push_back(DArg); 144 if (TaintRet) 145 DstArgs.push_back(ReturnValueIndex); 146 } 147 148 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 149 unsigned DArg, bool TaintRet = false) { 150 SrcArgs.push_back(SArg1); 151 SrcArgs.push_back(SArg2); 152 DstArgs.push_back(DArg); 153 if (TaintRet) 154 DstArgs.push_back(ReturnValueIndex); 155 } 156 157 /// Get the propagation rule for a given function. 158 static TaintPropagationRule 159 getTaintPropagationRule(const FunctionDecl *FDecl, 160 StringRef Name, 161 CheckerContext &C); 162 163 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 164 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 165 166 inline bool isNull() const { return SrcArgs.empty(); } 167 168 inline bool isDestinationArgument(unsigned ArgNum) const { 169 return (std::find(DstArgs.begin(), 170 DstArgs.end(), ArgNum) != DstArgs.end()); 171 } 172 173 static inline bool isTaintedOrPointsToTainted(const Expr *E, 174 ProgramStateRef State, 175 CheckerContext &C) { 176 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) 177 return true; 178 179 if (!E->getType().getTypePtr()->isPointerType()) 180 return false; 181 182 Optional<SVal> V = getPointedToSVal(C, E); 183 return (V && State->isTainted(*V)); 184 } 185 186 /// \brief Pre-process a function which propagates taint according to the 187 /// taint rule. 
188 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 189 190 }; 191}; 192 193const unsigned GenericTaintChecker::ReturnValueIndex; 194const unsigned GenericTaintChecker::InvalidArgIndex; 195 196const char GenericTaintChecker::MsgUncontrolledFormatString[] = 197 "Untrusted data is used as a format string " 198 "(CWE-134: Uncontrolled Format String)"; 199 200const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 201 "Untrusted data is passed to a system call " 202 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 203 204const char GenericTaintChecker::MsgTaintedBufferSize[] = 205 "Untrusted data is used to specify the buffer size " 206 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 207 "character data and the null terminator)"; 208 209} // end of anonymous namespace 210 211/// A set which is used to pass information from call pre-visit instruction 212/// to the call post-visit. The values are unsigned integers, which are either 213/// ReturnValueIndex, or indexes of the pointer/reference argument, which 214/// points to data, which should be tainted on return. 
215REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 216 217std::shared_ptr<PathDiagnosticPiece> 218GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N, 219 const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) { 220 221 // Find the ExplodedNode where the taint was first introduced 222 if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V)) 223 return nullptr; 224 225 const Stmt *S = PathDiagnosticLocation::getStmt(N); 226 if (!S) 227 return nullptr; 228 229 const LocationContext *NCtx = N->getLocationContext(); 230 PathDiagnosticLocation L = 231 PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 232 if (!L.isValid() || !L.asLocation().isValid()) 233 return nullptr; 234 235 return std::make_shared<PathDiagnosticEventPiece>( 236 L, "Taint originated here"); 237} 238 239GenericTaintChecker::TaintPropagationRule 240GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 241 const FunctionDecl *FDecl, 242 StringRef Name, 243 CheckerContext &C) { 244 // TODO: Currently, we might lose precision here: we always mark a return 245 // value as tainted even if it's just a pointer, pointing to tainted data. 246 247 // Check for exact name match for functions without builtin substitutes. 
248 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 249 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 250 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 251 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 252 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 253 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 254 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 255 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 256 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 257 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 258 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 259 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 260 .Case("read", TaintPropagationRule(0, 2, 1, true)) 261 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 262 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 263 .Case("fgets", TaintPropagationRule(2, 0, true)) 264 .Case("getline", TaintPropagationRule(2, 0)) 265 .Case("getdelim", TaintPropagationRule(3, 0)) 266 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 267 .Default(TaintPropagationRule()); 268 269 if (!Rule.isNull()) 270 return Rule; 271 272 // Check if it's one of the memory setting/copying functions. 273 // This check is specialized but faster then calling isCLibraryFunction. 274 unsigned BId = 0; 275 if ( (BId = FDecl->getMemoryFunctionKind()) ) 276 switch(BId) { 277 case Builtin::BImemcpy: 278 case Builtin::BImemmove: 279 case Builtin::BIstrncpy: 280 case Builtin::BIstrncat: 281 return TaintPropagationRule(1, 2, 0, true); 282 case Builtin::BIstrlcpy: 283 case Builtin::BIstrlcat: 284 return TaintPropagationRule(1, 2, 0, false); 285 case Builtin::BIstrndup: 286 return TaintPropagationRule(0, 1, ReturnValueIndex); 287 288 default: 289 break; 290 }; 291 292 // Process all other functions which could be defined as builtins. 
293 if (Rule.isNull()) { 294 if (C.isCLibraryFunction(FDecl, "snprintf") || 295 C.isCLibraryFunction(FDecl, "sprintf")) 296 return TaintPropagationRule(InvalidArgIndex, 0, true); 297 else if (C.isCLibraryFunction(FDecl, "strcpy") || 298 C.isCLibraryFunction(FDecl, "stpcpy") || 299 C.isCLibraryFunction(FDecl, "strcat")) 300 return TaintPropagationRule(1, 0, true); 301 else if (C.isCLibraryFunction(FDecl, "bcopy")) 302 return TaintPropagationRule(0, 2, 1, false); 303 else if (C.isCLibraryFunction(FDecl, "strdup") || 304 C.isCLibraryFunction(FDecl, "strdupa")) 305 return TaintPropagationRule(0, ReturnValueIndex); 306 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 307 return TaintPropagationRule(0, ReturnValueIndex); 308 } 309 310 // Skipping the following functions, since they might be used for cleansing 311 // or smart memory copy: 312 // - memccpy - copying until hitting a special character. 313 314 return TaintPropagationRule(); 315} 316 317void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 318 CheckerContext &C) const { 319 // Check for errors first. 320 if (checkPre(CE, C)) 321 return; 322 323 // Add taint second. 324 addSourcesPre(CE, C); 325} 326 327void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 328 CheckerContext &C) const { 329 if (propagateFromPre(CE, C)) 330 return; 331 addSourcesPost(CE, C); 332} 333 334void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 335 CheckerContext &C) const { 336 ProgramStateRef State = nullptr; 337 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 338 if (!FDecl || FDecl->getKind() != Decl::Function) 339 return; 340 341 StringRef Name = C.getCalleeName(FDecl); 342 if (Name.empty()) 343 return; 344 345 // First, try generating a propagation rule for this function. 
346 TaintPropagationRule Rule = 347 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 348 if (!Rule.isNull()) { 349 State = Rule.process(CE, C); 350 if (!State) 351 return; 352 C.addTransition(State); 353 return; 354 } 355 356 // Otherwise, check if we have custom pre-processing implemented. 357 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 358 .Case("fscanf", &GenericTaintChecker::preFscanf) 359 .Default(nullptr); 360 // Check and evaluate the call. 361 if (evalFunction) 362 State = (this->*evalFunction)(CE, C); 363 if (!State) 364 return; 365 C.addTransition(State); 366 367} 368 369bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 370 CheckerContext &C) const { 371 ProgramStateRef State = C.getState(); 372 373 // Depending on what was tainted at pre-visit, we determined a set of 374 // arguments which should be tainted after the function returns. These are 375 // stored in the state as TaintArgsOnPostVisit set. 376 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 377 if (TaintArgs.isEmpty()) 378 return false; 379 380 for (llvm::ImmutableSet<unsigned>::iterator 381 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 382 unsigned ArgNum = *I; 383 384 // Special handling for the tainted return value. 385 if (ArgNum == ReturnValueIndex) { 386 State = State->addTaint(CE, C.getLocationContext()); 387 continue; 388 } 389 390 // The arguments are pointer arguments. The data they are pointing at is 391 // tainted after the call. 392 if (CE->getNumArgs() < (ArgNum + 1)) 393 return false; 394 const Expr* Arg = CE->getArg(ArgNum); 395 Optional<SVal> V = getPointedToSVal(C, Arg); 396 if (V) 397 State = State->addTaint(*V); 398 } 399 400 // Clear up the taint info from the state. 
401 State = State->remove<TaintArgsOnPostVisit>(); 402 403 if (State != C.getState()) { 404 C.addTransition(State); 405 return true; 406 } 407 return false; 408} 409 410void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 411 CheckerContext &C) const { 412 // Define the attack surface. 413 // Set the evaluation function by switching on the callee name. 414 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 415 if (!FDecl || FDecl->getKind() != Decl::Function) 416 return; 417 418 StringRef Name = C.getCalleeName(FDecl); 419 if (Name.empty()) 420 return; 421 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 422 .Case("scanf", &GenericTaintChecker::postScanf) 423 // TODO: Add support for vfscanf & family. 424 .Case("getchar", &GenericTaintChecker::postRetTaint) 425 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 426 .Case("getenv", &GenericTaintChecker::postRetTaint) 427 .Case("fopen", &GenericTaintChecker::postRetTaint) 428 .Case("fdopen", &GenericTaintChecker::postRetTaint) 429 .Case("freopen", &GenericTaintChecker::postRetTaint) 430 .Case("getch", &GenericTaintChecker::postRetTaint) 431 .Case("wgetch", &GenericTaintChecker::postRetTaint) 432 .Case("socket", &GenericTaintChecker::postSocket) 433 .Default(nullptr); 434 435 // If the callee isn't defined, it is not of security concern. 436 // Check and evaluate the call. 
437 ProgramStateRef State = nullptr; 438 if (evalFunction) 439 State = (this->*evalFunction)(CE, C); 440 if (!State) 441 return; 442 443 C.addTransition(State); 444} 445 446bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 447 448 if (checkUncontrolledFormatString(CE, C)) 449 return true; 450 451 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 452 if (!FDecl || FDecl->getKind() != Decl::Function) 453 return false; 454 455 StringRef Name = C.getCalleeName(FDecl); 456 if (Name.empty()) 457 return false; 458 459 if (checkSystemCall(CE, Name, C)) 460 return true; 461 462 if (checkTaintedBufferSize(CE, FDecl, C)) 463 return true; 464 465 return false; 466} 467 468Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 469 const Expr* Arg) { 470 ProgramStateRef State = C.getState(); 471 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 472 if (AddrVal.isUnknownOrUndef()) 473 return None; 474 475 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 476 if (!AddrLoc) 477 return None; 478 479 const PointerType *ArgTy = 480 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 481 return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType()); 482} 483 484ProgramStateRef 485GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 486 CheckerContext &C) const { 487 ProgramStateRef State = C.getState(); 488 489 // Check for taint in arguments. 490 bool IsTainted = false; 491 for (ArgVector::const_iterator I = SrcArgs.begin(), 492 E = SrcArgs.end(); I != E; ++I) { 493 unsigned ArgNum = *I; 494 495 if (ArgNum == InvalidArgIndex) { 496 // Check if any of the arguments is tainted, but skip the 497 // destination arguments. 
498 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 499 if (isDestinationArgument(i)) 500 continue; 501 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 502 break; 503 } 504 break; 505 } 506 507 if (CE->getNumArgs() < (ArgNum + 1)) 508 return State; 509 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 510 break; 511 } 512 if (!IsTainted) 513 return State; 514 515 // Mark the arguments which should be tainted after the function returns. 516 for (ArgVector::const_iterator I = DstArgs.begin(), 517 E = DstArgs.end(); I != E; ++I) { 518 unsigned ArgNum = *I; 519 520 // Should we mark all arguments as tainted? 521 if (ArgNum == InvalidArgIndex) { 522 // For all pointer and references that were passed in: 523 // If they are not pointing to const data, mark data as tainted. 524 // TODO: So far we are just going one level down; ideally we'd need to 525 // recurse here. 526 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 527 const Expr *Arg = CE->getArg(i); 528 // Process pointer argument. 529 const Type *ArgTy = Arg->getType().getTypePtr(); 530 QualType PType = ArgTy->getPointeeType(); 531 if ((!PType.isNull() && !PType.isConstQualified()) 532 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 533 State = State->add<TaintArgsOnPostVisit>(i); 534 } 535 continue; 536 } 537 538 // Should mark the return value? 539 if (ArgNum == ReturnValueIndex) { 540 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 541 continue; 542 } 543 544 // Mark the given argument. 545 assert(ArgNum < CE->getNumArgs()); 546 State = State->add<TaintArgsOnPostVisit>(ArgNum); 547 } 548 549 return State; 550} 551 552 553// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 554// and arg 1 should get taint. 
555ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 556 CheckerContext &C) const { 557 assert(CE->getNumArgs() >= 2); 558 ProgramStateRef State = C.getState(); 559 560 // Check is the file descriptor is tainted. 561 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 562 isStdin(CE->getArg(0), C)) { 563 // All arguments except for the first two should get taint. 564 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 565 State = State->add<TaintArgsOnPostVisit>(i); 566 return State; 567 } 568 569 return nullptr; 570} 571 572 573// If argument 0(protocol domain) is network, the return value should get taint. 574ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 575 CheckerContext &C) const { 576 ProgramStateRef State = C.getState(); 577 if (CE->getNumArgs() < 3) 578 return State; 579 580 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 581 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 582 // White list the internal communication protocols. 583 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 584 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 585 return State; 586 State = State->addTaint(CE, C.getLocationContext()); 587 return State; 588} 589 590ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 591 CheckerContext &C) const { 592 ProgramStateRef State = C.getState(); 593 if (CE->getNumArgs() < 2) 594 return State; 595 596 // All arguments except for the very first one should get taint. 597 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 598 // The arguments are pointer arguments. The data they are pointing at is 599 // tainted after the call. 
600 const Expr* Arg = CE->getArg(i); 601 Optional<SVal> V = getPointedToSVal(C, Arg); 602 if (V) 603 State = State->addTaint(*V); 604 } 605 return State; 606} 607 608ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 609 CheckerContext &C) const { 610 return C.getState()->addTaint(CE, C.getLocationContext()); 611} 612 613bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 614 ProgramStateRef State = C.getState(); 615 SVal Val = State->getSVal(E, C.getLocationContext()); 616 617 // stdin is a pointer, so it would be a region. 618 const MemRegion *MemReg = Val.getAsRegion(); 619 620 // The region should be symbolic, we do not know it's value. 621 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 622 if (!SymReg) 623 return false; 624 625 // Get it's symbol and find the declaration region it's pointing to. 626 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 627 if (!Sm) 628 return false; 629 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 630 if (!DeclReg) 631 return false; 632 633 // This region corresponds to a declaration, find out if it's a global/extern 634 // variable named stdin with the proper type. 635 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 636 D = D->getCanonicalDecl(); 637 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 638 if (const PointerType * PtrTy = 639 dyn_cast<PointerType>(D->getType().getTypePtr())) 640 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 641 return true; 642 } 643 return false; 644} 645 646static bool getPrintfFormatArgumentNum(const CallExpr *CE, 647 const CheckerContext &C, 648 unsigned int &ArgNum) { 649 // Find if the function contains a format string argument. 650 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 651 // vsnprintf, syslog, custom annotated functions. 
652 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 653 if (!FDecl) 654 return false; 655 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 656 ArgNum = Format->getFormatIdx() - 1; 657 if ((Format->getType()->getName() == "printf") && 658 CE->getNumArgs() > ArgNum) 659 return true; 660 } 661 662 // Or if a function is named setproctitle (this is a heuristic). 663 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 664 ArgNum = 0; 665 return true; 666 } 667 668 return false; 669} 670 671bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 672 const char Msg[], 673 CheckerContext &C) const { 674 assert(E); 675 676 // Check for taint. 677 ProgramStateRef State = C.getState(); 678 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 679 SVal TaintedSVal; 680 if (PointedToSVal && State->isTainted(*PointedToSVal)) 681 TaintedSVal = *PointedToSVal; 682 else if (State->isTainted(E, C.getLocationContext())) 683 TaintedSVal = C.getSVal(E); 684 else 685 return false; 686 687 // Generate diagnostic. 688 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 689 initBugType(); 690 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 691 report->addRange(E->getSourceRange()); 692 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 693 C.emitReport(std::move(report)); 694 return true; 695 } 696 return false; 697} 698 699bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 700 CheckerContext &C) const{ 701 // Check if the function contains a format string argument. 702 unsigned int ArgNum = 0; 703 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 704 return false; 705 706 // If either the format string content or the pointer itself are tainted, warn. 
707 return generateReportIfTainted(CE->getArg(ArgNum), 708 MsgUncontrolledFormatString, C); 709} 710 711bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 712 StringRef Name, 713 CheckerContext &C) const { 714 // TODO: It might make sense to run this check on demand. In some cases, 715 // we should check if the environment has been cleansed here. We also might 716 // need to know if the user was reset before these calls(seteuid). 717 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 718 .Case("system", 0) 719 .Case("popen", 0) 720 .Case("execl", 0) 721 .Case("execle", 0) 722 .Case("execlp", 0) 723 .Case("execv", 0) 724 .Case("execvp", 0) 725 .Case("execvP", 0) 726 .Case("execve", 0) 727 .Case("dlopen", 0) 728 .Default(UINT_MAX); 729 730 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 731 return false; 732 733 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 734} 735 736// TODO: Should this check be a part of the CString checker? 737// If yes, should taint be a global setting? 738bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 739 const FunctionDecl *FDecl, 740 CheckerContext &C) const { 741 // If the function has a buffer size argument, set ArgNum. 
742 unsigned ArgNum = InvalidArgIndex; 743 unsigned BId = 0; 744 if ( (BId = FDecl->getMemoryFunctionKind()) ) 745 switch(BId) { 746 case Builtin::BImemcpy: 747 case Builtin::BImemmove: 748 case Builtin::BIstrncpy: 749 ArgNum = 2; 750 break; 751 case Builtin::BIstrndup: 752 ArgNum = 1; 753 break; 754 default: 755 break; 756 }; 757 758 if (ArgNum == InvalidArgIndex) { 759 if (C.isCLibraryFunction(FDecl, "malloc") || 760 C.isCLibraryFunction(FDecl, "calloc") || 761 C.isCLibraryFunction(FDecl, "alloca")) 762 ArgNum = 0; 763 else if (C.isCLibraryFunction(FDecl, "memccpy")) 764 ArgNum = 3; 765 else if (C.isCLibraryFunction(FDecl, "realloc")) 766 ArgNum = 1; 767 else if (C.isCLibraryFunction(FDecl, "bcopy")) 768 ArgNum = 2; 769 } 770 771 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 772 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 773} 774 775void ento::registerGenericTaintChecker(CheckerManager &mgr) { 776 mgr.registerChecker<GenericTaintChecker>(); 777} 778