GenericTaintChecker.cpp revision 341825
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/AST/Attr.h" 19#include "clang/Basic/Builtins.h" 20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21#include "clang/StaticAnalyzer/Core/Checker.h" 22#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25#include <climits> 26 27using namespace clang; 28using namespace ento; 29 30namespace { 31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable std::unique_ptr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// Given a pointer argument, return the value it points to. 69 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 70 71 /// Functions defining the attack surface. 72 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 73 CheckerContext &C) const; 74 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 75 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 77 78 /// Taint the scanned input if the file is tainted. 79 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 80 81 /// Check for CWE-134: Uncontrolled Format String. 82 static const char MsgUncontrolledFormatString[]; 83 bool checkUncontrolledFormatString(const CallExpr *CE, 84 CheckerContext &C) const; 85 86 /// Check for: 87 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 88 /// CWE-78, "Failure to Sanitize Data into an OS Command" 89 static const char MsgSanitizeSystemArgs[]; 90 bool checkSystemCall(const CallExpr *CE, StringRef Name, 91 CheckerContext &C) const; 92 93 /// Check if tainted data is used as a buffer size ins strn.. functions, 94 /// and allocators. 95 static const char MsgTaintedBufferSize[]; 96 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 97 CheckerContext &C) const; 98 99 /// Generate a report if the expression is tainted or points to tainted data. 100 bool generateReportIfTainted(const Expr *E, const char Msg[], 101 CheckerContext &C) const; 102 103 typedef SmallVector<unsigned, 2> ArgVector; 104 105 /// A struct used to specify taint propagation rules for a function. 106 /// 107 /// If any of the possible taint source arguments is tainted, all of the 108 /// destination arguments should also be tainted. Use InvalidArgIndex in the 109 /// src list to specify that all of the arguments can introduce taint. Use 110 /// InvalidArgIndex in the dst arguments to signify that all the non-const 111 /// pointer and reference arguments might be tainted on return. If 112 /// ReturnValueIndex is added to the dst list, the return value will be 113 /// tainted. 114 struct TaintPropagationRule { 115 /// List of arguments which can be taint sources and should be checked. 116 ArgVector SrcArgs; 117 /// List of arguments which should be tainted on function return. 118 ArgVector DstArgs; 119 // TODO: Check if using other data structures would be more optimal. 120 121 TaintPropagationRule() {} 122 123 TaintPropagationRule(unsigned SArg, 124 unsigned DArg, bool TaintRet = false) { 125 SrcArgs.push_back(SArg); 126 DstArgs.push_back(DArg); 127 if (TaintRet) 128 DstArgs.push_back(ReturnValueIndex); 129 } 130 131 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 132 unsigned DArg, bool TaintRet = false) { 133 SrcArgs.push_back(SArg1); 134 SrcArgs.push_back(SArg2); 135 DstArgs.push_back(DArg); 136 if (TaintRet) 137 DstArgs.push_back(ReturnValueIndex); 138 } 139 140 /// Get the propagation rule for a given function. 141 static TaintPropagationRule 142 getTaintPropagationRule(const FunctionDecl *FDecl, 143 StringRef Name, 144 CheckerContext &C); 145 146 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 147 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 148 149 inline bool isNull() const { return SrcArgs.empty(); } 150 151 inline bool isDestinationArgument(unsigned ArgNum) const { 152 return (std::find(DstArgs.begin(), 153 DstArgs.end(), ArgNum) != DstArgs.end()); 154 } 155 156 static inline bool isTaintedOrPointsToTainted(const Expr *E, 157 ProgramStateRef State, 158 CheckerContext &C) { 159 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) 160 return true; 161 162 if (!E->getType().getTypePtr()->isPointerType()) 163 return false; 164 165 Optional<SVal> V = getPointedToSVal(C, E); 166 return (V && State->isTainted(*V)); 167 } 168 169 /// Pre-process a function which propagates taint according to the 170 /// taint rule. 171 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 172 173 }; 174}; 175 176const unsigned GenericTaintChecker::ReturnValueIndex; 177const unsigned GenericTaintChecker::InvalidArgIndex; 178 179const char GenericTaintChecker::MsgUncontrolledFormatString[] = 180 "Untrusted data is used as a format string " 181 "(CWE-134: Uncontrolled Format String)"; 182 183const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 184 "Untrusted data is passed to a system call " 185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 186 187const char GenericTaintChecker::MsgTaintedBufferSize[] = 188 "Untrusted data is used to specify the buffer size " 189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 190 "character data and the null terminator)"; 191 192} // end of anonymous namespace 193 194/// A set which is used to pass information from call pre-visit instruction 195/// to the call post-visit. The values are unsigned integers, which are either 196/// ReturnValueIndex, or indexes of the pointer/reference argument, which 197/// points to data, which should be tainted on return. 198REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 199 200GenericTaintChecker::TaintPropagationRule 201GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 202 const FunctionDecl *FDecl, 203 StringRef Name, 204 CheckerContext &C) { 205 // TODO: Currently, we might lose precision here: we always mark a return 206 // value as tainted even if it's just a pointer, pointing to tainted data. 207 208 // Check for exact name match for functions without builtin substitutes. 209 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 210 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 211 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 212 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 213 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("read", TaintPropagationRule(0, 2, 1, true)) 222 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 223 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 224 .Case("fgets", TaintPropagationRule(2, 0, true)) 225 .Case("getline", TaintPropagationRule(2, 0)) 226 .Case("getdelim", TaintPropagationRule(3, 0)) 227 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 228 .Default(TaintPropagationRule()); 229 230 if (!Rule.isNull()) 231 return Rule; 232 233 // Check if it's one of the memory setting/copying functions. 234 // This check is specialized but faster then calling isCLibraryFunction. 235 unsigned BId = 0; 236 if ( (BId = FDecl->getMemoryFunctionKind()) ) 237 switch(BId) { 238 case Builtin::BImemcpy: 239 case Builtin::BImemmove: 240 case Builtin::BIstrncpy: 241 case Builtin::BIstrncat: 242 return TaintPropagationRule(1, 2, 0, true); 243 case Builtin::BIstrlcpy: 244 case Builtin::BIstrlcat: 245 return TaintPropagationRule(1, 2, 0, false); 246 case Builtin::BIstrndup: 247 return TaintPropagationRule(0, 1, ReturnValueIndex); 248 249 default: 250 break; 251 }; 252 253 // Process all other functions which could be defined as builtins. 254 if (Rule.isNull()) { 255 if (C.isCLibraryFunction(FDecl, "snprintf") || 256 C.isCLibraryFunction(FDecl, "sprintf")) 257 return TaintPropagationRule(InvalidArgIndex, 0, true); 258 else if (C.isCLibraryFunction(FDecl, "strcpy") || 259 C.isCLibraryFunction(FDecl, "stpcpy") || 260 C.isCLibraryFunction(FDecl, "strcat")) 261 return TaintPropagationRule(1, 0, true); 262 else if (C.isCLibraryFunction(FDecl, "bcopy")) 263 return TaintPropagationRule(0, 2, 1, false); 264 else if (C.isCLibraryFunction(FDecl, "strdup") || 265 C.isCLibraryFunction(FDecl, "strdupa")) 266 return TaintPropagationRule(0, ReturnValueIndex); 267 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 268 return TaintPropagationRule(0, ReturnValueIndex); 269 } 270 271 // Skipping the following functions, since they might be used for cleansing 272 // or smart memory copy: 273 // - memccpy - copying until hitting a special character. 274 275 return TaintPropagationRule(); 276} 277 278void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 279 CheckerContext &C) const { 280 // Check for errors first. 281 if (checkPre(CE, C)) 282 return; 283 284 // Add taint second. 285 addSourcesPre(CE, C); 286} 287 288void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 289 CheckerContext &C) const { 290 if (propagateFromPre(CE, C)) 291 return; 292 addSourcesPost(CE, C); 293} 294 295void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 296 CheckerContext &C) const { 297 ProgramStateRef State = nullptr; 298 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 299 if (!FDecl || FDecl->getKind() != Decl::Function) 300 return; 301 302 StringRef Name = C.getCalleeName(FDecl); 303 if (Name.empty()) 304 return; 305 306 // First, try generating a propagation rule for this function. 307 TaintPropagationRule Rule = 308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 309 if (!Rule.isNull()) { 310 State = Rule.process(CE, C); 311 if (!State) 312 return; 313 C.addTransition(State); 314 return; 315 } 316 317 // Otherwise, check if we have custom pre-processing implemented. 318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 319 .Case("fscanf", &GenericTaintChecker::preFscanf) 320 .Default(nullptr); 321 // Check and evaluate the call. 322 if (evalFunction) 323 State = (this->*evalFunction)(CE, C); 324 if (!State) 325 return; 326 C.addTransition(State); 327 328} 329 330bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 331 CheckerContext &C) const { 332 ProgramStateRef State = C.getState(); 333 334 // Depending on what was tainted at pre-visit, we determined a set of 335 // arguments which should be tainted after the function returns. These are 336 // stored in the state as TaintArgsOnPostVisit set. 337 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 338 if (TaintArgs.isEmpty()) 339 return false; 340 341 for (llvm::ImmutableSet<unsigned>::iterator 342 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 343 unsigned ArgNum = *I; 344 345 // Special handling for the tainted return value. 346 if (ArgNum == ReturnValueIndex) { 347 State = State->addTaint(CE, C.getLocationContext()); 348 continue; 349 } 350 351 // The arguments are pointer arguments. The data they are pointing at is 352 // tainted after the call. 353 if (CE->getNumArgs() < (ArgNum + 1)) 354 return false; 355 const Expr* Arg = CE->getArg(ArgNum); 356 Optional<SVal> V = getPointedToSVal(C, Arg); 357 if (V) 358 State = State->addTaint(*V); 359 } 360 361 // Clear up the taint info from the state. 362 State = State->remove<TaintArgsOnPostVisit>(); 363 364 if (State != C.getState()) { 365 C.addTransition(State); 366 return true; 367 } 368 return false; 369} 370 371void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 372 CheckerContext &C) const { 373 // Define the attack surface. 374 // Set the evaluation function by switching on the callee name. 375 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 376 if (!FDecl || FDecl->getKind() != Decl::Function) 377 return; 378 379 StringRef Name = C.getCalleeName(FDecl); 380 if (Name.empty()) 381 return; 382 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 383 .Case("scanf", &GenericTaintChecker::postScanf) 384 // TODO: Add support for vfscanf & family. 385 .Case("getchar", &GenericTaintChecker::postRetTaint) 386 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 387 .Case("getenv", &GenericTaintChecker::postRetTaint) 388 .Case("fopen", &GenericTaintChecker::postRetTaint) 389 .Case("fdopen", &GenericTaintChecker::postRetTaint) 390 .Case("freopen", &GenericTaintChecker::postRetTaint) 391 .Case("getch", &GenericTaintChecker::postRetTaint) 392 .Case("wgetch", &GenericTaintChecker::postRetTaint) 393 .Case("socket", &GenericTaintChecker::postSocket) 394 .Default(nullptr); 395 396 // If the callee isn't defined, it is not of security concern. 397 // Check and evaluate the call. 398 ProgramStateRef State = nullptr; 399 if (evalFunction) 400 State = (this->*evalFunction)(CE, C); 401 if (!State) 402 return; 403 404 C.addTransition(State); 405} 406 407bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 408 409 if (checkUncontrolledFormatString(CE, C)) 410 return true; 411 412 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 413 if (!FDecl || FDecl->getKind() != Decl::Function) 414 return false; 415 416 StringRef Name = C.getCalleeName(FDecl); 417 if (Name.empty()) 418 return false; 419 420 if (checkSystemCall(CE, Name, C)) 421 return true; 422 423 if (checkTaintedBufferSize(CE, FDecl, C)) 424 return true; 425 426 return false; 427} 428 429Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 430 const Expr *Arg) { 431 ProgramStateRef State = C.getState(); 432 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 433 if (AddrVal.isUnknownOrUndef()) 434 return None; 435 436 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 437 if (!AddrLoc) 438 return None; 439 440 QualType ArgTy = Arg->getType().getCanonicalType(); 441 if (!ArgTy->isPointerType()) 442 return None; 443 444 QualType ValTy = ArgTy->getPointeeType(); 445 446 // Do not dereference void pointers. Treat them as byte pointers instead. 447 // FIXME: we might want to consider more than just the first byte. 448 if (ValTy->isVoidType()) 449 ValTy = C.getASTContext().CharTy; 450 451 return State->getSVal(*AddrLoc, ValTy); 452} 453 454ProgramStateRef 455GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 456 CheckerContext &C) const { 457 ProgramStateRef State = C.getState(); 458 459 // Check for taint in arguments. 460 bool IsTainted = false; 461 for (ArgVector::const_iterator I = SrcArgs.begin(), 462 E = SrcArgs.end(); I != E; ++I) { 463 unsigned ArgNum = *I; 464 465 if (ArgNum == InvalidArgIndex) { 466 // Check if any of the arguments is tainted, but skip the 467 // destination arguments. 468 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 469 if (isDestinationArgument(i)) 470 continue; 471 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 472 break; 473 } 474 break; 475 } 476 477 if (CE->getNumArgs() < (ArgNum + 1)) 478 return State; 479 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 480 break; 481 } 482 if (!IsTainted) 483 return State; 484 485 // Mark the arguments which should be tainted after the function returns. 486 for (ArgVector::const_iterator I = DstArgs.begin(), 487 E = DstArgs.end(); I != E; ++I) { 488 unsigned ArgNum = *I; 489 490 // Should we mark all arguments as tainted? 491 if (ArgNum == InvalidArgIndex) { 492 // For all pointer and references that were passed in: 493 // If they are not pointing to const data, mark data as tainted. 494 // TODO: So far we are just going one level down; ideally we'd need to 495 // recurse here. 496 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 497 const Expr *Arg = CE->getArg(i); 498 // Process pointer argument. 499 const Type *ArgTy = Arg->getType().getTypePtr(); 500 QualType PType = ArgTy->getPointeeType(); 501 if ((!PType.isNull() && !PType.isConstQualified()) 502 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 503 State = State->add<TaintArgsOnPostVisit>(i); 504 } 505 continue; 506 } 507 508 // Should mark the return value? 509 if (ArgNum == ReturnValueIndex) { 510 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 511 continue; 512 } 513 514 // Mark the given argument. 515 assert(ArgNum < CE->getNumArgs()); 516 State = State->add<TaintArgsOnPostVisit>(ArgNum); 517 } 518 519 return State; 520} 521 522 523// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 524// and arg 1 should get taint. 525ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 526 CheckerContext &C) const { 527 assert(CE->getNumArgs() >= 2); 528 ProgramStateRef State = C.getState(); 529 530 // Check is the file descriptor is tainted. 531 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 532 isStdin(CE->getArg(0), C)) { 533 // All arguments except for the first two should get taint. 534 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 535 State = State->add<TaintArgsOnPostVisit>(i); 536 return State; 537 } 538 539 return nullptr; 540} 541 542 543// If argument 0(protocol domain) is network, the return value should get taint. 544ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 545 CheckerContext &C) const { 546 ProgramStateRef State = C.getState(); 547 if (CE->getNumArgs() < 3) 548 return State; 549 550 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 551 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 552 // White list the internal communication protocols. 553 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 554 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 555 return State; 556 State = State->addTaint(CE, C.getLocationContext()); 557 return State; 558} 559 560ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 561 CheckerContext &C) const { 562 ProgramStateRef State = C.getState(); 563 if (CE->getNumArgs() < 2) 564 return State; 565 566 // All arguments except for the very first one should get taint. 567 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 568 // The arguments are pointer arguments. The data they are pointing at is 569 // tainted after the call. 570 const Expr* Arg = CE->getArg(i); 571 Optional<SVal> V = getPointedToSVal(C, Arg); 572 if (V) 573 State = State->addTaint(*V); 574 } 575 return State; 576} 577 578ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 579 CheckerContext &C) const { 580 return C.getState()->addTaint(CE, C.getLocationContext()); 581} 582 583bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 584 ProgramStateRef State = C.getState(); 585 SVal Val = C.getSVal(E); 586 587 // stdin is a pointer, so it would be a region. 588 const MemRegion *MemReg = Val.getAsRegion(); 589 590 // The region should be symbolic, we do not know it's value. 591 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 592 if (!SymReg) 593 return false; 594 595 // Get it's symbol and find the declaration region it's pointing to. 596 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 597 if (!Sm) 598 return false; 599 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 600 if (!DeclReg) 601 return false; 602 603 // This region corresponds to a declaration, find out if it's a global/extern 604 // variable named stdin with the proper type. 605 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 606 D = D->getCanonicalDecl(); 607 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 608 if (const PointerType * PtrTy = 609 dyn_cast<PointerType>(D->getType().getTypePtr())) 610 if (PtrTy->getPointeeType().getCanonicalType() == 611 C.getASTContext().getFILEType().getCanonicalType()) 612 return true; 613 } 614 return false; 615} 616 617static bool getPrintfFormatArgumentNum(const CallExpr *CE, 618 const CheckerContext &C, 619 unsigned int &ArgNum) { 620 // Find if the function contains a format string argument. 621 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 622 // vsnprintf, syslog, custom annotated functions. 623 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 624 if (!FDecl) 625 return false; 626 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 627 ArgNum = Format->getFormatIdx() - 1; 628 if ((Format->getType()->getName() == "printf") && 629 CE->getNumArgs() > ArgNum) 630 return true; 631 } 632 633 // Or if a function is named setproctitle (this is a heuristic). 634 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 635 ArgNum = 0; 636 return true; 637 } 638 639 return false; 640} 641 642bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 643 const char Msg[], 644 CheckerContext &C) const { 645 assert(E); 646 647 // Check for taint. 648 ProgramStateRef State = C.getState(); 649 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 650 SVal TaintedSVal; 651 if (PointedToSVal && State->isTainted(*PointedToSVal)) 652 TaintedSVal = *PointedToSVal; 653 else if (State->isTainted(E, C.getLocationContext())) 654 TaintedSVal = C.getSVal(E); 655 else 656 return false; 657 658 // Generate diagnostic. 659 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 660 initBugType(); 661 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 662 report->addRange(E->getSourceRange()); 663 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 664 C.emitReport(std::move(report)); 665 return true; 666 } 667 return false; 668} 669 670bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 671 CheckerContext &C) const{ 672 // Check if the function contains a format string argument. 673 unsigned int ArgNum = 0; 674 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 675 return false; 676 677 // If either the format string content or the pointer itself are tainted, warn. 678 return generateReportIfTainted(CE->getArg(ArgNum), 679 MsgUncontrolledFormatString, C); 680} 681 682bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 683 StringRef Name, 684 CheckerContext &C) const { 685 // TODO: It might make sense to run this check on demand. In some cases, 686 // we should check if the environment has been cleansed here. We also might 687 // need to know if the user was reset before these calls(seteuid). 688 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 689 .Case("system", 0) 690 .Case("popen", 0) 691 .Case("execl", 0) 692 .Case("execle", 0) 693 .Case("execlp", 0) 694 .Case("execv", 0) 695 .Case("execvp", 0) 696 .Case("execvP", 0) 697 .Case("execve", 0) 698 .Case("dlopen", 0) 699 .Default(UINT_MAX); 700 701 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 702 return false; 703 704 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 705} 706 707// TODO: Should this check be a part of the CString checker? 708// If yes, should taint be a global setting? 709bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 710 const FunctionDecl *FDecl, 711 CheckerContext &C) const { 712 // If the function has a buffer size argument, set ArgNum. 713 unsigned ArgNum = InvalidArgIndex; 714 unsigned BId = 0; 715 if ( (BId = FDecl->getMemoryFunctionKind()) ) 716 switch(BId) { 717 case Builtin::BImemcpy: 718 case Builtin::BImemmove: 719 case Builtin::BIstrncpy: 720 ArgNum = 2; 721 break; 722 case Builtin::BIstrndup: 723 ArgNum = 1; 724 break; 725 default: 726 break; 727 }; 728 729 if (ArgNum == InvalidArgIndex) { 730 if (C.isCLibraryFunction(FDecl, "malloc") || 731 C.isCLibraryFunction(FDecl, "calloc") || 732 C.isCLibraryFunction(FDecl, "alloca")) 733 ArgNum = 0; 734 else if (C.isCLibraryFunction(FDecl, "memccpy")) 735 ArgNum = 3; 736 else if (C.isCLibraryFunction(FDecl, "realloc")) 737 ArgNum = 1; 738 else if (C.isCLibraryFunction(FDecl, "bcopy")) 739 ArgNum = 2; 740 } 741 742 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 743 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 744} 745 746void ento::registerGenericTaintChecker(CheckerManager &mgr) { 747 mgr.registerChecker<GenericTaintChecker>(); 748} 749