// GenericTaintChecker.cpp revision 344779
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18#include "clang/AST/Attr.h" 19#include "clang/Basic/Builtins.h" 20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21#include "clang/StaticAnalyzer/Core/Checker.h" 22#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25#include <climits> 26 27using namespace clang; 28using namespace ento; 29 30namespace { 31class GenericTaintChecker 32 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 33public: 34 static void *getTag() { 35 static int Tag; 36 return &Tag; 37 } 38 39 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 40 41 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 42 43private: 44 static const unsigned InvalidArgIndex = UINT_MAX; 45 /// Denotes the return vale. 46 static const unsigned ReturnValueIndex = UINT_MAX - 1; 47 48 mutable std::unique_ptr<BugType> BT; 49 inline void initBugType() const { 50 if (!BT) 51 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 52 } 53 54 /// Catch taint related bugs. 
Check if tainted data is passed to a 55 /// system call etc. 56 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// Add taint sources on a pre-visit. 59 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// Propagate taint generated at pre-visit. 62 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Add taint sources on a post visit. 65 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 66 67 /// Check if the region the expression evaluates to is the standard input, 68 /// and thus, is tainted. 69 static bool isStdin(const Expr *E, CheckerContext &C); 70 71 /// Given a pointer argument, return the value it points to. 72 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 73 74 /// Functions defining the attack surface. 75 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)( 76 const CallExpr *, CheckerContext &C) const; 77 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 78 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 79 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 80 81 /// Taint the scanned input if the file is tainted. 82 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 83 84 /// Check for CWE-134: Uncontrolled Format String. 85 static const char MsgUncontrolledFormatString[]; 86 bool checkUncontrolledFormatString(const CallExpr *CE, 87 CheckerContext &C) const; 88 89 /// Check for: 90 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 91 /// CWE-78, "Failure to Sanitize Data into an OS Command" 92 static const char MsgSanitizeSystemArgs[]; 93 bool checkSystemCall(const CallExpr *CE, StringRef Name, 94 CheckerContext &C) const; 95 96 /// Check if tainted data is used as a buffer size ins strn.. functions, 97 /// and allocators. 
98 static const char MsgTaintedBufferSize[]; 99 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 100 CheckerContext &C) const; 101 102 /// Generate a report if the expression is tainted or points to tainted data. 103 bool generateReportIfTainted(const Expr *E, const char Msg[], 104 CheckerContext &C) const; 105 106 typedef SmallVector<unsigned, 2> ArgVector; 107 108 /// A struct used to specify taint propagation rules for a function. 109 /// 110 /// If any of the possible taint source arguments is tainted, all of the 111 /// destination arguments should also be tainted. Use InvalidArgIndex in the 112 /// src list to specify that all of the arguments can introduce taint. Use 113 /// InvalidArgIndex in the dst arguments to signify that all the non-const 114 /// pointer and reference arguments might be tainted on return. If 115 /// ReturnValueIndex is added to the dst list, the return value will be 116 /// tainted. 117 struct TaintPropagationRule { 118 /// List of arguments which can be taint sources and should be checked. 119 ArgVector SrcArgs; 120 /// List of arguments which should be tainted on function return. 121 ArgVector DstArgs; 122 // TODO: Check if using other data structures would be more optimal. 123 124 TaintPropagationRule() {} 125 126 TaintPropagationRule(unsigned SArg, unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, unsigned DArg, 134 bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 
143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, 145 CheckerContext &C); 146 147 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 148 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 149 150 inline bool isNull() const { return SrcArgs.empty(); } 151 152 inline bool isDestinationArgument(unsigned ArgNum) const { 153 return (std::find(DstArgs.begin(), DstArgs.end(), ArgNum) != 154 DstArgs.end()); 155 } 156 157 static inline bool isTaintedOrPointsToTainted(const Expr *E, 158 ProgramStateRef State, 159 CheckerContext &C) { 160 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) 161 return true; 162 163 if (!E->getType().getTypePtr()->isPointerType()) 164 return false; 165 166 Optional<SVal> V = getPointedToSVal(C, E); 167 return (V && State->isTainted(*V)); 168 } 169 170 /// Pre-process a function which propagates taint according to the 171 /// taint rule. 172 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 173 }; 174}; 175 176const unsigned GenericTaintChecker::ReturnValueIndex; 177const unsigned GenericTaintChecker::InvalidArgIndex; 178 179const char GenericTaintChecker::MsgUncontrolledFormatString[] = 180 "Untrusted data is used as a format string " 181 "(CWE-134: Uncontrolled Format String)"; 182 183const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 184 "Untrusted data is passed to a system call " 185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 186 187const char GenericTaintChecker::MsgTaintedBufferSize[] = 188 "Untrusted data is used to specify the buffer size " 189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 190 "for " 191 "character data and the null terminator)"; 192 193} // end of anonymous namespace 194 195/// A set which is used to pass information from call pre-visit instruction 196/// to the call post-visit. 
The values are unsigned integers, which are either 197/// ReturnValueIndex, or indexes of the pointer/reference argument, which 198/// points to data, which should be tainted on return. 199REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 200 201GenericTaintChecker::TaintPropagationRule 202GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 203 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { 204 // TODO: Currently, we might lose precision here: we always mark a return 205 // value as tainted even if it's just a pointer, pointing to tainted data. 206 207 // Check for exact name match for functions without builtin substitutes. 208 TaintPropagationRule Rule = 209 llvm::StringSwitch<TaintPropagationRule>(Name) 210 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 211 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 212 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 213 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("read", TaintPropagationRule(0, 2, 1, true)) 222 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 223 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 224 .Case("fgets", TaintPropagationRule(2, 0, true)) 225 .Case("getline", TaintPropagationRule(2, 0)) 226 .Case("getdelim", TaintPropagationRule(3, 0)) 227 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 228 .Default(TaintPropagationRule()); 229 230 if (!Rule.isNull()) 231 return Rule; 232 233 // Check if it's one of the memory setting/copying 
functions. 234 // This check is specialized but faster then calling isCLibraryFunction. 235 unsigned BId = 0; 236 if ((BId = FDecl->getMemoryFunctionKind())) 237 switch (BId) { 238 case Builtin::BImemcpy: 239 case Builtin::BImemmove: 240 case Builtin::BIstrncpy: 241 case Builtin::BIstrncat: 242 return TaintPropagationRule(1, 2, 0, true); 243 case Builtin::BIstrlcpy: 244 case Builtin::BIstrlcat: 245 return TaintPropagationRule(1, 2, 0, false); 246 case Builtin::BIstrndup: 247 return TaintPropagationRule(0, 1, ReturnValueIndex); 248 249 default: 250 break; 251 }; 252 253 // Process all other functions which could be defined as builtins. 254 if (Rule.isNull()) { 255 if (C.isCLibraryFunction(FDecl, "snprintf") || 256 C.isCLibraryFunction(FDecl, "sprintf")) 257 return TaintPropagationRule(InvalidArgIndex, 0, true); 258 else if (C.isCLibraryFunction(FDecl, "strcpy") || 259 C.isCLibraryFunction(FDecl, "stpcpy") || 260 C.isCLibraryFunction(FDecl, "strcat")) 261 return TaintPropagationRule(1, 0, true); 262 else if (C.isCLibraryFunction(FDecl, "bcopy")) 263 return TaintPropagationRule(0, 2, 1, false); 264 else if (C.isCLibraryFunction(FDecl, "strdup") || 265 C.isCLibraryFunction(FDecl, "strdupa")) 266 return TaintPropagationRule(0, ReturnValueIndex); 267 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 268 return TaintPropagationRule(0, ReturnValueIndex); 269 } 270 271 // Skipping the following functions, since they might be used for cleansing 272 // or smart memory copy: 273 // - memccpy - copying until hitting a special character. 274 275 return TaintPropagationRule(); 276} 277 278void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 279 CheckerContext &C) const { 280 // Check for errors first. 281 if (checkPre(CE, C)) 282 return; 283 284 // Add taint second. 
285 addSourcesPre(CE, C); 286} 287 288void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 289 CheckerContext &C) const { 290 if (propagateFromPre(CE, C)) 291 return; 292 addSourcesPost(CE, C); 293} 294 295void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 296 CheckerContext &C) const { 297 ProgramStateRef State = nullptr; 298 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 299 if (!FDecl || FDecl->getKind() != Decl::Function) 300 return; 301 302 StringRef Name = C.getCalleeName(FDecl); 303 if (Name.empty()) 304 return; 305 306 // First, try generating a propagation rule for this function. 307 TaintPropagationRule Rule = 308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 309 if (!Rule.isNull()) { 310 State = Rule.process(CE, C); 311 if (!State) 312 return; 313 C.addTransition(State); 314 return; 315 } 316 317 // Otherwise, check if we have custom pre-processing implemented. 318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 319 .Case("fscanf", &GenericTaintChecker::preFscanf) 320 .Default(nullptr); 321 // Check and evaluate the call. 322 if (evalFunction) 323 State = (this->*evalFunction)(CE, C); 324 if (!State) 325 return; 326 C.addTransition(State); 327} 328 329bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 330 CheckerContext &C) const { 331 ProgramStateRef State = C.getState(); 332 333 // Depending on what was tainted at pre-visit, we determined a set of 334 // arguments which should be tainted after the function returns. These are 335 // stored in the state as TaintArgsOnPostVisit set. 336 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 337 if (TaintArgs.isEmpty()) 338 return false; 339 340 for (llvm::ImmutableSet<unsigned>::iterator I = TaintArgs.begin(), 341 E = TaintArgs.end(); 342 I != E; ++I) { 343 unsigned ArgNum = *I; 344 345 // Special handling for the tainted return value. 
346 if (ArgNum == ReturnValueIndex) { 347 State = State->addTaint(CE, C.getLocationContext()); 348 continue; 349 } 350 351 // The arguments are pointer arguments. The data they are pointing at is 352 // tainted after the call. 353 if (CE->getNumArgs() < (ArgNum + 1)) 354 return false; 355 const Expr *Arg = CE->getArg(ArgNum); 356 Optional<SVal> V = getPointedToSVal(C, Arg); 357 if (V) 358 State = State->addTaint(*V); 359 } 360 361 // Clear up the taint info from the state. 362 State = State->remove<TaintArgsOnPostVisit>(); 363 364 if (State != C.getState()) { 365 C.addTransition(State); 366 return true; 367 } 368 return false; 369} 370 371void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 372 CheckerContext &C) const { 373 // Define the attack surface. 374 // Set the evaluation function by switching on the callee name. 375 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 376 if (!FDecl || FDecl->getKind() != Decl::Function) 377 return; 378 379 StringRef Name = C.getCalleeName(FDecl); 380 if (Name.empty()) 381 return; 382 FnCheck evalFunction = 383 llvm::StringSwitch<FnCheck>(Name) 384 .Case("scanf", &GenericTaintChecker::postScanf) 385 // TODO: Add support for vfscanf & family. 386 .Case("getchar", &GenericTaintChecker::postRetTaint) 387 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 388 .Case("getenv", &GenericTaintChecker::postRetTaint) 389 .Case("fopen", &GenericTaintChecker::postRetTaint) 390 .Case("fdopen", &GenericTaintChecker::postRetTaint) 391 .Case("freopen", &GenericTaintChecker::postRetTaint) 392 .Case("getch", &GenericTaintChecker::postRetTaint) 393 .Case("wgetch", &GenericTaintChecker::postRetTaint) 394 .Case("socket", &GenericTaintChecker::postSocket) 395 .Default(nullptr); 396 397 // If the callee isn't defined, it is not of security concern. 398 // Check and evaluate the call. 
399 ProgramStateRef State = nullptr; 400 if (evalFunction) 401 State = (this->*evalFunction)(CE, C); 402 if (!State) 403 return; 404 405 C.addTransition(State); 406} 407 408bool GenericTaintChecker::checkPre(const CallExpr *CE, 409 CheckerContext &C) const { 410 411 if (checkUncontrolledFormatString(CE, C)) 412 return true; 413 414 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 415 if (!FDecl || FDecl->getKind() != Decl::Function) 416 return false; 417 418 StringRef Name = C.getCalleeName(FDecl); 419 if (Name.empty()) 420 return false; 421 422 if (checkSystemCall(CE, Name, C)) 423 return true; 424 425 if (checkTaintedBufferSize(CE, FDecl, C)) 426 return true; 427 428 return false; 429} 430 431Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 432 const Expr *Arg) { 433 ProgramStateRef State = C.getState(); 434 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 435 if (AddrVal.isUnknownOrUndef()) 436 return None; 437 438 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 439 if (!AddrLoc) 440 return None; 441 442 QualType ArgTy = Arg->getType().getCanonicalType(); 443 if (!ArgTy->isPointerType()) 444 return None; 445 446 QualType ValTy = ArgTy->getPointeeType(); 447 448 // Do not dereference void pointers. Treat them as byte pointers instead. 449 // FIXME: we might want to consider more than just the first byte. 450 if (ValTy->isVoidType()) 451 ValTy = C.getASTContext().CharTy; 452 453 return State->getSVal(*AddrLoc, ValTy); 454} 455 456ProgramStateRef 457GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 458 CheckerContext &C) const { 459 ProgramStateRef State = C.getState(); 460 461 // Check for taint in arguments. 462 bool IsTainted = false; 463 for (ArgVector::const_iterator I = SrcArgs.begin(), E = SrcArgs.end(); I != E; 464 ++I) { 465 unsigned ArgNum = *I; 466 467 if (ArgNum == InvalidArgIndex) { 468 // Check if any of the arguments is tainted, but skip the 469 // destination arguments. 
470 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 471 if (isDestinationArgument(i)) 472 continue; 473 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 474 break; 475 } 476 break; 477 } 478 479 if (CE->getNumArgs() < (ArgNum + 1)) 480 return State; 481 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 482 break; 483 } 484 if (!IsTainted) 485 return State; 486 487 // Mark the arguments which should be tainted after the function returns. 488 for (ArgVector::const_iterator I = DstArgs.begin(), E = DstArgs.end(); I != E; 489 ++I) { 490 unsigned ArgNum = *I; 491 492 // Should we mark all arguments as tainted? 493 if (ArgNum == InvalidArgIndex) { 494 // For all pointer and references that were passed in: 495 // If they are not pointing to const data, mark data as tainted. 496 // TODO: So far we are just going one level down; ideally we'd need to 497 // recurse here. 498 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 499 const Expr *Arg = CE->getArg(i); 500 // Process pointer argument. 501 const Type *ArgTy = Arg->getType().getTypePtr(); 502 QualType PType = ArgTy->getPointeeType(); 503 if ((!PType.isNull() && !PType.isConstQualified()) || 504 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 505 State = State->add<TaintArgsOnPostVisit>(i); 506 } 507 continue; 508 } 509 510 // Should mark the return value? 511 if (ArgNum == ReturnValueIndex) { 512 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 513 continue; 514 } 515 516 // Mark the given argument. 517 assert(ArgNum < CE->getNumArgs()); 518 State = State->add<TaintArgsOnPostVisit>(ArgNum); 519 } 520 521 return State; 522} 523 524// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 525// and arg 1 should get taint. 
526ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 527 CheckerContext &C) const { 528 assert(CE->getNumArgs() >= 2); 529 ProgramStateRef State = C.getState(); 530 531 // Check is the file descriptor is tainted. 532 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 533 isStdin(CE->getArg(0), C)) { 534 // All arguments except for the first two should get taint. 535 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 536 State = State->add<TaintArgsOnPostVisit>(i); 537 return State; 538 } 539 540 return nullptr; 541} 542 543// If argument 0(protocol domain) is network, the return value should get taint. 544ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 545 CheckerContext &C) const { 546 ProgramStateRef State = C.getState(); 547 if (CE->getNumArgs() < 3) 548 return State; 549 550 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 551 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 552 // White list the internal communication protocols. 553 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 554 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 555 return State; 556 State = State->addTaint(CE, C.getLocationContext()); 557 return State; 558} 559 560ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 561 CheckerContext &C) const { 562 ProgramStateRef State = C.getState(); 563 if (CE->getNumArgs() < 2) 564 return State; 565 566 // All arguments except for the very first one should get taint. 567 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 568 // The arguments are pointer arguments. The data they are pointing at is 569 // tainted after the call. 
570 const Expr *Arg = CE->getArg(i); 571 Optional<SVal> V = getPointedToSVal(C, Arg); 572 if (V) 573 State = State->addTaint(*V); 574 } 575 return State; 576} 577 578ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 579 CheckerContext &C) const { 580 return C.getState()->addTaint(CE, C.getLocationContext()); 581} 582 583bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 584 ProgramStateRef State = C.getState(); 585 SVal Val = C.getSVal(E); 586 587 // stdin is a pointer, so it would be a region. 588 const MemRegion *MemReg = Val.getAsRegion(); 589 590 // The region should be symbolic, we do not know it's value. 591 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 592 if (!SymReg) 593 return false; 594 595 // Get it's symbol and find the declaration region it's pointing to. 596 const SymbolRegionValue *Sm = 597 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 598 if (!Sm) 599 return false; 600 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 601 if (!DeclReg) 602 return false; 603 604 // This region corresponds to a declaration, find out if it's a global/extern 605 // variable named stdin with the proper type. 606 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 607 D = D->getCanonicalDecl(); 608 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 609 if (const PointerType *PtrTy = 610 dyn_cast<PointerType>(D->getType().getTypePtr())) 611 if (PtrTy->getPointeeType().getCanonicalType() == 612 C.getASTContext().getFILEType().getCanonicalType()) 613 return true; 614 } 615 return false; 616} 617 618static bool getPrintfFormatArgumentNum(const CallExpr *CE, 619 const CheckerContext &C, 620 unsigned int &ArgNum) { 621 // Find if the function contains a format string argument. 622 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 623 // vsnprintf, syslog, custom annotated functions. 
624 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 625 if (!FDecl) 626 return false; 627 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 628 ArgNum = Format->getFormatIdx() - 1; 629 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 630 return true; 631 } 632 633 // Or if a function is named setproctitle (this is a heuristic). 634 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 635 ArgNum = 0; 636 return true; 637 } 638 639 return false; 640} 641 642bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 643 const char Msg[], 644 CheckerContext &C) const { 645 assert(E); 646 647 // Check for taint. 648 ProgramStateRef State = C.getState(); 649 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 650 SVal TaintedSVal; 651 if (PointedToSVal && State->isTainted(*PointedToSVal)) 652 TaintedSVal = *PointedToSVal; 653 else if (State->isTainted(E, C.getLocationContext())) 654 TaintedSVal = C.getSVal(E); 655 else 656 return false; 657 658 // Generate diagnostic. 659 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 660 initBugType(); 661 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 662 report->addRange(E->getSourceRange()); 663 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 664 C.emitReport(std::move(report)); 665 return true; 666 } 667 return false; 668} 669 670bool GenericTaintChecker::checkUncontrolledFormatString( 671 const CallExpr *CE, CheckerContext &C) const { 672 // Check if the function contains a format string argument. 673 unsigned int ArgNum = 0; 674 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 675 return false; 676 677 // If either the format string content or the pointer itself are tainted, 678 // warn. 
679 return generateReportIfTainted(CE->getArg(ArgNum), 680 MsgUncontrolledFormatString, C); 681} 682 683bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 684 CheckerContext &C) const { 685 // TODO: It might make sense to run this check on demand. In some cases, 686 // we should check if the environment has been cleansed here. We also might 687 // need to know if the user was reset before these calls(seteuid). 688 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 689 .Case("system", 0) 690 .Case("popen", 0) 691 .Case("execl", 0) 692 .Case("execle", 0) 693 .Case("execlp", 0) 694 .Case("execv", 0) 695 .Case("execvp", 0) 696 .Case("execvP", 0) 697 .Case("execve", 0) 698 .Case("dlopen", 0) 699 .Default(UINT_MAX); 700 701 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 702 return false; 703 704 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 705} 706 707// TODO: Should this check be a part of the CString checker? 708// If yes, should taint be a global setting? 709bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 710 const FunctionDecl *FDecl, 711 CheckerContext &C) const { 712 // If the function has a buffer size argument, set ArgNum. 
713 unsigned ArgNum = InvalidArgIndex; 714 unsigned BId = 0; 715 if ((BId = FDecl->getMemoryFunctionKind())) 716 switch (BId) { 717 case Builtin::BImemcpy: 718 case Builtin::BImemmove: 719 case Builtin::BIstrncpy: 720 ArgNum = 2; 721 break; 722 case Builtin::BIstrndup: 723 ArgNum = 1; 724 break; 725 default: 726 break; 727 }; 728 729 if (ArgNum == InvalidArgIndex) { 730 if (C.isCLibraryFunction(FDecl, "malloc") || 731 C.isCLibraryFunction(FDecl, "calloc") || 732 C.isCLibraryFunction(FDecl, "alloca")) 733 ArgNum = 0; 734 else if (C.isCLibraryFunction(FDecl, "memccpy")) 735 ArgNum = 3; 736 else if (C.isCLibraryFunction(FDecl, "realloc")) 737 ArgNum = 1; 738 else if (C.isCLibraryFunction(FDecl, "bcopy")) 739 ArgNum = 2; 740 } 741 742 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 743 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 744} 745 746void ento::registerGenericTaintChecker(CheckerManager &mgr) { 747 mgr.registerChecker<GenericTaintChecker>(); 748} 749