1234287Sdim//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2234287Sdim// 3234287Sdim// The LLVM Compiler Infrastructure 4234287Sdim// 5234287Sdim// This file is distributed under the University of Illinois Open Source 6234287Sdim// License. See LICENSE.TXT for details. 7234287Sdim// 8234287Sdim//===----------------------------------------------------------------------===// 9234287Sdim// 10234287Sdim// This checker defines the attack surface for generic taint propagation. 11234287Sdim// 12234287Sdim// The taint information produced by it might be useful to other checkers. For 13234287Sdim// example, checkers should report errors which involve tainted data more 14234287Sdim// aggressively, even if the involved symbols are under constrained. 15234287Sdim// 16234287Sdim//===----------------------------------------------------------------------===// 17234287Sdim#include "ClangSACheckers.h" 18249423Sdim#include "clang/AST/Attr.h" 19249423Sdim#include "clang/Basic/Builtins.h" 20249423Sdim#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21234287Sdim#include "clang/StaticAnalyzer/Core/Checker.h" 22234287Sdim#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25234287Sdim#include <climits> 26234287Sdim 27234287Sdimusing namespace clang; 28234287Sdimusing namespace ento; 29234287Sdim 30234287Sdimnamespace { 31234287Sdimclass GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32234287Sdim check::PreStmt<CallExpr> > { 33234287Sdimpublic: 34234287Sdim static void *getTag() { static int Tag; return &Tag; } 35234287Sdim 36234287Sdim void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37234287Sdim void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 38234287Sdim 39234287Sdim void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 40234287Sdim 41234287Sdimprivate: 42234287Sdim static const unsigned InvalidArgIndex = UINT_MAX; 43234287Sdim /// Denotes the return vale. 44234287Sdim static const unsigned ReturnValueIndex = UINT_MAX - 1; 45234287Sdim 46234287Sdim mutable OwningPtr<BugType> BT; 47234287Sdim inline void initBugType() const { 48234287Sdim if (!BT) 49234287Sdim BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 50234287Sdim } 51234287Sdim 52234287Sdim /// \brief Catch taint related bugs. Check if tainted data is passed to a 53234287Sdim /// system call etc. 54234287Sdim bool checkPre(const CallExpr *CE, CheckerContext &C) const; 55234287Sdim 56234287Sdim /// \brief Add taint sources on a pre-visit. 57234287Sdim void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 58234287Sdim 59234287Sdim /// \brief Propagate taint generated at pre-visit. 60234287Sdim bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 61234287Sdim 62234287Sdim /// \brief Add taint sources on a post visit. 63234287Sdim void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 64234287Sdim 65234287Sdim /// Check if the region the expression evaluates to is the standard input, 66234287Sdim /// and thus, is tainted. 67234287Sdim static bool isStdin(const Expr *E, CheckerContext &C); 68234287Sdim 69234287Sdim /// \brief Given a pointer argument, get the symbol of the value it contains 70234287Sdim /// (points to). 71234287Sdim static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 72234287Sdim 73234287Sdim /// Functions defining the attack surface. 74234287Sdim typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 75234287Sdim CheckerContext &C) const; 76234287Sdim ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 77234287Sdim ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 78234287Sdim ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 79234287Sdim 80234287Sdim /// Taint the scanned input if the file is tainted. 81234287Sdim ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 82234287Sdim 83234287Sdim /// Check for CWE-134: Uncontrolled Format String. 84234287Sdim static const char MsgUncontrolledFormatString[]; 85234287Sdim bool checkUncontrolledFormatString(const CallExpr *CE, 86234287Sdim CheckerContext &C) const; 87234287Sdim 88234287Sdim /// Check for: 89234287Sdim /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 90234287Sdim /// CWE-78, "Failure to Sanitize Data into an OS Command" 91234287Sdim static const char MsgSanitizeSystemArgs[]; 92234287Sdim bool checkSystemCall(const CallExpr *CE, StringRef Name, 93234287Sdim CheckerContext &C) const; 94234287Sdim 95234287Sdim /// Check if tainted data is used as a buffer size ins strn.. functions, 96234287Sdim /// and allocators. 97234287Sdim static const char MsgTaintedBufferSize[]; 98234287Sdim bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 99234287Sdim CheckerContext &C) const; 100234287Sdim 101234287Sdim /// Generate a report if the expression is tainted or points to tainted data. 102234287Sdim bool generateReportIfTainted(const Expr *E, const char Msg[], 103234287Sdim CheckerContext &C) const; 104234287Sdim 105234287Sdim 106249423Sdim typedef SmallVector<unsigned, 2> ArgVector; 107234287Sdim 108234287Sdim /// \brief A struct used to specify taint propagation rules for a function. 109234287Sdim /// 110234287Sdim /// If any of the possible taint source arguments is tainted, all of the 111234287Sdim /// destination arguments should also be tainted. Use InvalidArgIndex in the 112234287Sdim /// src list to specify that all of the arguments can introduce taint. Use 113234287Sdim /// InvalidArgIndex in the dst arguments to signify that all the non-const 114234287Sdim /// pointer and reference arguments might be tainted on return. If 115234287Sdim /// ReturnValueIndex is added to the dst list, the return value will be 116234287Sdim /// tainted. 117234287Sdim struct TaintPropagationRule { 118234287Sdim /// List of arguments which can be taint sources and should be checked. 119234287Sdim ArgVector SrcArgs; 120234287Sdim /// List of arguments which should be tainted on function return. 121234287Sdim ArgVector DstArgs; 122234287Sdim // TODO: Check if using other data structures would be more optimal. 123234287Sdim 124234287Sdim TaintPropagationRule() {} 125234287Sdim 126234287Sdim TaintPropagationRule(unsigned SArg, 127234287Sdim unsigned DArg, bool TaintRet = false) { 128234287Sdim SrcArgs.push_back(SArg); 129234287Sdim DstArgs.push_back(DArg); 130234287Sdim if (TaintRet) 131234287Sdim DstArgs.push_back(ReturnValueIndex); 132234287Sdim } 133234287Sdim 134234287Sdim TaintPropagationRule(unsigned SArg1, unsigned SArg2, 135234287Sdim unsigned DArg, bool TaintRet = false) { 136234287Sdim SrcArgs.push_back(SArg1); 137234287Sdim SrcArgs.push_back(SArg2); 138234287Sdim DstArgs.push_back(DArg); 139234287Sdim if (TaintRet) 140234287Sdim DstArgs.push_back(ReturnValueIndex); 141234287Sdim } 142234287Sdim 143234287Sdim /// Get the propagation rule for a given function. 144234287Sdim static TaintPropagationRule 145234287Sdim getTaintPropagationRule(const FunctionDecl *FDecl, 146234287Sdim StringRef Name, 147234287Sdim CheckerContext &C); 148234287Sdim 149234287Sdim inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 150234287Sdim inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 151234287Sdim 152234287Sdim inline bool isNull() const { return SrcArgs.empty(); } 153234287Sdim 154234287Sdim inline bool isDestinationArgument(unsigned ArgNum) const { 155234287Sdim return (std::find(DstArgs.begin(), 156234287Sdim DstArgs.end(), ArgNum) != DstArgs.end()); 157234287Sdim } 158234287Sdim 159234287Sdim static inline bool isTaintedOrPointsToTainted(const Expr *E, 160234287Sdim ProgramStateRef State, 161234287Sdim CheckerContext &C) { 162234287Sdim return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 163234287Sdim (E->getType().getTypePtr()->isPointerType() && 164234287Sdim State->isTainted(getPointedToSymbol(C, E)))); 165234287Sdim } 166234287Sdim 167234287Sdim /// \brief Pre-process a function which propagates taint according to the 168234287Sdim /// taint rule. 169234287Sdim ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 170234287Sdim 171234287Sdim }; 172234287Sdim}; 173234287Sdim 174234287Sdimconst unsigned GenericTaintChecker::ReturnValueIndex; 175234287Sdimconst unsigned GenericTaintChecker::InvalidArgIndex; 176234287Sdim 177234287Sdimconst char GenericTaintChecker::MsgUncontrolledFormatString[] = 178234287Sdim "Untrusted data is used as a format string " 179234287Sdim "(CWE-134: Uncontrolled Format String)"; 180234287Sdim 181234287Sdimconst char GenericTaintChecker::MsgSanitizeSystemArgs[] = 182234287Sdim "Untrusted data is passed to a system call " 183234287Sdim "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 184234287Sdim 185234287Sdimconst char GenericTaintChecker::MsgTaintedBufferSize[] = 186234287Sdim "Untrusted data is used to specify the buffer size " 187234287Sdim "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 188234287Sdim "character data and the null terminator)"; 189234287Sdim 190234287Sdim} // end of anonymous namespace 191234287Sdim 192234287Sdim/// A set which is used to pass information from call pre-visit instruction 193234287Sdim/// to the call post-visit. The values are unsigned integers, which are either 194234287Sdim/// ReturnValueIndex, or indexes of the pointer/reference argument, which 195234287Sdim/// points to data, which should be tainted on return. 196243830SdimREGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 197234287Sdim 198234287SdimGenericTaintChecker::TaintPropagationRule 199234287SdimGenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 200234287Sdim const FunctionDecl *FDecl, 201234287Sdim StringRef Name, 202234287Sdim CheckerContext &C) { 203234287Sdim // TODO: Currently, we might loose precision here: we always mark a return 204234287Sdim // value as tainted even if it's just a pointer, pointing to tainted data. 205234287Sdim 206234287Sdim // Check for exact name match for functions without builtin substitutes. 207234287Sdim TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 208234287Sdim .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 209234287Sdim .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 210234287Sdim .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 211234287Sdim .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 212234287Sdim .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 213234287Sdim .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 214234287Sdim .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 215234287Sdim .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 216234287Sdim .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 217234287Sdim .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 218234287Sdim .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 219234287Sdim .Case("read", TaintPropagationRule(0, 2, 1, true)) 220234287Sdim .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 221234287Sdim .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 222234287Sdim .Case("fgets", TaintPropagationRule(2, 0, true)) 223234287Sdim .Case("getline", TaintPropagationRule(2, 0)) 224234287Sdim .Case("getdelim", TaintPropagationRule(3, 0)) 225234287Sdim .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 226234287Sdim .Default(TaintPropagationRule()); 227234287Sdim 228234287Sdim if (!Rule.isNull()) 229234287Sdim return Rule; 230234287Sdim 231234287Sdim // Check if it's one of the memory setting/copying functions. 232234287Sdim // This check is specialized but faster then calling isCLibraryFunction. 233234287Sdim unsigned BId = 0; 234234287Sdim if ( (BId = FDecl->getMemoryFunctionKind()) ) 235234287Sdim switch(BId) { 236234287Sdim case Builtin::BImemcpy: 237234287Sdim case Builtin::BImemmove: 238234287Sdim case Builtin::BIstrncpy: 239234287Sdim case Builtin::BIstrncat: 240234287Sdim return TaintPropagationRule(1, 2, 0, true); 241234287Sdim case Builtin::BIstrlcpy: 242234287Sdim case Builtin::BIstrlcat: 243234287Sdim return TaintPropagationRule(1, 2, 0, false); 244234287Sdim case Builtin::BIstrndup: 245234287Sdim return TaintPropagationRule(0, 1, ReturnValueIndex); 246234287Sdim 247234287Sdim default: 248234287Sdim break; 249234287Sdim }; 250234287Sdim 251234287Sdim // Process all other functions which could be defined as builtins. 252234287Sdim if (Rule.isNull()) { 253234287Sdim if (C.isCLibraryFunction(FDecl, "snprintf") || 254234287Sdim C.isCLibraryFunction(FDecl, "sprintf")) 255234287Sdim return TaintPropagationRule(InvalidArgIndex, 0, true); 256234287Sdim else if (C.isCLibraryFunction(FDecl, "strcpy") || 257234287Sdim C.isCLibraryFunction(FDecl, "stpcpy") || 258234287Sdim C.isCLibraryFunction(FDecl, "strcat")) 259234287Sdim return TaintPropagationRule(1, 0, true); 260234287Sdim else if (C.isCLibraryFunction(FDecl, "bcopy")) 261234287Sdim return TaintPropagationRule(0, 2, 1, false); 262234287Sdim else if (C.isCLibraryFunction(FDecl, "strdup") || 263234287Sdim C.isCLibraryFunction(FDecl, "strdupa")) 264234287Sdim return TaintPropagationRule(0, ReturnValueIndex); 265234287Sdim else if (C.isCLibraryFunction(FDecl, "wcsdup")) 266234287Sdim return TaintPropagationRule(0, ReturnValueIndex); 267234287Sdim } 268234287Sdim 269234287Sdim // Skipping the following functions, since they might be used for cleansing 270234287Sdim // or smart memory copy: 271239462Sdim // - memccpy - copying until hitting a special character. 272234287Sdim 273234287Sdim return TaintPropagationRule(); 274234287Sdim} 275234287Sdim 276234287Sdimvoid GenericTaintChecker::checkPreStmt(const CallExpr *CE, 277234287Sdim CheckerContext &C) const { 278234287Sdim // Check for errors first. 279234287Sdim if (checkPre(CE, C)) 280234287Sdim return; 281234287Sdim 282234287Sdim // Add taint second. 283234287Sdim addSourcesPre(CE, C); 284234287Sdim} 285234287Sdim 286234287Sdimvoid GenericTaintChecker::checkPostStmt(const CallExpr *CE, 287234287Sdim CheckerContext &C) const { 288234287Sdim if (propagateFromPre(CE, C)) 289234287Sdim return; 290234287Sdim addSourcesPost(CE, C); 291234287Sdim} 292234287Sdim 293234287Sdimvoid GenericTaintChecker::addSourcesPre(const CallExpr *CE, 294234287Sdim CheckerContext &C) const { 295234287Sdim ProgramStateRef State = 0; 296234287Sdim const FunctionDecl *FDecl = C.getCalleeDecl(CE); 297239462Sdim if (!FDecl || FDecl->getKind() != Decl::Function) 298239462Sdim return; 299239462Sdim 300234287Sdim StringRef Name = C.getCalleeName(FDecl); 301234287Sdim if (Name.empty()) 302234287Sdim return; 303234287Sdim 304234287Sdim // First, try generating a propagation rule for this function. 305234287Sdim TaintPropagationRule Rule = 306234287Sdim TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 307234287Sdim if (!Rule.isNull()) { 308234287Sdim State = Rule.process(CE, C); 309234287Sdim if (!State) 310234287Sdim return; 311234287Sdim C.addTransition(State); 312234287Sdim return; 313234287Sdim } 314234287Sdim 315234287Sdim // Otherwise, check if we have custom pre-processing implemented. 316234287Sdim FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 317234287Sdim .Case("fscanf", &GenericTaintChecker::preFscanf) 318234287Sdim .Default(0); 319234287Sdim // Check and evaluate the call. 320234287Sdim if (evalFunction) 321234287Sdim State = (this->*evalFunction)(CE, C); 322234287Sdim if (!State) 323234287Sdim return; 324234287Sdim C.addTransition(State); 325234287Sdim 326234287Sdim} 327234287Sdim 328234287Sdimbool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 329234287Sdim CheckerContext &C) const { 330234287Sdim ProgramStateRef State = C.getState(); 331234287Sdim 332234287Sdim // Depending on what was tainted at pre-visit, we determined a set of 333234287Sdim // arguments which should be tainted after the function returns. These are 334234287Sdim // stored in the state as TaintArgsOnPostVisit set. 335243830Sdim TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 336234287Sdim if (TaintArgs.isEmpty()) 337234287Sdim return false; 338234287Sdim 339234287Sdim for (llvm::ImmutableSet<unsigned>::iterator 340234287Sdim I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 341234287Sdim unsigned ArgNum = *I; 342234287Sdim 343234287Sdim // Special handling for the tainted return value. 344234287Sdim if (ArgNum == ReturnValueIndex) { 345234287Sdim State = State->addTaint(CE, C.getLocationContext()); 346234287Sdim continue; 347234287Sdim } 348234287Sdim 349234287Sdim // The arguments are pointer arguments. The data they are pointing at is 350234287Sdim // tainted after the call. 351234287Sdim if (CE->getNumArgs() < (ArgNum + 1)) 352234287Sdim return false; 353234287Sdim const Expr* Arg = CE->getArg(ArgNum); 354234287Sdim SymbolRef Sym = getPointedToSymbol(C, Arg); 355234287Sdim if (Sym) 356234287Sdim State = State->addTaint(Sym); 357234287Sdim } 358234287Sdim 359234287Sdim // Clear up the taint info from the state. 360234287Sdim State = State->remove<TaintArgsOnPostVisit>(); 361234287Sdim 362234287Sdim if (State != C.getState()) { 363234287Sdim C.addTransition(State); 364234287Sdim return true; 365234287Sdim } 366234287Sdim return false; 367234287Sdim} 368234287Sdim 369234287Sdimvoid GenericTaintChecker::addSourcesPost(const CallExpr *CE, 370234287Sdim CheckerContext &C) const { 371234287Sdim // Define the attack surface. 372234287Sdim // Set the evaluation function by switching on the callee name. 373239462Sdim const FunctionDecl *FDecl = C.getCalleeDecl(CE); 374239462Sdim if (!FDecl || FDecl->getKind() != Decl::Function) 375239462Sdim return; 376239462Sdim 377239462Sdim StringRef Name = C.getCalleeName(FDecl); 378234287Sdim if (Name.empty()) 379234287Sdim return; 380234287Sdim FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 381234287Sdim .Case("scanf", &GenericTaintChecker::postScanf) 382234287Sdim // TODO: Add support for vfscanf & family. 383234287Sdim .Case("getchar", &GenericTaintChecker::postRetTaint) 384234287Sdim .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 385234287Sdim .Case("getenv", &GenericTaintChecker::postRetTaint) 386234287Sdim .Case("fopen", &GenericTaintChecker::postRetTaint) 387234287Sdim .Case("fdopen", &GenericTaintChecker::postRetTaint) 388234287Sdim .Case("freopen", &GenericTaintChecker::postRetTaint) 389234287Sdim .Case("getch", &GenericTaintChecker::postRetTaint) 390234287Sdim .Case("wgetch", &GenericTaintChecker::postRetTaint) 391234287Sdim .Case("socket", &GenericTaintChecker::postSocket) 392234287Sdim .Default(0); 393234287Sdim 394234287Sdim // If the callee isn't defined, it is not of security concern. 395234287Sdim // Check and evaluate the call. 396234287Sdim ProgramStateRef State = 0; 397234287Sdim if (evalFunction) 398234287Sdim State = (this->*evalFunction)(CE, C); 399234287Sdim if (!State) 400234287Sdim return; 401234287Sdim 402234287Sdim C.addTransition(State); 403234287Sdim} 404234287Sdim 405234287Sdimbool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 406234287Sdim 407234287Sdim if (checkUncontrolledFormatString(CE, C)) 408234287Sdim return true; 409234287Sdim 410234287Sdim const FunctionDecl *FDecl = C.getCalleeDecl(CE); 411239462Sdim if (!FDecl || FDecl->getKind() != Decl::Function) 412239462Sdim return false; 413239462Sdim 414234287Sdim StringRef Name = C.getCalleeName(FDecl); 415234287Sdim if (Name.empty()) 416234287Sdim return false; 417234287Sdim 418234287Sdim if (checkSystemCall(CE, Name, C)) 419234287Sdim return true; 420234287Sdim 421234287Sdim if (checkTaintedBufferSize(CE, FDecl, C)) 422234287Sdim return true; 423234287Sdim 424234287Sdim return false; 425234287Sdim} 426234287Sdim 427234287SdimSymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 428234287Sdim const Expr* Arg) { 429234287Sdim ProgramStateRef State = C.getState(); 430234287Sdim SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 431234287Sdim if (AddrVal.isUnknownOrUndef()) 432234287Sdim return 0; 433234287Sdim 434249423Sdim Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 435234287Sdim if (!AddrLoc) 436234287Sdim return 0; 437234287Sdim 438234287Sdim const PointerType *ArgTy = 439234287Sdim dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 440234287Sdim SVal Val = State->getSVal(*AddrLoc, 441234287Sdim ArgTy ? ArgTy->getPointeeType(): QualType()); 442234287Sdim return Val.getAsSymbol(); 443234287Sdim} 444234287Sdim 445234287SdimProgramStateRef 446234287SdimGenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 447234287Sdim CheckerContext &C) const { 448234287Sdim ProgramStateRef State = C.getState(); 449234287Sdim 450234287Sdim // Check for taint in arguments. 451234287Sdim bool IsTainted = false; 452234287Sdim for (ArgVector::const_iterator I = SrcArgs.begin(), 453234287Sdim E = SrcArgs.end(); I != E; ++I) { 454234287Sdim unsigned ArgNum = *I; 455234287Sdim 456234287Sdim if (ArgNum == InvalidArgIndex) { 457234287Sdim // Check if any of the arguments is tainted, but skip the 458234287Sdim // destination arguments. 459234287Sdim for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 460234287Sdim if (isDestinationArgument(i)) 461234287Sdim continue; 462234287Sdim if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 463234287Sdim break; 464234287Sdim } 465234287Sdim break; 466234287Sdim } 467234287Sdim 468234287Sdim if (CE->getNumArgs() < (ArgNum + 1)) 469234287Sdim return State; 470234287Sdim if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 471234287Sdim break; 472234287Sdim } 473234287Sdim if (!IsTainted) 474234287Sdim return State; 475234287Sdim 476234287Sdim // Mark the arguments which should be tainted after the function returns. 477234287Sdim for (ArgVector::const_iterator I = DstArgs.begin(), 478234287Sdim E = DstArgs.end(); I != E; ++I) { 479234287Sdim unsigned ArgNum = *I; 480234287Sdim 481234287Sdim // Should we mark all arguments as tainted? 482234287Sdim if (ArgNum == InvalidArgIndex) { 483234287Sdim // For all pointer and references that were passed in: 484234287Sdim // If they are not pointing to const data, mark data as tainted. 485234287Sdim // TODO: So far we are just going one level down; ideally we'd need to 486234287Sdim // recurse here. 487234287Sdim for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 488234287Sdim const Expr *Arg = CE->getArg(i); 489234287Sdim // Process pointer argument. 490234287Sdim const Type *ArgTy = Arg->getType().getTypePtr(); 491234287Sdim QualType PType = ArgTy->getPointeeType(); 492234287Sdim if ((!PType.isNull() && !PType.isConstQualified()) 493234287Sdim || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 494234287Sdim State = State->add<TaintArgsOnPostVisit>(i); 495234287Sdim } 496234287Sdim continue; 497234287Sdim } 498234287Sdim 499234287Sdim // Should mark the return value? 500234287Sdim if (ArgNum == ReturnValueIndex) { 501234287Sdim State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 502234287Sdim continue; 503234287Sdim } 504234287Sdim 505234287Sdim // Mark the given argument. 506234287Sdim assert(ArgNum < CE->getNumArgs()); 507234287Sdim State = State->add<TaintArgsOnPostVisit>(ArgNum); 508234287Sdim } 509234287Sdim 510234287Sdim return State; 511234287Sdim} 512234287Sdim 513234287Sdim 514234287Sdim// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 515234287Sdim// and arg 1 should get taint. 516234287SdimProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 517234287Sdim CheckerContext &C) const { 518234287Sdim assert(CE->getNumArgs() >= 2); 519234287Sdim ProgramStateRef State = C.getState(); 520234287Sdim 521234287Sdim // Check is the file descriptor is tainted. 522234287Sdim if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 523234287Sdim isStdin(CE->getArg(0), C)) { 524234287Sdim // All arguments except for the first two should get taint. 525234287Sdim for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 526234287Sdim State = State->add<TaintArgsOnPostVisit>(i); 527234287Sdim return State; 528234287Sdim } 529234287Sdim 530234287Sdim return 0; 531234287Sdim} 532234287Sdim 533234287Sdim 534234287Sdim// If argument 0(protocol domain) is network, the return value should get taint. 535234287SdimProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 536234287Sdim CheckerContext &C) const { 537234287Sdim ProgramStateRef State = C.getState(); 538234287Sdim if (CE->getNumArgs() < 3) 539234287Sdim return State; 540234287Sdim 541234287Sdim SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 542234287Sdim StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 543234287Sdim // White list the internal communication protocols. 544234287Sdim if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 545234287Sdim DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 546234287Sdim return State; 547234287Sdim State = State->addTaint(CE, C.getLocationContext()); 548234287Sdim return State; 549234287Sdim} 550234287Sdim 551234287SdimProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 552234287Sdim CheckerContext &C) const { 553234287Sdim ProgramStateRef State = C.getState(); 554234287Sdim if (CE->getNumArgs() < 2) 555234287Sdim return State; 556234287Sdim 557234287Sdim // All arguments except for the very first one should get taint. 558234287Sdim for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 559234287Sdim // The arguments are pointer arguments. The data they are pointing at is 560234287Sdim // tainted after the call. 561234287Sdim const Expr* Arg = CE->getArg(i); 562234287Sdim SymbolRef Sym = getPointedToSymbol(C, Arg); 563234287Sdim if (Sym) 564234287Sdim State = State->addTaint(Sym); 565234287Sdim } 566234287Sdim return State; 567234287Sdim} 568234287Sdim 569234287SdimProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 570234287Sdim CheckerContext &C) const { 571234287Sdim return C.getState()->addTaint(CE, C.getLocationContext()); 572234287Sdim} 573234287Sdim 574234287Sdimbool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 575234287Sdim ProgramStateRef State = C.getState(); 576234287Sdim SVal Val = State->getSVal(E, C.getLocationContext()); 577234287Sdim 578234287Sdim // stdin is a pointer, so it would be a region. 579234287Sdim const MemRegion *MemReg = Val.getAsRegion(); 580234287Sdim 581234287Sdim // The region should be symbolic, we do not know it's value. 582234287Sdim const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 583234287Sdim if (!SymReg) 584234287Sdim return false; 585234287Sdim 586234287Sdim // Get it's symbol and find the declaration region it's pointing to. 587234287Sdim const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 588234287Sdim if (!Sm) 589234287Sdim return false; 590234287Sdim const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 591234287Sdim if (!DeclReg) 592234287Sdim return false; 593234287Sdim 594234287Sdim // This region corresponds to a declaration, find out if it's a global/extern 595234287Sdim // variable named stdin with the proper type. 596234287Sdim if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 597234287Sdim D = D->getCanonicalDecl(); 598234287Sdim if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 599234287Sdim if (const PointerType * PtrTy = 600234287Sdim dyn_cast<PointerType>(D->getType().getTypePtr())) 601234287Sdim if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 602234287Sdim return true; 603234287Sdim } 604234287Sdim return false; 605234287Sdim} 606234287Sdim 607234287Sdimstatic bool getPrintfFormatArgumentNum(const CallExpr *CE, 608234287Sdim const CheckerContext &C, 609234287Sdim unsigned int &ArgNum) { 610234287Sdim // Find if the function contains a format string argument. 611234287Sdim // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 612234287Sdim // vsnprintf, syslog, custom annotated functions. 613234287Sdim const FunctionDecl *FDecl = C.getCalleeDecl(CE); 614234287Sdim if (!FDecl) 615234287Sdim return false; 616234287Sdim for (specific_attr_iterator<FormatAttr> 617234287Sdim i = FDecl->specific_attr_begin<FormatAttr>(), 618234287Sdim e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 619234287Sdim 620234287Sdim const FormatAttr *Format = *i; 621234287Sdim ArgNum = Format->getFormatIdx() - 1; 622263508Sdim if ((Format->getType()->getName() == "printf") && 623263508Sdim CE->getNumArgs() > ArgNum) 624234287Sdim return true; 625234287Sdim } 626234287Sdim 627234287Sdim // Or if a function is named setproctitle (this is a heuristic). 628234287Sdim if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 629234287Sdim ArgNum = 0; 630234287Sdim return true; 631234287Sdim } 632234287Sdim 633234287Sdim return false; 634234287Sdim} 635234287Sdim 636234287Sdimbool GenericTaintChecker::generateReportIfTainted(const Expr *E, 637234287Sdim const char Msg[], 638234287Sdim CheckerContext &C) const { 639234287Sdim assert(E); 640234287Sdim 641234287Sdim // Check for taint. 642234287Sdim ProgramStateRef State = C.getState(); 643234287Sdim if (!State->isTainted(getPointedToSymbol(C, E)) && 644234287Sdim !State->isTainted(E, C.getLocationContext())) 645234287Sdim return false; 646234287Sdim 647234287Sdim // Generate diagnostic. 648234287Sdim if (ExplodedNode *N = C.addTransition()) { 649234287Sdim initBugType(); 650234287Sdim BugReport *report = new BugReport(*BT, Msg, N); 651234287Sdim report->addRange(E->getSourceRange()); 652243830Sdim C.emitReport(report); 653234287Sdim return true; 654234287Sdim } 655234287Sdim return false; 656234287Sdim} 657234287Sdim 658234287Sdimbool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 659234287Sdim CheckerContext &C) const{ 660234287Sdim // Check if the function contains a format string argument. 661234287Sdim unsigned int ArgNum = 0; 662234287Sdim if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 663234287Sdim return false; 664234287Sdim 665234287Sdim // If either the format string content or the pointer itself are tainted, warn. 666234287Sdim if (generateReportIfTainted(CE->getArg(ArgNum), 667234287Sdim MsgUncontrolledFormatString, C)) 668234287Sdim return true; 669234287Sdim return false; 670234287Sdim} 671234287Sdim 672234287Sdimbool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 673234287Sdim StringRef Name, 674234287Sdim CheckerContext &C) const { 675234287Sdim // TODO: It might make sense to run this check on demand. In some cases, 676234287Sdim // we should check if the environment has been cleansed here. We also might 677234287Sdim // need to know if the user was reset before these calls(seteuid). 678234287Sdim unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 679234287Sdim .Case("system", 0) 680234287Sdim .Case("popen", 0) 681234287Sdim .Case("execl", 0) 682234287Sdim .Case("execle", 0) 683234287Sdim .Case("execlp", 0) 684234287Sdim .Case("execv", 0) 685234287Sdim .Case("execvp", 0) 686234287Sdim .Case("execvP", 0) 687234287Sdim .Case("execve", 0) 688234287Sdim .Case("dlopen", 0) 689234287Sdim .Default(UINT_MAX); 690234287Sdim 691234287Sdim if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 692234287Sdim return false; 693234287Sdim 694234287Sdim if (generateReportIfTainted(CE->getArg(ArgNum), 695234287Sdim MsgSanitizeSystemArgs, C)) 696234287Sdim return true; 697234287Sdim 698234287Sdim return false; 699234287Sdim} 700234287Sdim 701234287Sdim// TODO: Should this check be a part of the CString checker? 702234287Sdim// If yes, should taint be a global setting? 703234287Sdimbool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 704234287Sdim const FunctionDecl *FDecl, 705234287Sdim CheckerContext &C) const { 706234287Sdim // If the function has a buffer size argument, set ArgNum. 707234287Sdim unsigned ArgNum = InvalidArgIndex; 708234287Sdim unsigned BId = 0; 709234287Sdim if ( (BId = FDecl->getMemoryFunctionKind()) ) 710234287Sdim switch(BId) { 711234287Sdim case Builtin::BImemcpy: 712234287Sdim case Builtin::BImemmove: 713234287Sdim case Builtin::BIstrncpy: 714234287Sdim ArgNum = 2; 715234287Sdim break; 716234287Sdim case Builtin::BIstrndup: 717234287Sdim ArgNum = 1; 718234287Sdim break; 719234287Sdim default: 720234287Sdim break; 721234287Sdim }; 722234287Sdim 723234287Sdim if (ArgNum == InvalidArgIndex) { 724234287Sdim if (C.isCLibraryFunction(FDecl, "malloc") || 725234287Sdim C.isCLibraryFunction(FDecl, "calloc") || 726234287Sdim C.isCLibraryFunction(FDecl, "alloca")) 727234287Sdim ArgNum = 0; 728234287Sdim else if (C.isCLibraryFunction(FDecl, "memccpy")) 729234287Sdim ArgNum = 3; 730234287Sdim else if (C.isCLibraryFunction(FDecl, "realloc")) 731234287Sdim ArgNum = 1; 732234287Sdim else if (C.isCLibraryFunction(FDecl, "bcopy")) 733234287Sdim ArgNum = 2; 734234287Sdim } 735234287Sdim 736234287Sdim if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 737234287Sdim generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 738234287Sdim return true; 739234287Sdim 740234287Sdim return false; 741234287Sdim} 742234287Sdim 743234287Sdimvoid ento::registerGenericTaintChecker(CheckerManager &mgr) { 744234287Sdim mgr.registerChecker<GenericTaintChecker>(); 745234287Sdim} 746