//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This checker defines the attack surface for generic taint propagation. // // The taint information produced by it might be useful to other checkers. For // example, checkers should report errors which involve tainted data more // aggressively, even if the involved symbols are under constrained. // //===----------------------------------------------------------------------===// #include "Taint.h" #include "Yaml.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/Support/YAMLTraits.h" #include #include #include #include using namespace clang; using namespace ento; using namespace taint; namespace { class GenericTaintChecker : public Checker, check::PreStmt> { public: static void *getTag() { static int Tag; return &Tag; } void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const override; using ArgVector = SmallVector; using SignedArgVector = SmallVector; enum class VariadicType { None, Src, Dst }; /// Used to parse the configuration file. struct TaintConfiguration { using NameScopeArgs = std::tuple; struct Propagation { std::string Name; std::string Scope; ArgVector SrcArgs; SignedArgVector DstArgs; VariadicType VarType; unsigned VarIndex; }; std::vector Propagations; std::vector Filters; std::vector Sinks; TaintConfiguration() = default; TaintConfiguration(const TaintConfiguration &) = default; TaintConfiguration(TaintConfiguration &&) = default; TaintConfiguration &operator=(const TaintConfiguration &) = default; TaintConfiguration &operator=(TaintConfiguration &&) = default; }; /// Convert SignedArgVector to ArgVector. ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, SignedArgVector Args); /// Parse the config. void parseConfiguration(CheckerManager &Mgr, const std::string &Option, TaintConfiguration &&Config); static const unsigned InvalidArgIndex{std::numeric_limits::max()}; /// Denotes the return vale. static const unsigned ReturnValueIndex{std::numeric_limits::max() - 1}; private: mutable std::unique_ptr BT; void initBugType() const { if (!BT) BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); } struct FunctionData { FunctionData() = delete; FunctionData(const FunctionData &) = default; FunctionData(FunctionData &&) = default; FunctionData &operator=(const FunctionData &) = delete; FunctionData &operator=(FunctionData &&) = delete; static Optional create(const CallExpr *CE, const CheckerContext &C) { const FunctionDecl *FDecl = C.getCalleeDecl(CE); if (!FDecl || (FDecl->getKind() != Decl::Function && FDecl->getKind() != Decl::CXXMethod)) return None; StringRef Name = C.getCalleeName(FDecl); std::string FullName = FDecl->getQualifiedNameAsString(); if (Name.empty() || FullName.empty()) return None; return FunctionData{FDecl, Name, FullName}; } bool isInScope(StringRef Scope) const { return StringRef(FullName).startswith(Scope); } const FunctionDecl *const FDecl; const StringRef Name; const std::string FullName; }; /// Catch taint related bugs. Check if tainted data is passed to a /// system call etc. Returns true on matching. bool checkPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Add taint sources on a pre-visit. Returns true on matching. bool addSourcesPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Mark filter's arguments not tainted on a pre-visit. Returns true on /// matching. bool addFiltersPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Propagate taint generated at pre-visit. Returns true on matching. bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); /// Given a pointer argument, return the value it points to. static Optional getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Check for CWE-134: Uncontrolled Format String. static constexpr llvm::StringLiteral MsgUncontrolledFormatString = "Untrusted data is used as a format string " "(CWE-134: Uncontrolled Format String)"; bool checkUncontrolledFormatString(const CallExpr *CE, CheckerContext &C) const; /// Check for: /// CERT/STR02-C. "Sanitize data passed to complex subsystems" /// CWE-78, "Failure to Sanitize Data into an OS Command" static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = "Untrusted data is passed to a system call " "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; bool checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const; /// Check if tainted data is used as a buffer size ins strn.. functions, /// and allocators. static constexpr llvm::StringLiteral MsgTaintedBufferSize = "Untrusted data is used to specify the buffer size " "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " "for character data and the null terminator)"; bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, CheckerContext &C) const; /// Check if tainted data is used as a custom sink's parameter. static constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Generate a report if the expression is tainted or points to tainted data. bool generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const; struct TaintPropagationRule; template using ConfigDataMap = std::unordered_multimap>; using NameRuleMap = ConfigDataMap; using NameArgMap = ConfigDataMap; /// Find a function with the given name and scope. Returns the first match /// or the end of the map. template static auto findFunctionInConfig(const ConfigDataMap &Map, const FunctionData &FData); /// A struct used to specify taint propagation rules for a function. /// /// If any of the possible taint source arguments is tainted, all of the /// destination arguments should also be tainted. Use InvalidArgIndex in the /// src list to specify that all of the arguments can introduce taint. Use /// InvalidArgIndex in the dst arguments to signify that all the non-const /// pointer and reference arguments might be tainted on return. If /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, CheckerContext &C); /// List of arguments which can be taint sources and should be checked. ArgVector SrcArgs; /// List of arguments which should be tainted on function return. ArgVector DstArgs; /// Index for the first variadic parameter if exist. unsigned VariadicIndex; /// Show when a function has variadic parameters. If it has, it marks all /// of them as source or destination. VariadicType VarType; /// Special function for tainted source determination. If defined, it can /// override the default behavior. PropagationFuncType PropagationFunc; TaintPropagationRule() : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), PropagationFunc(nullptr) {} TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, VariadicType Var = VariadicType::None, unsigned VarIndex = InvalidArgIndex, PropagationFuncType Func = nullptr) : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} /// Get the propagation rule for a given function. static TaintPropagationRule getTaintPropagationRule(const NameRuleMap &CustomPropagations, const FunctionData &FData, CheckerContext &C); void addSrcArg(unsigned A) { SrcArgs.push_back(A); } void addDstArg(unsigned A) { DstArgs.push_back(A); } bool isNull() const { return SrcArgs.empty() && DstArgs.empty() && VariadicType::None == VarType; } bool isDestinationArgument(unsigned ArgNum) const { return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); } static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) return true; if (!E->getType().getTypePtr()->isPointerType()) return false; Optional V = getPointedToSVal(C, E); return (V && isTainted(State, *V)); } /// Pre-process a function which propagates taint according to the /// taint rule. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; // Functions for custom taintedness propagation. static bool postSocket(bool IsTainted, const CallExpr *CE, CheckerContext &C); }; /// Defines a map between the propagation function's name, scope /// and TaintPropagationRule. NameRuleMap CustomPropagations; /// Defines a map between the filter function's name, scope and filtering /// args. NameArgMap CustomFilters; /// Defines a map between the sink function's name, scope and sinking args. NameArgMap CustomSinks; }; const unsigned GenericTaintChecker::ReturnValueIndex; const unsigned GenericTaintChecker::InvalidArgIndex; // FIXME: these lines can be removed in C++17 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; } // end of anonymous namespace using TaintConfig = GenericTaintChecker::TaintConfiguration; LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) namespace llvm { namespace yaml { template <> struct MappingTraits { static void mapping(IO &IO, TaintConfig &Config) { IO.mapOptional("Propagations", Config.Propagations); IO.mapOptional("Filters", Config.Filters); IO.mapOptional("Sinks", Config.Sinks); } }; template <> struct MappingTraits { static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { IO.mapRequired("Name", Propagation.Name); IO.mapOptional("Scope", Propagation.Scope); IO.mapOptional("SrcArgs", Propagation.SrcArgs); IO.mapOptional("DstArgs", Propagation.DstArgs); IO.mapOptional("VariadicType", Propagation.VarType, GenericTaintChecker::VariadicType::None); IO.mapOptional("VariadicIndex", Propagation.VarIndex, GenericTaintChecker::InvalidArgIndex); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); } }; template <> struct MappingTraits { static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { IO.mapRequired("Name", std::get<0>(NSA)); IO.mapOptional("Scope", std::get<1>(NSA)); IO.mapRequired("Args", std::get<2>(NSA)); } }; } // namespace yaml } // namespace llvm /// A set which is used to pass information from call pre-visit instruction /// to the call post-visit. The values are unsigned integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { ArgVector Result; for (int Arg : Args) { if (Arg == -1) Result.push_back(ReturnValueIndex); else if (Arg < -1) { Result.push_back(InvalidArgIndex); Mgr.reportInvalidCheckerOptionValue( this, Option, "an argument number for propagation rules greater or equal to -1"); } else Result.push_back(static_cast(Arg)); } return Result; } void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, const std::string &Option, TaintConfiguration &&Config) { for (auto &P : Config.Propagations) { GenericTaintChecker::CustomPropagations.emplace( P.Name, std::make_pair(P.Scope, TaintPropagationRule{ std::move(P.SrcArgs), convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex})); } for (auto &F : Config.Filters) { GenericTaintChecker::CustomFilters.emplace( std::get<0>(F), std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); } for (auto &S : Config.Sinks) { GenericTaintChecker::CustomSinks.emplace( std::get<0>(S), std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); } } template auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap &Map, const FunctionData &FData) { auto Range = Map.equal_range(FData.Name); auto It = std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { const auto &Value = Entry.second; StringRef Scope = Value.first; return Scope.empty() || FData.isInScope(Scope); }); return It != Range.second ? It : Map.end(); } GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( const NameRuleMap &CustomPropagations, const FunctionData &FData, CheckerContext &C) { // TODO: Currently, we might lose precision here: we always mark a return // value as tainted even if it's just a pointer, pointing to tainted data. // Check for exact name match for functions without builtin substitutes. // Use qualified name, because these are C functions without namespace. TaintPropagationRule Rule = llvm::StringSwitch(FData.FullName) // Source functions // TODO: Add support for vfscanf & family. .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) .Case("socket", TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, InvalidArgIndex, &TaintPropagationRule::postSocket)) .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) // Propagating functions .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("getdelim", TaintPropagationRule({3}, {0})) .Case("getline", TaintPropagationRule({2}, {0})) .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("pread", TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) .Default(TaintPropagationRule()); if (!Rule.isNull()) return Rule; // Check if it's one of the memory setting/copying functions. // This check is specialized but faster then calling isCLibraryFunction. const FunctionDecl *FDecl = FData.FDecl; unsigned BId = 0; if ((BId = FDecl->getMemoryFunctionKind())) switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: case Builtin::BIstrncat: return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); case Builtin::BIstrlcpy: case Builtin::BIstrlcat: return TaintPropagationRule({1, 2}, {0}); case Builtin::BIstrndup: return TaintPropagationRule({0, 1}, {ReturnValueIndex}); default: break; }; // Process all other functions which could be defined as builtins. if (Rule.isNull()) { if (C.isCLibraryFunction(FDecl, "snprintf")) return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 3); else if (C.isCLibraryFunction(FDecl, "sprintf")) return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 2); else if (C.isCLibraryFunction(FDecl, "strcpy") || C.isCLibraryFunction(FDecl, "stpcpy") || C.isCLibraryFunction(FDecl, "strcat")) return TaintPropagationRule({1}, {0, ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "bcopy")) return TaintPropagationRule({0, 2}, {1}); else if (C.isCLibraryFunction(FDecl, "strdup") || C.isCLibraryFunction(FDecl, "strdupa")) return TaintPropagationRule({0}, {ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "wcsdup")) return TaintPropagationRule({0}, {ReturnValueIndex}); } // Skipping the following functions, since they might be used for cleansing // or smart memory copy: // - memccpy - copying until hitting a special character. auto It = findFunctionInConfig(CustomPropagations, FData); if (It != CustomPropagations.end()) { const auto &Value = It->second; return Value.second; } return TaintPropagationRule(); } void GenericTaintChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { Optional FData = FunctionData::create(CE, C); if (!FData) return; // Check for taintedness related errors first: system call, uncontrolled // format string, tainted buffer size. if (checkPre(CE, *FData, C)) return; // Marks the function's arguments and/or return value tainted if it present in // the list. if (addSourcesPre(CE, *FData, C)) return; addFiltersPre(CE, *FData, C); } void GenericTaintChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { // Set the marked values as tainted. The return value only accessible from // checkPostStmt. propagateFromPre(CE, C); } void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const { printTaint(State, Out, NL, Sep); } bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const { // First, try generating a propagation rule for this function. TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( this->CustomPropagations, FData, C); if (!Rule.isNull()) { ProgramStateRef State = Rule.process(CE, C); if (State) { C.addTransition(State); return true; } } return false; } bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const { auto It = findFunctionInConfig(CustomFilters, FData); if (It == CustomFilters.end()) return false; ProgramStateRef State = C.getState(); const auto &Value = It->second; const ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { if (ArgNum >= CE->getNumArgs()) continue; const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = removeTaint(State, *V); } if (State != C.getState()) { C.addTransition(State); return true; } return false; } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, CheckerContext &C) const { ProgramStateRef State = C.getState(); // Depending on what was tainted at pre-visit, we determined a set of // arguments which should be tainted after the function returns. These are // stored in the state as TaintArgsOnPostVisit set. TaintArgsOnPostVisitTy TaintArgs = State->get(); if (TaintArgs.isEmpty()) return false; for (unsigned ArgNum : TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { State = addTaint(State, CE, C.getLocationContext()); continue; } // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. if (CE->getNumArgs() < (ArgNum + 1)) return false; const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = addTaint(State, *V); } // Clear up the taint info from the state. State = State->remove(); if (State != C.getState()) { C.addTransition(State); return true; } return false; } bool GenericTaintChecker::checkPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; if (checkSystemCall(CE, FData.Name, C)) return true; if (checkTaintedBufferSize(CE, FData.FDecl, C)) return true; if (checkCustomSinks(CE, FData, C)) return true; return false; } Optional GenericTaintChecker::getPointedToSVal(CheckerContext &C, const Expr *Arg) { ProgramStateRef State = C.getState(); SVal AddrVal = C.getSVal(Arg->IgnoreParens()); if (AddrVal.isUnknownOrUndef()) return None; Optional AddrLoc = AddrVal.getAs(); if (!AddrLoc) return None; QualType ArgTy = Arg->getType().getCanonicalType(); if (!ArgTy->isPointerType()) return State->getSVal(*AddrLoc); QualType ValTy = ArgTy->getPointeeType(); // Do not dereference void pointers. Treat them as byte pointers instead. // FIXME: we might want to consider more than just the first byte. if (ValTy->isVoidType()) ValTy = C.getASTContext().CharTy; return State->getSVal(*AddrLoc, ValTy); } ProgramStateRef GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, CheckerContext &C) const { ProgramStateRef State = C.getState(); // Check for taint in arguments. bool IsTainted = true; for (unsigned ArgNum : SrcArgs) { if (ArgNum >= CE->getNumArgs()) continue; if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) break; } // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) break; } } if (PropagationFunc) IsTainted = PropagationFunc(IsTainted, CE, C); if (!IsTainted) return State; // Mark the arguments which should be tainted after the function returns. for (unsigned ArgNum : DstArgs) { // Should mark the return value? if (ArgNum == ReturnValueIndex) { State = State->add(ReturnValueIndex); continue; } if (ArgNum >= CE->getNumArgs()) continue; // Mark the given argument. State = State->add(ArgNum); } // Mark all variadic arguments tainted if present. if (VariadicType::Dst == VarType) { // For all pointer and references that were passed in: // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { const Expr *Arg = CE->getArg(i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); QualType PType = ArgTy->getPointeeType(); if ((!PType.isNull() && !PType.isConstQualified()) || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) State = State->add(i); } } return State; } // If argument 0(protocol domain) is network, the return value should get taint. bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, const CallExpr *CE, CheckerContext &C) { SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); StringRef DomName = C.getMacroNameOrSpelling(DomLoc); // White list the internal communication protocols. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) return false; return true; } bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { ProgramStateRef State = C.getState(); SVal Val = C.getSVal(E); // stdin is a pointer, so it would be a region. const MemRegion *MemReg = Val.getAsRegion(); // The region should be symbolic, we do not know it's value. const SymbolicRegion *SymReg = dyn_cast_or_null(MemReg); if (!SymReg) return false; // Get it's symbol and find the declaration region it's pointing to. const SymbolRegionValue *Sm = dyn_cast(SymReg->getSymbol()); if (!Sm) return false; const DeclRegion *DeclReg = dyn_cast_or_null(Sm->getRegion()); if (!DeclReg) return false; // This region corresponds to a declaration, find out if it's a global/extern // variable named stdin with the proper type. if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { D = D->getCanonicalDecl(); if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { const auto *PtrTy = dyn_cast(D->getType().getTypePtr()); if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == C.getASTContext().getFILEType().getCanonicalType()) return true; } } return false; } static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned &ArgNum) { // Find if the function contains a format string argument. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, // vsnprintf, syslog, custom annotated functions. const FunctionDecl *FDecl = C.getCalleeDecl(CE); if (!FDecl) return false; for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) return true; } // Or if a function is named setproctitle (this is a heuristic). if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { ArgNum = 0; return true; } return false; } bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); // Check for taint. ProgramStateRef State = C.getState(); Optional PointedToSVal = getPointedToSVal(C, E); SVal TaintedSVal; if (PointedToSVal && isTainted(State, *PointedToSVal)) TaintedSVal = *PointedToSVal; else if (isTainted(State, E, C.getLocationContext())) TaintedSVal = C.getSVal(E); else return false; // Generate diagnostic. if (ExplodedNode *N = C.generateNonFatalErrorNode()) { initBugType(); auto report = std::make_unique(*BT, Msg, N); report->addRange(E->getSourceRange()); report->addVisitor(std::make_unique(TaintedSVal)); C.emitReport(std::move(report)); return true; } return false; } bool GenericTaintChecker::checkUncontrolledFormatString( const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. unsigned ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; // If either the format string content or the pointer itself are tainted, // warn. return generateReportIfTainted(CE->getArg(ArgNum), MsgUncontrolledFormatString, C); } bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const { // TODO: It might make sense to run this check on demand. In some cases, // we should check if the environment has been cleansed here. We also might // need to know if the user was reset before these calls(seteuid). unsigned ArgNum = llvm::StringSwitch(Name) .Case("system", 0) .Case("popen", 0) .Case("execl", 0) .Case("execle", 0) .Case("execlp", 0) .Case("execv", 0) .Case("execvp", 0) .Case("execvP", 0) .Case("execve", 0) .Case("dlopen", 0) .Default(InvalidArgIndex); if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) return false; return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); } // TODO: Should this check be a part of the CString checker? // If yes, should taint be a global setting? bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, CheckerContext &C) const { // If the function has a buffer size argument, set ArgNum. unsigned ArgNum = InvalidArgIndex; unsigned BId = 0; if ((BId = FDecl->getMemoryFunctionKind())) switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: ArgNum = 2; break; case Builtin::BIstrndup: ArgNum = 1; break; default: break; }; if (ArgNum == InvalidArgIndex) { if (C.isCLibraryFunction(FDecl, "malloc") || C.isCLibraryFunction(FDecl, "calloc") || C.isCLibraryFunction(FDecl, "alloca")) ArgNum = 0; else if (C.isCLibraryFunction(FDecl, "memccpy")) ArgNum = 3; else if (C.isCLibraryFunction(FDecl, "realloc")) ArgNum = 1; else if (C.isCLibraryFunction(FDecl, "bcopy")) ArgNum = 2; } return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); } bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const { auto It = findFunctionInConfig(CustomSinks, FData); if (It == CustomSinks.end()) return false; const auto &Value = It->second; const GenericTaintChecker::ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { if (ArgNum >= CE->getNumArgs()) continue; if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) return true; } return false; } void ento::registerGenericTaintChecker(CheckerManager &Mgr) { auto *Checker = Mgr.registerChecker(); std::string Option{"Config"}; StringRef ConfigFile = Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); llvm::Optional Config = getConfiguration(Mgr, Checker, Option, ConfigFile); if (Config) Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); } bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { return true; }