1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This checker defines the attack surface for generic taint propagation.
10//
11// The taint information produced by it might be useful to other checkers. For
12// example, checkers should report errors which involve tainted data more
13// aggressively, even if the involved symbols are under constrained.
14//
15//===----------------------------------------------------------------------===//
16
17#include "Taint.h"
18#include "Yaml.h"
19#include "clang/AST/Attr.h"
20#include "clang/Basic/Builtins.h"
21#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/StaticAnalyzer/Core/Checker.h"
24#include "clang/StaticAnalyzer/Core/CheckerManager.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27#include "llvm/Support/YAMLTraits.h"
28#include <algorithm>
29#include <limits>
30#include <unordered_map>
31#include <utility>
32
33using namespace clang;
34using namespace ento;
35using namespace taint;
36
37namespace {
38class GenericTaintChecker
39    : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
40public:
41  static void *getTag() {
42    static int Tag;
43    return &Tag;
44  }
45
46  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
47
48  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
49
50  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
51                  const char *Sep) const override;
52
53  using ArgVector = SmallVector<unsigned, 2>;
54  using SignedArgVector = SmallVector<int, 2>;
55
56  enum class VariadicType { None, Src, Dst };
57
58  /// Used to parse the configuration file.
59  struct TaintConfiguration {
60    using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
61
62    struct Propagation {
63      std::string Name;
64      std::string Scope;
65      ArgVector SrcArgs;
66      SignedArgVector DstArgs;
67      VariadicType VarType;
68      unsigned VarIndex;
69    };
70
71    std::vector<Propagation> Propagations;
72    std::vector<NameScopeArgs> Filters;
73    std::vector<NameScopeArgs> Sinks;
74
75    TaintConfiguration() = default;
76    TaintConfiguration(const TaintConfiguration &) = default;
77    TaintConfiguration(TaintConfiguration &&) = default;
78    TaintConfiguration &operator=(const TaintConfiguration &) = default;
79    TaintConfiguration &operator=(TaintConfiguration &&) = default;
80  };
81
82  /// Convert SignedArgVector to ArgVector.
83  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
84                               SignedArgVector Args);
85
86  /// Parse the config.
87  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
88                          TaintConfiguration &&Config);
89
90  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
91  /// Denotes the return vale.
92  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
93                                         1};
94
95private:
96  mutable std::unique_ptr<BugType> BT;
97  void initBugType() const {
98    if (!BT)
99      BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
100  }
101
102  struct FunctionData {
103    FunctionData() = delete;
104    FunctionData(const FunctionData &) = default;
105    FunctionData(FunctionData &&) = default;
106    FunctionData &operator=(const FunctionData &) = delete;
107    FunctionData &operator=(FunctionData &&) = delete;
108
109    static Optional<FunctionData> create(const CallExpr *CE,
110                                         const CheckerContext &C) {
111      const FunctionDecl *FDecl = C.getCalleeDecl(CE);
112      if (!FDecl || (FDecl->getKind() != Decl::Function &&
113                     FDecl->getKind() != Decl::CXXMethod))
114        return None;
115
116      StringRef Name = C.getCalleeName(FDecl);
117      std::string FullName = FDecl->getQualifiedNameAsString();
118      if (Name.empty() || FullName.empty())
119        return None;
120
121      return FunctionData{FDecl, Name, FullName};
122    }
123
124    bool isInScope(StringRef Scope) const {
125      return StringRef(FullName).startswith(Scope);
126    }
127
128    const FunctionDecl *const FDecl;
129    const StringRef Name;
130    const std::string FullName;
131  };
132
133  /// Catch taint related bugs. Check if tainted data is passed to a
134  /// system call etc. Returns true on matching.
135  bool checkPre(const CallExpr *CE, const FunctionData &FData,
136                CheckerContext &C) const;
137
138  /// Add taint sources on a pre-visit. Returns true on matching.
139  bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
140                     CheckerContext &C) const;
141
142  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
143  /// matching.
144  bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
145                     CheckerContext &C) const;
146
147  /// Propagate taint generated at pre-visit. Returns true on matching.
148  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
149
150  /// Check if the region the expression evaluates to is the standard input,
151  /// and thus, is tainted.
152  static bool isStdin(const Expr *E, CheckerContext &C);
153
154  /// Given a pointer argument, return the value it points to.
155  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
156
157  /// Check for CWE-134: Uncontrolled Format String.
158  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
159      "Untrusted data is used as a format string "
160      "(CWE-134: Uncontrolled Format String)";
161  bool checkUncontrolledFormatString(const CallExpr *CE,
162                                     CheckerContext &C) const;
163
164  /// Check for:
165  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
166  /// CWE-78, "Failure to Sanitize Data into an OS Command"
167  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
168      "Untrusted data is passed to a system call "
169      "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
170  bool checkSystemCall(const CallExpr *CE, StringRef Name,
171                       CheckerContext &C) const;
172
173  /// Check if tainted data is used as a buffer size ins strn.. functions,
174  /// and allocators.
175  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
176      "Untrusted data is used to specify the buffer size "
177      "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
178      "for character data and the null terminator)";
179  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
180                              CheckerContext &C) const;
181
182  /// Check if tainted data is used as a custom sink's parameter.
183  static constexpr llvm::StringLiteral MsgCustomSink =
184      "Untrusted data is passed to a user-defined sink";
185  bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
186                        CheckerContext &C) const;
187
188  /// Generate a report if the expression is tainted or points to tainted data.
189  bool generateReportIfTainted(const Expr *E, StringRef Msg,
190                               CheckerContext &C) const;
191
192  struct TaintPropagationRule;
193  template <typename T>
194  using ConfigDataMap =
195      std::unordered_multimap<std::string, std::pair<std::string, T>>;
196  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
197  using NameArgMap = ConfigDataMap<ArgVector>;
198
199  /// Find a function with the given name and scope. Returns the first match
200  /// or the end of the map.
201  template <typename T>
202  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
203                                   const FunctionData &FData);
204
205  /// A struct used to specify taint propagation rules for a function.
206  ///
207  /// If any of the possible taint source arguments is tainted, all of the
208  /// destination arguments should also be tainted. Use InvalidArgIndex in the
209  /// src list to specify that all of the arguments can introduce taint. Use
210  /// InvalidArgIndex in the dst arguments to signify that all the non-const
211  /// pointer and reference arguments might be tainted on return. If
212  /// ReturnValueIndex is added to the dst list, the return value will be
213  /// tainted.
214  struct TaintPropagationRule {
215    using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
216                                         CheckerContext &C);
217
218    /// List of arguments which can be taint sources and should be checked.
219    ArgVector SrcArgs;
220    /// List of arguments which should be tainted on function return.
221    ArgVector DstArgs;
222    /// Index for the first variadic parameter if exist.
223    unsigned VariadicIndex;
224    /// Show when a function has variadic parameters. If it has, it marks all
225    /// of them as source or destination.
226    VariadicType VarType;
227    /// Special function for tainted source determination. If defined, it can
228    /// override the default behavior.
229    PropagationFuncType PropagationFunc;
230
231    TaintPropagationRule()
232        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
233          PropagationFunc(nullptr) {}
234
235    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
236                         VariadicType Var = VariadicType::None,
237                         unsigned VarIndex = InvalidArgIndex,
238                         PropagationFuncType Func = nullptr)
239        : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
240          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
241
242    /// Get the propagation rule for a given function.
243    static TaintPropagationRule
244    getTaintPropagationRule(const NameRuleMap &CustomPropagations,
245                            const FunctionData &FData, CheckerContext &C);
246
247    void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
248    void addDstArg(unsigned A) { DstArgs.push_back(A); }
249
250    bool isNull() const {
251      return SrcArgs.empty() && DstArgs.empty() &&
252             VariadicType::None == VarType;
253    }
254
255    bool isDestinationArgument(unsigned ArgNum) const {
256      return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
257    }
258
259    static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
260                                           CheckerContext &C) {
261      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
262        return true;
263
264      if (!E->getType().getTypePtr()->isPointerType())
265        return false;
266
267      Optional<SVal> V = getPointedToSVal(C, E);
268      return (V && isTainted(State, *V));
269    }
270
271    /// Pre-process a function which propagates taint according to the
272    /// taint rule.
273    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
274
275    // Functions for custom taintedness propagation.
276    static bool postSocket(bool IsTainted, const CallExpr *CE,
277                           CheckerContext &C);
278  };
279
280  /// Defines a map between the propagation function's name, scope
281  /// and TaintPropagationRule.
282  NameRuleMap CustomPropagations;
283
284  /// Defines a map between the filter function's name, scope and filtering
285  /// args.
286  NameArgMap CustomFilters;
287
288  /// Defines a map between the sink function's name, scope and sinking args.
289  NameArgMap CustomSinks;
290};
291
292const unsigned GenericTaintChecker::ReturnValueIndex;
293const unsigned GenericTaintChecker::InvalidArgIndex;
294
295// FIXME: these lines can be removed in C++17
296constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
297constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
298constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
299constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
300} // end of anonymous namespace
301
302using TaintConfig = GenericTaintChecker::TaintConfiguration;
303
304LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
305LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
306
307namespace llvm {
308namespace yaml {
309template <> struct MappingTraits<TaintConfig> {
310  static void mapping(IO &IO, TaintConfig &Config) {
311    IO.mapOptional("Propagations", Config.Propagations);
312    IO.mapOptional("Filters", Config.Filters);
313    IO.mapOptional("Sinks", Config.Sinks);
314  }
315};
316
317template <> struct MappingTraits<TaintConfig::Propagation> {
318  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
319    IO.mapRequired("Name", Propagation.Name);
320    IO.mapOptional("Scope", Propagation.Scope);
321    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
322    IO.mapOptional("DstArgs", Propagation.DstArgs);
323    IO.mapOptional("VariadicType", Propagation.VarType,
324                   GenericTaintChecker::VariadicType::None);
325    IO.mapOptional("VariadicIndex", Propagation.VarIndex,
326                   GenericTaintChecker::InvalidArgIndex);
327  }
328};
329
330template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
331  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
332    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
333    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
334    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
335  }
336};
337
338template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
339  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
340    IO.mapRequired("Name", std::get<0>(NSA));
341    IO.mapOptional("Scope", std::get<1>(NSA));
342    IO.mapRequired("Args", std::get<2>(NSA));
343  }
344};
345} // namespace yaml
346} // namespace llvm
347
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers: either ReturnValueIndex,
/// or the index of a pointer/reference argument whose pointed-to data should
/// be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
353
354GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
355    CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
356  ArgVector Result;
357  for (int Arg : Args) {
358    if (Arg == -1)
359      Result.push_back(ReturnValueIndex);
360    else if (Arg < -1) {
361      Result.push_back(InvalidArgIndex);
362      Mgr.reportInvalidCheckerOptionValue(
363          this, Option,
364          "an argument number for propagation rules greater or equal to -1");
365    } else
366      Result.push_back(static_cast<unsigned>(Arg));
367  }
368  return Result;
369}
370
371void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
372                                             const std::string &Option,
373                                             TaintConfiguration &&Config) {
374  for (auto &P : Config.Propagations) {
375    GenericTaintChecker::CustomPropagations.emplace(
376        P.Name,
377        std::make_pair(P.Scope, TaintPropagationRule{
378                                    std::move(P.SrcArgs),
379                                    convertToArgVector(Mgr, Option, P.DstArgs),
380                                    P.VarType, P.VarIndex}));
381  }
382
383  for (auto &F : Config.Filters) {
384    GenericTaintChecker::CustomFilters.emplace(
385        std::get<0>(F),
386        std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
387  }
388
389  for (auto &S : Config.Sinks) {
390    GenericTaintChecker::CustomSinks.emplace(
391        std::get<0>(S),
392        std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
393  }
394}
395
396template <typename T>
397auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
398                                               const FunctionData &FData) {
399  auto Range = Map.equal_range(FData.Name);
400  auto It =
401      std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
402        const auto &Value = Entry.second;
403        StringRef Scope = Value.first;
404        return Scope.empty() || FData.isInScope(Scope);
405      });
406  return It != Range.second ? It : Map.end();
407}
408
409GenericTaintChecker::TaintPropagationRule
410GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
411    const NameRuleMap &CustomPropagations, const FunctionData &FData,
412    CheckerContext &C) {
413  // TODO: Currently, we might lose precision here: we always mark a return
414  // value as tainted even if it's just a pointer, pointing to tainted data.
415
416  // Check for exact name match for functions without builtin substitutes.
417  // Use qualified name, because these are C functions without namespace.
418  TaintPropagationRule Rule =
419      llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
420          // Source functions
421          // TODO: Add support for vfscanf & family.
422          .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
423          .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
424          .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
425          .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
426          .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
427          .Case("getchar_unlocked",
428                TaintPropagationRule({}, {ReturnValueIndex}))
429          .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
430          .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
431          .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
432          .Case("socket",
433                TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
434                                     InvalidArgIndex,
435                                     &TaintPropagationRule::postSocket))
436          .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
437          // Propagating functions
438          .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
439          .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
440          .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
441          .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
442          .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
443          .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
444          .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
445          .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
446          .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
447          .Case("getdelim", TaintPropagationRule({3}, {0}))
448          .Case("getline", TaintPropagationRule({2}, {0}))
449          .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
450          .Case("pread",
451                TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
452          .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
453          .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
454          .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
455          .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
456          .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
457          .Default(TaintPropagationRule());
458
459  if (!Rule.isNull())
460    return Rule;
461
462  // Check if it's one of the memory setting/copying functions.
463  // This check is specialized but faster then calling isCLibraryFunction.
464  const FunctionDecl *FDecl = FData.FDecl;
465  unsigned BId = 0;
466  if ((BId = FDecl->getMemoryFunctionKind()))
467    switch (BId) {
468    case Builtin::BImemcpy:
469    case Builtin::BImemmove:
470    case Builtin::BIstrncpy:
471    case Builtin::BIstrncat:
472      return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
473    case Builtin::BIstrlcpy:
474    case Builtin::BIstrlcat:
475      return TaintPropagationRule({1, 2}, {0});
476    case Builtin::BIstrndup:
477      return TaintPropagationRule({0, 1}, {ReturnValueIndex});
478
479    default:
480      break;
481    };
482
483  // Process all other functions which could be defined as builtins.
484  if (Rule.isNull()) {
485    if (C.isCLibraryFunction(FDecl, "snprintf"))
486      return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
487                                  3);
488    else if (C.isCLibraryFunction(FDecl, "sprintf"))
489      return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
490                                  2);
491    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
492             C.isCLibraryFunction(FDecl, "stpcpy") ||
493             C.isCLibraryFunction(FDecl, "strcat"))
494      return TaintPropagationRule({1}, {0, ReturnValueIndex});
495    else if (C.isCLibraryFunction(FDecl, "bcopy"))
496      return TaintPropagationRule({0, 2}, {1});
497    else if (C.isCLibraryFunction(FDecl, "strdup") ||
498             C.isCLibraryFunction(FDecl, "strdupa"))
499      return TaintPropagationRule({0}, {ReturnValueIndex});
500    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
501      return TaintPropagationRule({0}, {ReturnValueIndex});
502  }
503
504  // Skipping the following functions, since they might be used for cleansing
505  // or smart memory copy:
506  // - memccpy - copying until hitting a special character.
507
508  auto It = findFunctionInConfig(CustomPropagations, FData);
509  if (It != CustomPropagations.end()) {
510    const auto &Value = It->second;
511    return Value.second;
512  }
513
514  return TaintPropagationRule();
515}
516
517void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
518                                       CheckerContext &C) const {
519  Optional<FunctionData> FData = FunctionData::create(CE, C);
520  if (!FData)
521    return;
522
523  // Check for taintedness related errors first: system call, uncontrolled
524  // format string, tainted buffer size.
525  if (checkPre(CE, *FData, C))
526    return;
527
528  // Marks the function's arguments and/or return value tainted if it present in
529  // the list.
530  if (addSourcesPre(CE, *FData, C))
531    return;
532
533  addFiltersPre(CE, *FData, C);
534}
535
536void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
537                                        CheckerContext &C) const {
538  // Set the marked values as tainted. The return value only accessible from
539  // checkPostStmt.
540  propagateFromPre(CE, C);
541}
542
543void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
544                                     const char *NL, const char *Sep) const {
545  printTaint(State, Out, NL, Sep);
546}
547
548bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
549                                        const FunctionData &FData,
550                                        CheckerContext &C) const {
551  // First, try generating a propagation rule for this function.
552  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
553      this->CustomPropagations, FData, C);
554  if (!Rule.isNull()) {
555    ProgramStateRef State = Rule.process(CE, C);
556    if (State) {
557      C.addTransition(State);
558      return true;
559    }
560  }
561  return false;
562}
563
564bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
565                                        const FunctionData &FData,
566                                        CheckerContext &C) const {
567  auto It = findFunctionInConfig(CustomFilters, FData);
568  if (It == CustomFilters.end())
569    return false;
570
571  ProgramStateRef State = C.getState();
572  const auto &Value = It->second;
573  const ArgVector &Args = Value.second;
574  for (unsigned ArgNum : Args) {
575    if (ArgNum >= CE->getNumArgs())
576      continue;
577
578    const Expr *Arg = CE->getArg(ArgNum);
579    Optional<SVal> V = getPointedToSVal(C, Arg);
580    if (V)
581      State = removeTaint(State, *V);
582  }
583
584  if (State != C.getState()) {
585    C.addTransition(State);
586    return true;
587  }
588  return false;
589}
590
591bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
592                                           CheckerContext &C) const {
593  ProgramStateRef State = C.getState();
594
595  // Depending on what was tainted at pre-visit, we determined a set of
596  // arguments which should be tainted after the function returns. These are
597  // stored in the state as TaintArgsOnPostVisit set.
598  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
599  if (TaintArgs.isEmpty())
600    return false;
601
602  for (unsigned ArgNum : TaintArgs) {
603    // Special handling for the tainted return value.
604    if (ArgNum == ReturnValueIndex) {
605      State = addTaint(State, CE, C.getLocationContext());
606      continue;
607    }
608
609    // The arguments are pointer arguments. The data they are pointing at is
610    // tainted after the call.
611    if (CE->getNumArgs() < (ArgNum + 1))
612      return false;
613    const Expr *Arg = CE->getArg(ArgNum);
614    Optional<SVal> V = getPointedToSVal(C, Arg);
615    if (V)
616      State = addTaint(State, *V);
617  }
618
619  // Clear up the taint info from the state.
620  State = State->remove<TaintArgsOnPostVisit>();
621
622  if (State != C.getState()) {
623    C.addTransition(State);
624    return true;
625  }
626  return false;
627}
628
629bool GenericTaintChecker::checkPre(const CallExpr *CE,
630                                   const FunctionData &FData,
631                                   CheckerContext &C) const {
632
633  if (checkUncontrolledFormatString(CE, C))
634    return true;
635
636  if (checkSystemCall(CE, FData.Name, C))
637    return true;
638
639  if (checkTaintedBufferSize(CE, FData.FDecl, C))
640    return true;
641
642  if (checkCustomSinks(CE, FData, C))
643    return true;
644
645  return false;
646}
647
648Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
649                                                     const Expr *Arg) {
650  ProgramStateRef State = C.getState();
651  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
652  if (AddrVal.isUnknownOrUndef())
653    return None;
654
655  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
656  if (!AddrLoc)
657    return None;
658
659  QualType ArgTy = Arg->getType().getCanonicalType();
660  if (!ArgTy->isPointerType())
661    return State->getSVal(*AddrLoc);
662
663  QualType ValTy = ArgTy->getPointeeType();
664
665  // Do not dereference void pointers. Treat them as byte pointers instead.
666  // FIXME: we might want to consider more than just the first byte.
667  if (ValTy->isVoidType())
668    ValTy = C.getASTContext().CharTy;
669
670  return State->getSVal(*AddrLoc, ValTy);
671}
672
673ProgramStateRef
674GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
675                                                   CheckerContext &C) const {
676  ProgramStateRef State = C.getState();
677
678  // Check for taint in arguments.
679  bool IsTainted = true;
680  for (unsigned ArgNum : SrcArgs) {
681    if (ArgNum >= CE->getNumArgs())
682      continue;
683
684    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
685      break;
686  }
687
688  // Check for taint in variadic arguments.
689  if (!IsTainted && VariadicType::Src == VarType) {
690    // Check if any of the arguments is tainted
691    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
692      if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
693        break;
694    }
695  }
696
697  if (PropagationFunc)
698    IsTainted = PropagationFunc(IsTainted, CE, C);
699
700  if (!IsTainted)
701    return State;
702
703  // Mark the arguments which should be tainted after the function returns.
704  for (unsigned ArgNum : DstArgs) {
705    // Should mark the return value?
706    if (ArgNum == ReturnValueIndex) {
707      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
708      continue;
709    }
710
711    if (ArgNum >= CE->getNumArgs())
712      continue;
713
714    // Mark the given argument.
715    State = State->add<TaintArgsOnPostVisit>(ArgNum);
716  }
717
718  // Mark all variadic arguments tainted if present.
719  if (VariadicType::Dst == VarType) {
720    // For all pointer and references that were passed in:
721    //   If they are not pointing to const data, mark data as tainted.
722    //   TODO: So far we are just going one level down; ideally we'd need to
723    //         recurse here.
724    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
725      const Expr *Arg = CE->getArg(i);
726      // Process pointer argument.
727      const Type *ArgTy = Arg->getType().getTypePtr();
728      QualType PType = ArgTy->getPointeeType();
729      if ((!PType.isNull() && !PType.isConstQualified()) ||
730          (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
731        State = State->add<TaintArgsOnPostVisit>(i);
732    }
733  }
734
735  return State;
736}
737
738// If argument 0(protocol domain) is network, the return value should get taint.
739bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
740                                                           const CallExpr *CE,
741                                                           CheckerContext &C) {
742  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
743  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
744  // White list the internal communication protocols.
745  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
746      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
747    return false;
748
749  return true;
750}
751
752bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
753  ProgramStateRef State = C.getState();
754  SVal Val = C.getSVal(E);
755
756  // stdin is a pointer, so it would be a region.
757  const MemRegion *MemReg = Val.getAsRegion();
758
759  // The region should be symbolic, we do not know it's value.
760  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
761  if (!SymReg)
762    return false;
763
764  // Get it's symbol and find the declaration region it's pointing to.
765  const SymbolRegionValue *Sm =
766      dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
767  if (!Sm)
768    return false;
769  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
770  if (!DeclReg)
771    return false;
772
773  // This region corresponds to a declaration, find out if it's a global/extern
774  // variable named stdin with the proper type.
775  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
776    D = D->getCanonicalDecl();
777    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
778      const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
779      if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
780                       C.getASTContext().getFILEType().getCanonicalType())
781        return true;
782    }
783  }
784  return false;
785}
786
787static bool getPrintfFormatArgumentNum(const CallExpr *CE,
788                                       const CheckerContext &C,
789                                       unsigned &ArgNum) {
790  // Find if the function contains a format string argument.
791  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
792  // vsnprintf, syslog, custom annotated functions.
793  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
794  if (!FDecl)
795    return false;
796  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
797    ArgNum = Format->getFormatIdx() - 1;
798    if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
799      return true;
800  }
801
802  // Or if a function is named setproctitle (this is a heuristic).
803  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
804    ArgNum = 0;
805    return true;
806  }
807
808  return false;
809}
810
811bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
812                                                  CheckerContext &C) const {
813  assert(E);
814
815  // Check for taint.
816  ProgramStateRef State = C.getState();
817  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
818  SVal TaintedSVal;
819  if (PointedToSVal && isTainted(State, *PointedToSVal))
820    TaintedSVal = *PointedToSVal;
821  else if (isTainted(State, E, C.getLocationContext()))
822    TaintedSVal = C.getSVal(E);
823  else
824    return false;
825
826  // Generate diagnostic.
827  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
828    initBugType();
829    auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
830    report->addRange(E->getSourceRange());
831    report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
832    C.emitReport(std::move(report));
833    return true;
834  }
835  return false;
836}
837
838bool GenericTaintChecker::checkUncontrolledFormatString(
839    const CallExpr *CE, CheckerContext &C) const {
840  // Check if the function contains a format string argument.
841  unsigned ArgNum = 0;
842  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
843    return false;
844
845  // If either the format string content or the pointer itself are tainted,
846  // warn.
847  return generateReportIfTainted(CE->getArg(ArgNum),
848                                 MsgUncontrolledFormatString, C);
849}
850
851bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
852                                          CheckerContext &C) const {
853  // TODO: It might make sense to run this check on demand. In some cases,
854  // we should check if the environment has been cleansed here. We also might
855  // need to know if the user was reset before these calls(seteuid).
856  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
857                        .Case("system", 0)
858                        .Case("popen", 0)
859                        .Case("execl", 0)
860                        .Case("execle", 0)
861                        .Case("execlp", 0)
862                        .Case("execv", 0)
863                        .Case("execvp", 0)
864                        .Case("execvP", 0)
865                        .Case("execve", 0)
866                        .Case("dlopen", 0)
867                        .Default(InvalidArgIndex);
868
869  if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
870    return false;
871
872  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
873}
874
875// TODO: Should this check be a part of the CString checker?
876// If yes, should taint be a global setting?
877bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
878                                                 const FunctionDecl *FDecl,
879                                                 CheckerContext &C) const {
880  // If the function has a buffer size argument, set ArgNum.
881  unsigned ArgNum = InvalidArgIndex;
882  unsigned BId = 0;
883  if ((BId = FDecl->getMemoryFunctionKind()))
884    switch (BId) {
885    case Builtin::BImemcpy:
886    case Builtin::BImemmove:
887    case Builtin::BIstrncpy:
888      ArgNum = 2;
889      break;
890    case Builtin::BIstrndup:
891      ArgNum = 1;
892      break;
893    default:
894      break;
895    };
896
897  if (ArgNum == InvalidArgIndex) {
898    if (C.isCLibraryFunction(FDecl, "malloc") ||
899        C.isCLibraryFunction(FDecl, "calloc") ||
900        C.isCLibraryFunction(FDecl, "alloca"))
901      ArgNum = 0;
902    else if (C.isCLibraryFunction(FDecl, "memccpy"))
903      ArgNum = 3;
904    else if (C.isCLibraryFunction(FDecl, "realloc"))
905      ArgNum = 1;
906    else if (C.isCLibraryFunction(FDecl, "bcopy"))
907      ArgNum = 2;
908  }
909
910  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
911         generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
912}
913
914bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
915                                           const FunctionData &FData,
916                                           CheckerContext &C) const {
917  auto It = findFunctionInConfig(CustomSinks, FData);
918  if (It == CustomSinks.end())
919    return false;
920
921  const auto &Value = It->second;
922  const GenericTaintChecker::ArgVector &Args = Value.second;
923  for (unsigned ArgNum : Args) {
924    if (ArgNum >= CE->getNumArgs())
925      continue;
926
927    if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
928      return true;
929  }
930
931  return false;
932}
933
934void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
935  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
936  std::string Option{"Config"};
937  StringRef ConfigFile =
938      Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
939  llvm::Optional<TaintConfig> Config =
940      getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
941  if (Config)
942    Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
943}
944
945bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
946  return true;
947}
948