1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This checker defines the attack surface for generic taint propagation.
10//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
14//
15//===----------------------------------------------------------------------===//
16
17#include "Taint.h"
18#include "Yaml.h"
19#include "clang/AST/Attr.h"
20#include "clang/Basic/Builtins.h"
21#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/StaticAnalyzer/Core/Checker.h"
24#include "clang/StaticAnalyzer/Core/CheckerManager.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28#include "llvm/Support/YAMLTraits.h"
29
30#include <algorithm>
31#include <limits>
32#include <memory>
33#include <unordered_map>
34#include <utility>
35
36using namespace clang;
37using namespace ento;
38using namespace taint;
39
40namespace {
41class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42public:
43  static void *getTag() {
44    static int Tag;
45    return &Tag;
46  }
47
48  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50
51  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52                  const char *Sep) const override;
53
54  using ArgVector = SmallVector<unsigned, 2>;
55  using SignedArgVector = SmallVector<int, 2>;
56
57  enum class VariadicType { None, Src, Dst };
58
59  /// Used to parse the configuration file.
60  struct TaintConfiguration {
61    using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62
63    struct Propagation {
64      std::string Name;
65      std::string Scope;
66      ArgVector SrcArgs;
67      SignedArgVector DstArgs;
68      VariadicType VarType;
69      unsigned VarIndex;
70    };
71
72    std::vector<Propagation> Propagations;
73    std::vector<NameScopeArgs> Filters;
74    std::vector<NameScopeArgs> Sinks;
75
76    TaintConfiguration() = default;
77    TaintConfiguration(const TaintConfiguration &) = default;
78    TaintConfiguration(TaintConfiguration &&) = default;
79    TaintConfiguration &operator=(const TaintConfiguration &) = default;
80    TaintConfiguration &operator=(TaintConfiguration &&) = default;
81  };
82
83  /// Convert SignedArgVector to ArgVector.
84  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85                               const SignedArgVector &Args);
86
87  /// Parse the config.
88  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89                          TaintConfiguration &&Config);
90
91  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92  /// Denotes the return vale.
93  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94                                         1};
95
96private:
97  mutable std::unique_ptr<BugType> BT;
98  void initBugType() const {
99    if (!BT)
100      BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101                                     "Untrusted Data");
102  }
103
104  struct FunctionData {
105    FunctionData() = delete;
106    FunctionData(const FunctionData &) = default;
107    FunctionData(FunctionData &&) = default;
108    FunctionData &operator=(const FunctionData &) = delete;
109    FunctionData &operator=(FunctionData &&) = delete;
110
111    static Optional<FunctionData> create(const CallEvent &Call,
112                                         const CheckerContext &C) {
113      if (!Call.getDecl())
114        return None;
115
116      const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
117      if (!FDecl || (FDecl->getKind() != Decl::Function &&
118                     FDecl->getKind() != Decl::CXXMethod))
119        return None;
120
121      StringRef Name = C.getCalleeName(FDecl);
122      std::string FullName = FDecl->getQualifiedNameAsString();
123      if (Name.empty() || FullName.empty())
124        return None;
125
126      return FunctionData{FDecl, Name, FullName};
127    }
128
129    bool isInScope(StringRef Scope) const {
130      return StringRef(FullName).startswith(Scope);
131    }
132
133    const FunctionDecl *const FDecl;
134    const StringRef Name;
135    const std::string FullName;
136  };
137
138  /// Catch taint related bugs. Check if tainted data is passed to a
139  /// system call etc. Returns true on matching.
140  bool checkPre(const CallEvent &Call, const FunctionData &FData,
141                CheckerContext &C) const;
142
143  /// Add taint sources on a pre-visit. Returns true on matching.
144  bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
145                     CheckerContext &C) const;
146
147  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
148  /// matching.
149  bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
150                     CheckerContext &C) const;
151
152  /// Propagate taint generated at pre-visit. Returns true on matching.
153  static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
154
155  /// Check if the region the expression evaluates to is the standard input,
156  /// and thus, is tainted.
157  static bool isStdin(const Expr *E, CheckerContext &C);
158
159  /// Given a pointer argument, return the value it points to.
160  static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
161
162  /// Check for CWE-134: Uncontrolled Format String.
163  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
164      "Untrusted data is used as a format string "
165      "(CWE-134: Uncontrolled Format String)";
166  bool checkUncontrolledFormatString(const CallEvent &Call,
167                                     CheckerContext &C) const;
168
169  /// Check for:
170  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
171  /// CWE-78, "Failure to Sanitize Data into an OS Command"
172  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
173      "Untrusted data is passed to a system call "
174      "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
175  bool checkSystemCall(const CallEvent &Call, StringRef Name,
176                       CheckerContext &C) const;
177
178  /// Check if tainted data is used as a buffer size ins strn.. functions,
179  /// and allocators.
180  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
181      "Untrusted data is used to specify the buffer size "
182      "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
183      "for character data and the null terminator)";
184  bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
185
186  /// Check if tainted data is used as a custom sink's parameter.
187  static constexpr llvm::StringLiteral MsgCustomSink =
188      "Untrusted data is passed to a user-defined sink";
189  bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
190                        CheckerContext &C) const;
191
192  /// Generate a report if the expression is tainted or points to tainted data.
193  bool generateReportIfTainted(const Expr *E, StringRef Msg,
194                               CheckerContext &C) const;
195
196  struct TaintPropagationRule;
197  template <typename T>
198  using ConfigDataMap =
199      std::unordered_multimap<std::string, std::pair<std::string, T>>;
200  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
201  using NameArgMap = ConfigDataMap<ArgVector>;
202
203  /// Find a function with the given name and scope. Returns the first match
204  /// or the end of the map.
205  template <typename T>
206  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
207                                   const FunctionData &FData);
208
209  /// A struct used to specify taint propagation rules for a function.
210  ///
211  /// If any of the possible taint source arguments is tainted, all of the
212  /// destination arguments should also be tainted. Use InvalidArgIndex in the
213  /// src list to specify that all of the arguments can introduce taint. Use
214  /// InvalidArgIndex in the dst arguments to signify that all the non-const
215  /// pointer and reference arguments might be tainted on return. If
216  /// ReturnValueIndex is added to the dst list, the return value will be
217  /// tainted.
218  struct TaintPropagationRule {
219    using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
220                                         CheckerContext &C);
221
222    /// List of arguments which can be taint sources and should be checked.
223    ArgVector SrcArgs;
224    /// List of arguments which should be tainted on function return.
225    ArgVector DstArgs;
226    /// Index for the first variadic parameter if exist.
227    unsigned VariadicIndex;
228    /// Show when a function has variadic parameters. If it has, it marks all
229    /// of them as source or destination.
230    VariadicType VarType;
231    /// Special function for tainted source determination. If defined, it can
232    /// override the default behavior.
233    PropagationFuncType PropagationFunc;
234
235    TaintPropagationRule()
236        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
237          PropagationFunc(nullptr) {}
238
239    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
240                         VariadicType Var = VariadicType::None,
241                         unsigned VarIndex = InvalidArgIndex,
242                         PropagationFuncType Func = nullptr)
243        : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
244          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
245
246    /// Get the propagation rule for a given function.
247    static TaintPropagationRule
248    getTaintPropagationRule(const NameRuleMap &CustomPropagations,
249                            const FunctionData &FData, CheckerContext &C);
250
251    void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
252    void addDstArg(unsigned A) { DstArgs.push_back(A); }
253
254    bool isNull() const {
255      return SrcArgs.empty() && DstArgs.empty() &&
256             VariadicType::None == VarType;
257    }
258
259    bool isDestinationArgument(unsigned ArgNum) const {
260      return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
261    }
262
263    static bool isTaintedOrPointsToTainted(const Expr *E,
264                                           const ProgramStateRef &State,
265                                           CheckerContext &C) {
266      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
267        return true;
268
269      if (!E->getType().getTypePtr()->isPointerType())
270        return false;
271
272      Optional<SVal> V = getPointeeOf(C, E);
273      return (V && isTainted(State, *V));
274    }
275
276    /// Pre-process a function which propagates taint according to the
277    /// taint rule.
278    ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
279
280    // Functions for custom taintedness propagation.
281    static bool postSocket(bool IsTainted, const CallEvent &Call,
282                           CheckerContext &C);
283  };
284
285  /// Defines a map between the propagation function's name, scope
286  /// and TaintPropagationRule.
287  NameRuleMap CustomPropagations;
288
289  /// Defines a map between the filter function's name, scope and filtering
290  /// args.
291  NameArgMap CustomFilters;
292
293  /// Defines a map between the sink function's name, scope and sinking args.
294  NameArgMap CustomSinks;
295};
296
297const unsigned GenericTaintChecker::ReturnValueIndex;
298const unsigned GenericTaintChecker::InvalidArgIndex;
299
300// FIXME: these lines can be removed in C++17
301constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
302constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
303constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
304constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
305} // end of anonymous namespace
306
307using TaintConfig = GenericTaintChecker::TaintConfiguration;
308
309LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
310LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
311
312namespace llvm {
313namespace yaml {
314template <> struct MappingTraits<TaintConfig> {
315  static void mapping(IO &IO, TaintConfig &Config) {
316    IO.mapOptional("Propagations", Config.Propagations);
317    IO.mapOptional("Filters", Config.Filters);
318    IO.mapOptional("Sinks", Config.Sinks);
319  }
320};
321
322template <> struct MappingTraits<TaintConfig::Propagation> {
323  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
324    IO.mapRequired("Name", Propagation.Name);
325    IO.mapOptional("Scope", Propagation.Scope);
326    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
327    IO.mapOptional("DstArgs", Propagation.DstArgs);
328    IO.mapOptional("VariadicType", Propagation.VarType,
329                   GenericTaintChecker::VariadicType::None);
330    IO.mapOptional("VariadicIndex", Propagation.VarIndex,
331                   GenericTaintChecker::InvalidArgIndex);
332  }
333};
334
335template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
336  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
337    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
338    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
339    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
340  }
341};
342
343template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
344  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
345    IO.mapRequired("Name", std::get<0>(NSA));
346    IO.mapOptional("Scope", std::get<1>(NSA));
347    IO.mapRequired("Args", std::get<2>(NSA));
348  }
349};
350} // namespace yaml
351} // namespace llvm
352
/// A set used to pass information from the pre-visit of a call to its
/// post-visit. Each value is an unsigned integer: either ReturnValueIndex, or
/// the index of a pointer/reference argument whose pointed-to data should be
/// tainted on return.
357REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
358
359GenericTaintChecker::ArgVector
360GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
361                                        const std::string &Option,
362                                        const SignedArgVector &Args) {
363  ArgVector Result;
364  for (int Arg : Args) {
365    if (Arg == -1)
366      Result.push_back(ReturnValueIndex);
367    else if (Arg < -1) {
368      Result.push_back(InvalidArgIndex);
369      Mgr.reportInvalidCheckerOptionValue(
370          this, Option,
371          "an argument number for propagation rules greater or equal to -1");
372    } else
373      Result.push_back(static_cast<unsigned>(Arg));
374  }
375  return Result;
376}
377
378void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
379                                             const std::string &Option,
380                                             TaintConfiguration &&Config) {
381  for (auto &P : Config.Propagations) {
382    GenericTaintChecker::CustomPropagations.emplace(
383        P.Name,
384        std::make_pair(P.Scope, TaintPropagationRule{
385                                    std::move(P.SrcArgs),
386                                    convertToArgVector(Mgr, Option, P.DstArgs),
387                                    P.VarType, P.VarIndex}));
388  }
389
390  for (auto &F : Config.Filters) {
391    GenericTaintChecker::CustomFilters.emplace(
392        std::get<0>(F),
393        std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
394  }
395
396  for (auto &S : Config.Sinks) {
397    GenericTaintChecker::CustomSinks.emplace(
398        std::get<0>(S),
399        std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
400  }
401}
402
403template <typename T>
404auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
405                                               const FunctionData &FData) {
406  auto Range = Map.equal_range(std::string(FData.Name));
407  auto It =
408      std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
409        const auto &Value = Entry.second;
410        StringRef Scope = Value.first;
411        return Scope.empty() || FData.isInScope(Scope);
412      });
413  return It != Range.second ? It : Map.end();
414}
415
416GenericTaintChecker::TaintPropagationRule
417GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
418    const NameRuleMap &CustomPropagations, const FunctionData &FData,
419    CheckerContext &C) {
420  // TODO: Currently, we might lose precision here: we always mark a return
421  // value as tainted even if it's just a pointer, pointing to tainted data.
422
423  // Check for exact name match for functions without builtin substitutes.
424  // Use qualified name, because these are C functions without namespace.
425  TaintPropagationRule Rule =
426      llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
427          // Source functions
428          // TODO: Add support for vfscanf & family.
429          .Case("fdopen", {{}, {ReturnValueIndex}})
430          .Case("fopen", {{}, {ReturnValueIndex}})
431          .Case("freopen", {{}, {ReturnValueIndex}})
432          .Case("getch", {{}, {ReturnValueIndex}})
433          .Case("getchar", {{}, {ReturnValueIndex}})
434          .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
435          .Case("getenv", {{}, {ReturnValueIndex}})
436          .Case("gets", {{}, {0, ReturnValueIndex}})
437          .Case("scanf", {{}, {}, VariadicType::Dst, 1})
438          .Case("socket", {{},
439                           {ReturnValueIndex},
440                           VariadicType::None,
441                           InvalidArgIndex,
442                           &TaintPropagationRule::postSocket})
443          .Case("wgetch", {{}, {ReturnValueIndex}})
444          // Propagating functions
445          .Case("atoi", {{0}, {ReturnValueIndex}})
446          .Case("atol", {{0}, {ReturnValueIndex}})
447          .Case("atoll", {{0}, {ReturnValueIndex}})
448          .Case("fgetc", {{0}, {ReturnValueIndex}})
449          .Case("fgetln", {{0}, {ReturnValueIndex}})
450          .Case("fgets", {{2}, {0, ReturnValueIndex}})
451          .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
452          .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
453          .Case("getc", {{0}, {ReturnValueIndex}})
454          .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
455          .Case("getdelim", {{3}, {0}})
456          .Case("getline", {{2}, {0}})
457          .Case("getw", {{0}, {ReturnValueIndex}})
458          .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
459          .Case("read", {{0, 2}, {1, ReturnValueIndex}})
460          .Case("strchr", {{0}, {ReturnValueIndex}})
461          .Case("strrchr", {{0}, {ReturnValueIndex}})
462          .Case("tolower", {{0}, {ReturnValueIndex}})
463          .Case("toupper", {{0}, {ReturnValueIndex}})
464          .Default({});
465
466  if (!Rule.isNull())
467    return Rule;
468  assert(FData.FDecl);
469
470  // Check if it's one of the memory setting/copying functions.
471  // This check is specialized but faster then calling isCLibraryFunction.
472  const FunctionDecl *FDecl = FData.FDecl;
473  unsigned BId = 0;
474  if ((BId = FDecl->getMemoryFunctionKind())) {
475    switch (BId) {
476    case Builtin::BImemcpy:
477    case Builtin::BImemmove:
478    case Builtin::BIstrncpy:
479    case Builtin::BIstrncat:
480      return {{1, 2}, {0, ReturnValueIndex}};
481    case Builtin::BIstrlcpy:
482    case Builtin::BIstrlcat:
483      return {{1, 2}, {0}};
484    case Builtin::BIstrndup:
485      return {{0, 1}, {ReturnValueIndex}};
486
487    default:
488      break;
489    }
490  }
491
492  // Process all other functions which could be defined as builtins.
493  if (Rule.isNull()) {
494    const auto OneOf = [FDecl](const auto &... Name) {
495      // FIXME: use fold expression in C++17
496      using unused = int[];
497      bool ret = false;
498      static_cast<void>(unused{
499          0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
500      return ret;
501    };
502    if (OneOf("snprintf"))
503      return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
504    if (OneOf("sprintf"))
505      return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
506    if (OneOf("strcpy", "stpcpy", "strcat"))
507      return {{1}, {0, ReturnValueIndex}};
508    if (OneOf("bcopy"))
509      return {{0, 2}, {1}};
510    if (OneOf("strdup", "strdupa", "wcsdup"))
511      return {{0}, {ReturnValueIndex}};
512  }
513
514  // Skipping the following functions, since they might be used for cleansing or
515  // smart memory copy:
516  // - memccpy - copying until hitting a special character.
517
518  auto It = findFunctionInConfig(CustomPropagations, FData);
519  if (It != CustomPropagations.end())
520    return It->second.second;
521  return {};
522}
523
524void GenericTaintChecker::checkPreCall(const CallEvent &Call,
525                                       CheckerContext &C) const {
526  Optional<FunctionData> FData = FunctionData::create(Call, C);
527  if (!FData)
528    return;
529
530  // Check for taintedness related errors first: system call, uncontrolled
531  // format string, tainted buffer size.
532  if (checkPre(Call, *FData, C))
533    return;
534
535  // Marks the function's arguments and/or return value tainted if it present in
536  // the list.
537  if (addSourcesPre(Call, *FData, C))
538    return;
539
540  addFiltersPre(Call, *FData, C);
541}
542
543void GenericTaintChecker::checkPostCall(const CallEvent &Call,
544                                        CheckerContext &C) const {
545  // Set the marked values as tainted. The return value only accessible from
546  // checkPostStmt.
547  propagateFromPre(Call, C);
548}
549
550void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
551                                     const char *NL, const char *Sep) const {
552  printTaint(State, Out, NL, Sep);
553}
554
555bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
556                                        const FunctionData &FData,
557                                        CheckerContext &C) const {
558  // First, try generating a propagation rule for this function.
559  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
560      this->CustomPropagations, FData, C);
561  if (!Rule.isNull()) {
562    ProgramStateRef State = Rule.process(Call, C);
563    if (State) {
564      C.addTransition(State);
565      return true;
566    }
567  }
568  return false;
569}
570
571bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
572                                        const FunctionData &FData,
573                                        CheckerContext &C) const {
574  auto It = findFunctionInConfig(CustomFilters, FData);
575  if (It == CustomFilters.end())
576    return false;
577
578  ProgramStateRef State = C.getState();
579  const auto &Value = It->second;
580  const ArgVector &Args = Value.second;
581  for (unsigned ArgNum : Args) {
582    if (ArgNum >= Call.getNumArgs())
583      continue;
584
585    const Expr *Arg = Call.getArgExpr(ArgNum);
586    Optional<SVal> V = getPointeeOf(C, Arg);
587    if (V)
588      State = removeTaint(State, *V);
589  }
590
591  if (State != C.getState()) {
592    C.addTransition(State);
593    return true;
594  }
595  return false;
596}
597
598bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
599                                           CheckerContext &C) {
600  ProgramStateRef State = C.getState();
601
602  // Depending on what was tainted at pre-visit, we determined a set of
603  // arguments which should be tainted after the function returns. These are
604  // stored in the state as TaintArgsOnPostVisit set.
605  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
606  if (TaintArgs.isEmpty())
607    return false;
608
609  for (unsigned ArgNum : TaintArgs) {
610    // Special handling for the tainted return value.
611    if (ArgNum == ReturnValueIndex) {
612      State = addTaint(State, Call.getReturnValue());
613      continue;
614    }
615
616    // The arguments are pointer arguments. The data they are pointing at is
617    // tainted after the call.
618    if (Call.getNumArgs() < (ArgNum + 1))
619      return false;
620    const Expr *Arg = Call.getArgExpr(ArgNum);
621    Optional<SVal> V = getPointeeOf(C, Arg);
622    if (V)
623      State = addTaint(State, *V);
624  }
625
626  // Clear up the taint info from the state.
627  State = State->remove<TaintArgsOnPostVisit>();
628
629  if (State != C.getState()) {
630    C.addTransition(State);
631    return true;
632  }
633  return false;
634}
635
636bool GenericTaintChecker::checkPre(const CallEvent &Call,
637                                   const FunctionData &FData,
638                                   CheckerContext &C) const {
639  if (checkUncontrolledFormatString(Call, C))
640    return true;
641
642  if (checkSystemCall(Call, FData.Name, C))
643    return true;
644
645  if (checkTaintedBufferSize(Call, C))
646    return true;
647
648  return checkCustomSinks(Call, FData, C);
649}
650
651Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
652                                                 const Expr *Arg) {
653  ProgramStateRef State = C.getState();
654  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
655  if (AddrVal.isUnknownOrUndef())
656    return None;
657
658  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
659  if (!AddrLoc)
660    return None;
661
662  QualType ArgTy = Arg->getType().getCanonicalType();
663  if (!ArgTy->isPointerType())
664    return State->getSVal(*AddrLoc);
665
666  QualType ValTy = ArgTy->getPointeeType();
667
668  // Do not dereference void pointers. Treat them as byte pointers instead.
669  // FIXME: we might want to consider more than just the first byte.
670  if (ValTy->isVoidType())
671    ValTy = C.getASTContext().CharTy;
672
673  return State->getSVal(*AddrLoc, ValTy);
674}
675
676ProgramStateRef
677GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
678                                                   CheckerContext &C) const {
679  ProgramStateRef State = C.getState();
680
681  // Check for taint in arguments.
682  bool IsTainted = true;
683  for (unsigned ArgNum : SrcArgs) {
684    if (ArgNum >= Call.getNumArgs())
685      continue;
686
687    if ((IsTainted =
688             isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
689      break;
690  }
691
692  // Check for taint in variadic arguments.
693  if (!IsTainted && VariadicType::Src == VarType) {
694    // Check if any of the arguments is tainted
695    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
696      if ((IsTainted =
697               isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
698        break;
699    }
700  }
701
702  if (PropagationFunc)
703    IsTainted = PropagationFunc(IsTainted, Call, C);
704
705  if (!IsTainted)
706    return State;
707
708  // Mark the arguments which should be tainted after the function returns.
709  for (unsigned ArgNum : DstArgs) {
710    // Should mark the return value?
711    if (ArgNum == ReturnValueIndex) {
712      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
713      continue;
714    }
715
716    if (ArgNum >= Call.getNumArgs())
717      continue;
718
719    // Mark the given argument.
720    State = State->add<TaintArgsOnPostVisit>(ArgNum);
721  }
722
723  // Mark all variadic arguments tainted if present.
724  if (VariadicType::Dst == VarType) {
725    // For all pointer and references that were passed in:
726    //   If they are not pointing to const data, mark data as tainted.
727    //   TODO: So far we are just going one level down; ideally we'd need to
728    //         recurse here.
729    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
730      const Expr *Arg = Call.getArgExpr(i);
731      // Process pointer argument.
732      const Type *ArgTy = Arg->getType().getTypePtr();
733      QualType PType = ArgTy->getPointeeType();
734      if ((!PType.isNull() && !PType.isConstQualified()) ||
735          (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
736        State = State->add<TaintArgsOnPostVisit>(i);
737      }
738    }
739  }
740
741  return State;
742}
743
744// If argument 0(protocol domain) is network, the return value should get taint.
745bool GenericTaintChecker::TaintPropagationRule::postSocket(
746    bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
747  SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
748  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
749  // White list the internal communication protocols.
750  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
751      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
752    return false;
753  return true;
754}
755
756bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
757  ProgramStateRef State = C.getState();
758  SVal Val = C.getSVal(E);
759
760  // stdin is a pointer, so it would be a region.
761  const MemRegion *MemReg = Val.getAsRegion();
762
763  // The region should be symbolic, we do not know it's value.
764  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
765  if (!SymReg)
766    return false;
767
768  // Get it's symbol and find the declaration region it's pointing to.
769  const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
770  if (!Sm)
771    return false;
772  const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
773  if (!DeclReg)
774    return false;
775
776  // This region corresponds to a declaration, find out if it's a global/extern
777  // variable named stdin with the proper type.
778  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
779    D = D->getCanonicalDecl();
780    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
781      const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
782      if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
783                       C.getASTContext().getFILEType().getCanonicalType())
784        return true;
785    }
786  }
787  return false;
788}
789
790static bool getPrintfFormatArgumentNum(const CallEvent &Call,
791                                       const CheckerContext &C,
792                                       unsigned &ArgNum) {
793  // Find if the function contains a format string argument.
794  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
795  // vsnprintf, syslog, custom annotated functions.
796  const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
797  if (!FDecl)
798    return false;
799  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
800    ArgNum = Format->getFormatIdx() - 1;
801    if ((Format->getType()->getName() == "printf") &&
802        Call.getNumArgs() > ArgNum)
803      return true;
804  }
805
806  // Or if a function is named setproctitle (this is a heuristic).
807  if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
808    ArgNum = 0;
809    return true;
810  }
811
812  return false;
813}
814
815bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
816                                                  CheckerContext &C) const {
817  assert(E);
818
819  // Check for taint.
820  ProgramStateRef State = C.getState();
821  Optional<SVal> PointedToSVal = getPointeeOf(C, E);
822  SVal TaintedSVal;
823  if (PointedToSVal && isTainted(State, *PointedToSVal))
824    TaintedSVal = *PointedToSVal;
825  else if (isTainted(State, E, C.getLocationContext()))
826    TaintedSVal = C.getSVal(E);
827  else
828    return false;
829
830  // Generate diagnostic.
831  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
832    initBugType();
833    auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
834    report->addRange(E->getSourceRange());
835    report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
836    C.emitReport(std::move(report));
837    return true;
838  }
839  return false;
840}
841
842bool GenericTaintChecker::checkUncontrolledFormatString(
843    const CallEvent &Call, CheckerContext &C) const {
844  // Check if the function contains a format string argument.
845  unsigned ArgNum = 0;
846  if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
847    return false;
848
849  // If either the format string content or the pointer itself are tainted,
850  // warn.
851  return generateReportIfTainted(Call.getArgExpr(ArgNum),
852                                 MsgUncontrolledFormatString, C);
853}
854
855bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
856                                          CheckerContext &C) const {
857  // TODO: It might make sense to run this check on demand. In some cases,
858  // we should check if the environment has been cleansed here. We also might
859  // need to know if the user was reset before these calls(seteuid).
860  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
861                        .Case("system", 0)
862                        .Case("popen", 0)
863                        .Case("execl", 0)
864                        .Case("execle", 0)
865                        .Case("execlp", 0)
866                        .Case("execv", 0)
867                        .Case("execvp", 0)
868                        .Case("execvP", 0)
869                        .Case("execve", 0)
870                        .Case("dlopen", 0)
871                        .Default(InvalidArgIndex);
872
873  if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
874    return false;
875
876  return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
877                                 C);
878}
879
880// TODO: Should this check be a part of the CString checker?
881// If yes, should taint be a global setting?
882bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
883                                                 CheckerContext &C) const {
884  const auto *FDecl = Call.getDecl()->getAsFunction();
885  // If the function has a buffer size argument, set ArgNum.
886  unsigned ArgNum = InvalidArgIndex;
887  unsigned BId = 0;
888  if ((BId = FDecl->getMemoryFunctionKind())) {
889    switch (BId) {
890    case Builtin::BImemcpy:
891    case Builtin::BImemmove:
892    case Builtin::BIstrncpy:
893      ArgNum = 2;
894      break;
895    case Builtin::BIstrndup:
896      ArgNum = 1;
897      break;
898    default:
899      break;
900    }
901  }
902
903  if (ArgNum == InvalidArgIndex) {
904    using CCtx = CheckerContext;
905    if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
906        CCtx::isCLibraryFunction(FDecl, "calloc") ||
907        CCtx::isCLibraryFunction(FDecl, "alloca"))
908      ArgNum = 0;
909    else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
910      ArgNum = 3;
911    else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
912      ArgNum = 1;
913    else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
914      ArgNum = 2;
915  }
916
917  return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
918         generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
919                                 C);
920}
921
922bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
923                                           const FunctionData &FData,
924                                           CheckerContext &C) const {
925  auto It = findFunctionInConfig(CustomSinks, FData);
926  if (It == CustomSinks.end())
927    return false;
928
929  const auto &Value = It->second;
930  const GenericTaintChecker::ArgVector &Args = Value.second;
931  for (unsigned ArgNum : Args) {
932    if (ArgNum >= Call.getNumArgs())
933      continue;
934
935    if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
936      return true;
937  }
938
939  return false;
940}
941
942void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
943  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
944  std::string Option{"Config"};
945  StringRef ConfigFile =
946      Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
947  llvm::Optional<TaintConfig> Config =
948      getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
949  if (Config)
950    Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
951}
952
953bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
954  return true;
955}
956