GenericTaintChecker.cpp revision 321369
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/AST/Attr.h"
19#include "clang/Basic/Builtins.h"
20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21#include "clang/StaticAnalyzer/Core/Checker.h"
22#include "clang/StaticAnalyzer/Core/CheckerManager.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25#include <climits>
26
27using namespace clang;
28using namespace ento;
29
30namespace {
31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32                                            check::PreStmt<CallExpr> > {
33public:
34  static void *getTag() { static int Tag; return &Tag; }
35
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44
45  mutable std::unique_ptr<BugType> BT;
46  inline void initBugType() const {
47    if (!BT)
48      BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49  }
50
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67
68  /// \brief Given a pointer argument, return the value it points to.
69  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
70
71  /// Functions defining the attack surface.
72  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
73                                                       CheckerContext &C) const;
74  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
75  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
77
78  /// Taint the scanned input if the file is tainted.
79  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
80
81  /// Check for CWE-134: Uncontrolled Format String.
82  static const char MsgUncontrolledFormatString[];
83  bool checkUncontrolledFormatString(const CallExpr *CE,
84                                     CheckerContext &C) const;
85
86  /// Check for:
87  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
88  /// CWE-78, "Failure to Sanitize Data into an OS Command"
89  static const char MsgSanitizeSystemArgs[];
90  bool checkSystemCall(const CallExpr *CE, StringRef Name,
91                       CheckerContext &C) const;
92
93  /// Check if tainted data is used as a buffer size ins strn.. functions,
94  /// and allocators.
95  static const char MsgTaintedBufferSize[];
96  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
97                              CheckerContext &C) const;
98
99  /// Generate a report if the expression is tainted or points to tainted data.
100  bool generateReportIfTainted(const Expr *E, const char Msg[],
101                               CheckerContext &C) const;
102
103  /// The bug visitor prints a diagnostic message at the location where a given
104  /// variable was tainted.
105  class TaintBugVisitor
106      : public BugReporterVisitorImpl<TaintBugVisitor> {
107  private:
108    const SVal V;
109
110  public:
111    TaintBugVisitor(const SVal V) : V(V) {}
112    void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
113
114    std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
115                                                   const ExplodedNode *PrevN,
116                                                   BugReporterContext &BRC,
117                                                   BugReport &BR) override;
118  };
119
120  typedef SmallVector<unsigned, 2> ArgVector;
121
122  /// \brief A struct used to specify taint propagation rules for a function.
123  ///
124  /// If any of the possible taint source arguments is tainted, all of the
125  /// destination arguments should also be tainted. Use InvalidArgIndex in the
126  /// src list to specify that all of the arguments can introduce taint. Use
127  /// InvalidArgIndex in the dst arguments to signify that all the non-const
128  /// pointer and reference arguments might be tainted on return. If
129  /// ReturnValueIndex is added to the dst list, the return value will be
130  /// tainted.
131  struct TaintPropagationRule {
132    /// List of arguments which can be taint sources and should be checked.
133    ArgVector SrcArgs;
134    /// List of arguments which should be tainted on function return.
135    ArgVector DstArgs;
136    // TODO: Check if using other data structures would be more optimal.
137
138    TaintPropagationRule() {}
139
140    TaintPropagationRule(unsigned SArg,
141                         unsigned DArg, bool TaintRet = false) {
142      SrcArgs.push_back(SArg);
143      DstArgs.push_back(DArg);
144      if (TaintRet)
145        DstArgs.push_back(ReturnValueIndex);
146    }
147
148    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
149                         unsigned DArg, bool TaintRet = false) {
150      SrcArgs.push_back(SArg1);
151      SrcArgs.push_back(SArg2);
152      DstArgs.push_back(DArg);
153      if (TaintRet)
154        DstArgs.push_back(ReturnValueIndex);
155    }
156
157    /// Get the propagation rule for a given function.
158    static TaintPropagationRule
159      getTaintPropagationRule(const FunctionDecl *FDecl,
160                              StringRef Name,
161                              CheckerContext &C);
162
163    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
164    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
165
166    inline bool isNull() const { return SrcArgs.empty(); }
167
168    inline bool isDestinationArgument(unsigned ArgNum) const {
169      return (std::find(DstArgs.begin(),
170                        DstArgs.end(), ArgNum) != DstArgs.end());
171    }
172
173    static inline bool isTaintedOrPointsToTainted(const Expr *E,
174                                                  ProgramStateRef State,
175                                                  CheckerContext &C) {
176      if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
177        return true;
178
179      if (!E->getType().getTypePtr()->isPointerType())
180        return false;
181
182      Optional<SVal> V = getPointedToSVal(C, E);
183      return (V && State->isTainted(*V));
184    }
185
186    /// \brief Pre-process a function which propagates taint according to the
187    /// taint rule.
188    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
189
190  };
191};
192
193const unsigned GenericTaintChecker::ReturnValueIndex;
194const unsigned GenericTaintChecker::InvalidArgIndex;
195
196const char GenericTaintChecker::MsgUncontrolledFormatString[] =
197  "Untrusted data is used as a format string "
198  "(CWE-134: Uncontrolled Format String)";
199
200const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
201  "Untrusted data is passed to a system call "
202  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
203
204const char GenericTaintChecker::MsgTaintedBufferSize[] =
205  "Untrusted data is used to specify the buffer size "
206  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
207  "character data and the null terminator)";
208
209} // end of anonymous namespace
210
211/// A set which is used to pass information from call pre-visit instruction
212/// to the call post-visit. The values are unsigned integers, which are either
213/// ReturnValueIndex, or indexes of the pointer/reference argument, which
214/// points to data, which should be tainted on return.
215REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
216
217std::shared_ptr<PathDiagnosticPiece>
218GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N,
219    const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) {
220
221  // Find the ExplodedNode where the taint was first introduced
222  if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
223    return nullptr;
224
225  const Stmt *S = PathDiagnosticLocation::getStmt(N);
226  if (!S)
227    return nullptr;
228
229  const LocationContext *NCtx = N->getLocationContext();
230  PathDiagnosticLocation L =
231      PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
232  if (!L.isValid() || !L.asLocation().isValid())
233    return nullptr;
234
235  return std::make_shared<PathDiagnosticEventPiece>(
236      L, "Taint originated here");
237}
238
239GenericTaintChecker::TaintPropagationRule
240GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
241                                                     const FunctionDecl *FDecl,
242                                                     StringRef Name,
243                                                     CheckerContext &C) {
244  // TODO: Currently, we might lose precision here: we always mark a return
245  // value as tainted even if it's just a pointer, pointing to tainted data.
246
247  // Check for exact name match for functions without builtin substitutes.
248  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
249    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
250    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
251    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
252    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
253    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
254    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
255    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
256    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
257    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
258    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
259    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
260    .Case("read", TaintPropagationRule(0, 2, 1, true))
261    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
262    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
263    .Case("fgets", TaintPropagationRule(2, 0, true))
264    .Case("getline", TaintPropagationRule(2, 0))
265    .Case("getdelim", TaintPropagationRule(3, 0))
266    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
267    .Default(TaintPropagationRule());
268
269  if (!Rule.isNull())
270    return Rule;
271
272  // Check if it's one of the memory setting/copying functions.
273  // This check is specialized but faster then calling isCLibraryFunction.
274  unsigned BId = 0;
275  if ( (BId = FDecl->getMemoryFunctionKind()) )
276    switch(BId) {
277    case Builtin::BImemcpy:
278    case Builtin::BImemmove:
279    case Builtin::BIstrncpy:
280    case Builtin::BIstrncat:
281      return TaintPropagationRule(1, 2, 0, true);
282    case Builtin::BIstrlcpy:
283    case Builtin::BIstrlcat:
284      return TaintPropagationRule(1, 2, 0, false);
285    case Builtin::BIstrndup:
286      return TaintPropagationRule(0, 1, ReturnValueIndex);
287
288    default:
289      break;
290    };
291
292  // Process all other functions which could be defined as builtins.
293  if (Rule.isNull()) {
294    if (C.isCLibraryFunction(FDecl, "snprintf") ||
295        C.isCLibraryFunction(FDecl, "sprintf"))
296      return TaintPropagationRule(InvalidArgIndex, 0, true);
297    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
298             C.isCLibraryFunction(FDecl, "stpcpy") ||
299             C.isCLibraryFunction(FDecl, "strcat"))
300      return TaintPropagationRule(1, 0, true);
301    else if (C.isCLibraryFunction(FDecl, "bcopy"))
302      return TaintPropagationRule(0, 2, 1, false);
303    else if (C.isCLibraryFunction(FDecl, "strdup") ||
304             C.isCLibraryFunction(FDecl, "strdupa"))
305      return TaintPropagationRule(0, ReturnValueIndex);
306    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
307      return TaintPropagationRule(0, ReturnValueIndex);
308  }
309
310  // Skipping the following functions, since they might be used for cleansing
311  // or smart memory copy:
312  // - memccpy - copying until hitting a special character.
313
314  return TaintPropagationRule();
315}
316
317void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
318                                       CheckerContext &C) const {
319  // Check for errors first.
320  if (checkPre(CE, C))
321    return;
322
323  // Add taint second.
324  addSourcesPre(CE, C);
325}
326
327void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
328                                        CheckerContext &C) const {
329  if (propagateFromPre(CE, C))
330    return;
331  addSourcesPost(CE, C);
332}
333
334void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
335                                        CheckerContext &C) const {
336  ProgramStateRef State = nullptr;
337  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
338  if (!FDecl || FDecl->getKind() != Decl::Function)
339    return;
340
341  StringRef Name = C.getCalleeName(FDecl);
342  if (Name.empty())
343    return;
344
345  // First, try generating a propagation rule for this function.
346  TaintPropagationRule Rule =
347    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
348  if (!Rule.isNull()) {
349    State = Rule.process(CE, C);
350    if (!State)
351      return;
352    C.addTransition(State);
353    return;
354  }
355
356  // Otherwise, check if we have custom pre-processing implemented.
357  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
358    .Case("fscanf", &GenericTaintChecker::preFscanf)
359    .Default(nullptr);
360  // Check and evaluate the call.
361  if (evalFunction)
362    State = (this->*evalFunction)(CE, C);
363  if (!State)
364    return;
365  C.addTransition(State);
366
367}
368
369bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
370                                           CheckerContext &C) const {
371  ProgramStateRef State = C.getState();
372
373  // Depending on what was tainted at pre-visit, we determined a set of
374  // arguments which should be tainted after the function returns. These are
375  // stored in the state as TaintArgsOnPostVisit set.
376  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
377  if (TaintArgs.isEmpty())
378    return false;
379
380  for (llvm::ImmutableSet<unsigned>::iterator
381         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
382    unsigned ArgNum  = *I;
383
384    // Special handling for the tainted return value.
385    if (ArgNum == ReturnValueIndex) {
386      State = State->addTaint(CE, C.getLocationContext());
387      continue;
388    }
389
390    // The arguments are pointer arguments. The data they are pointing at is
391    // tainted after the call.
392    if (CE->getNumArgs() < (ArgNum + 1))
393      return false;
394    const Expr* Arg = CE->getArg(ArgNum);
395    Optional<SVal> V = getPointedToSVal(C, Arg);
396    if (V)
397      State = State->addTaint(*V);
398  }
399
400  // Clear up the taint info from the state.
401  State = State->remove<TaintArgsOnPostVisit>();
402
403  if (State != C.getState()) {
404    C.addTransition(State);
405    return true;
406  }
407  return false;
408}
409
410void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
411                                         CheckerContext &C) const {
412  // Define the attack surface.
413  // Set the evaluation function by switching on the callee name.
414  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
415  if (!FDecl || FDecl->getKind() != Decl::Function)
416    return;
417
418  StringRef Name = C.getCalleeName(FDecl);
419  if (Name.empty())
420    return;
421  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
422    .Case("scanf", &GenericTaintChecker::postScanf)
423    // TODO: Add support for vfscanf & family.
424    .Case("getchar", &GenericTaintChecker::postRetTaint)
425    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
426    .Case("getenv", &GenericTaintChecker::postRetTaint)
427    .Case("fopen", &GenericTaintChecker::postRetTaint)
428    .Case("fdopen", &GenericTaintChecker::postRetTaint)
429    .Case("freopen", &GenericTaintChecker::postRetTaint)
430    .Case("getch", &GenericTaintChecker::postRetTaint)
431    .Case("wgetch", &GenericTaintChecker::postRetTaint)
432    .Case("socket", &GenericTaintChecker::postSocket)
433    .Default(nullptr);
434
435  // If the callee isn't defined, it is not of security concern.
436  // Check and evaluate the call.
437  ProgramStateRef State = nullptr;
438  if (evalFunction)
439    State = (this->*evalFunction)(CE, C);
440  if (!State)
441    return;
442
443  C.addTransition(State);
444}
445
446bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
447
448  if (checkUncontrolledFormatString(CE, C))
449    return true;
450
451  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
452  if (!FDecl || FDecl->getKind() != Decl::Function)
453    return false;
454
455  StringRef Name = C.getCalleeName(FDecl);
456  if (Name.empty())
457    return false;
458
459  if (checkSystemCall(CE, Name, C))
460    return true;
461
462  if (checkTaintedBufferSize(CE, FDecl, C))
463    return true;
464
465  return false;
466}
467
468Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
469                                            const Expr* Arg) {
470  ProgramStateRef State = C.getState();
471  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
472  if (AddrVal.isUnknownOrUndef())
473    return None;
474
475  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
476  if (!AddrLoc)
477    return None;
478
479  const PointerType *ArgTy =
480    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
481  return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
482}
483
484ProgramStateRef
485GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
486                                                   CheckerContext &C) const {
487  ProgramStateRef State = C.getState();
488
489  // Check for taint in arguments.
490  bool IsTainted = false;
491  for (ArgVector::const_iterator I = SrcArgs.begin(),
492                                 E = SrcArgs.end(); I != E; ++I) {
493    unsigned ArgNum = *I;
494
495    if (ArgNum == InvalidArgIndex) {
496      // Check if any of the arguments is tainted, but skip the
497      // destination arguments.
498      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
499        if (isDestinationArgument(i))
500          continue;
501        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
502          break;
503      }
504      break;
505    }
506
507    if (CE->getNumArgs() < (ArgNum + 1))
508      return State;
509    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
510      break;
511  }
512  if (!IsTainted)
513    return State;
514
515  // Mark the arguments which should be tainted after the function returns.
516  for (ArgVector::const_iterator I = DstArgs.begin(),
517                                 E = DstArgs.end(); I != E; ++I) {
518    unsigned ArgNum = *I;
519
520    // Should we mark all arguments as tainted?
521    if (ArgNum == InvalidArgIndex) {
522      // For all pointer and references that were passed in:
523      //   If they are not pointing to const data, mark data as tainted.
524      //   TODO: So far we are just going one level down; ideally we'd need to
525      //         recurse here.
526      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
527        const Expr *Arg = CE->getArg(i);
528        // Process pointer argument.
529        const Type *ArgTy = Arg->getType().getTypePtr();
530        QualType PType = ArgTy->getPointeeType();
531        if ((!PType.isNull() && !PType.isConstQualified())
532            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
533          State = State->add<TaintArgsOnPostVisit>(i);
534      }
535      continue;
536    }
537
538    // Should mark the return value?
539    if (ArgNum == ReturnValueIndex) {
540      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
541      continue;
542    }
543
544    // Mark the given argument.
545    assert(ArgNum < CE->getNumArgs());
546    State = State->add<TaintArgsOnPostVisit>(ArgNum);
547  }
548
549  return State;
550}
551
552
553// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
554// and arg 1 should get taint.
555ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
556                                                   CheckerContext &C) const {
557  assert(CE->getNumArgs() >= 2);
558  ProgramStateRef State = C.getState();
559
560  // Check is the file descriptor is tainted.
561  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
562      isStdin(CE->getArg(0), C)) {
563    // All arguments except for the first two should get taint.
564    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
565        State = State->add<TaintArgsOnPostVisit>(i);
566    return State;
567  }
568
569  return nullptr;
570}
571
572
573// If argument 0(protocol domain) is network, the return value should get taint.
574ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
575                                                CheckerContext &C) const {
576  ProgramStateRef State = C.getState();
577  if (CE->getNumArgs() < 3)
578    return State;
579
580  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
581  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
582  // White list the internal communication protocols.
583  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
584      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
585    return State;
586  State = State->addTaint(CE, C.getLocationContext());
587  return State;
588}
589
590ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
591                                                   CheckerContext &C) const {
592  ProgramStateRef State = C.getState();
593  if (CE->getNumArgs() < 2)
594    return State;
595
596  // All arguments except for the very first one should get taint.
597  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
598    // The arguments are pointer arguments. The data they are pointing at is
599    // tainted after the call.
600    const Expr* Arg = CE->getArg(i);
601    Optional<SVal> V = getPointedToSVal(C, Arg);
602    if (V)
603      State = State->addTaint(*V);
604  }
605  return State;
606}
607
608ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
609                                                  CheckerContext &C) const {
610  return C.getState()->addTaint(CE, C.getLocationContext());
611}
612
613bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
614  ProgramStateRef State = C.getState();
615  SVal Val = State->getSVal(E, C.getLocationContext());
616
617  // stdin is a pointer, so it would be a region.
618  const MemRegion *MemReg = Val.getAsRegion();
619
620  // The region should be symbolic, we do not know it's value.
621  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
622  if (!SymReg)
623    return false;
624
625  // Get it's symbol and find the declaration region it's pointing to.
626  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
627  if (!Sm)
628    return false;
629  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
630  if (!DeclReg)
631    return false;
632
633  // This region corresponds to a declaration, find out if it's a global/extern
634  // variable named stdin with the proper type.
635  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
636    D = D->getCanonicalDecl();
637    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
638        if (const PointerType * PtrTy =
639              dyn_cast<PointerType>(D->getType().getTypePtr()))
640          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
641            return true;
642  }
643  return false;
644}
645
646static bool getPrintfFormatArgumentNum(const CallExpr *CE,
647                                       const CheckerContext &C,
648                                       unsigned int &ArgNum) {
649  // Find if the function contains a format string argument.
650  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
651  // vsnprintf, syslog, custom annotated functions.
652  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
653  if (!FDecl)
654    return false;
655  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
656    ArgNum = Format->getFormatIdx() - 1;
657    if ((Format->getType()->getName() == "printf") &&
658         CE->getNumArgs() > ArgNum)
659      return true;
660  }
661
662  // Or if a function is named setproctitle (this is a heuristic).
663  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
664    ArgNum = 0;
665    return true;
666  }
667
668  return false;
669}
670
671bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
672                                                  const char Msg[],
673                                                  CheckerContext &C) const {
674  assert(E);
675
676  // Check for taint.
677  ProgramStateRef State = C.getState();
678  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
679  SVal TaintedSVal;
680  if (PointedToSVal && State->isTainted(*PointedToSVal))
681    TaintedSVal = *PointedToSVal;
682  else if (State->isTainted(E, C.getLocationContext()))
683    TaintedSVal = C.getSVal(E);
684  else
685    return false;
686
687  // Generate diagnostic.
688  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
689    initBugType();
690    auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
691    report->addRange(E->getSourceRange());
692    report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
693    C.emitReport(std::move(report));
694    return true;
695  }
696  return false;
697}
698
699bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
700                                                        CheckerContext &C) const{
701  // Check if the function contains a format string argument.
702  unsigned int ArgNum = 0;
703  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
704    return false;
705
706  // If either the format string content or the pointer itself are tainted, warn.
707  return generateReportIfTainted(CE->getArg(ArgNum),
708                                 MsgUncontrolledFormatString, C);
709}
710
711bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
712                                          StringRef Name,
713                                          CheckerContext &C) const {
714  // TODO: It might make sense to run this check on demand. In some cases,
715  // we should check if the environment has been cleansed here. We also might
716  // need to know if the user was reset before these calls(seteuid).
717  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
718    .Case("system", 0)
719    .Case("popen", 0)
720    .Case("execl", 0)
721    .Case("execle", 0)
722    .Case("execlp", 0)
723    .Case("execv", 0)
724    .Case("execvp", 0)
725    .Case("execvP", 0)
726    .Case("execve", 0)
727    .Case("dlopen", 0)
728    .Default(UINT_MAX);
729
730  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
731    return false;
732
733  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
734}
735
736// TODO: Should this check be a part of the CString checker?
737// If yes, should taint be a global setting?
738bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
739                                                 const FunctionDecl *FDecl,
740                                                 CheckerContext &C) const {
741  // If the function has a buffer size argument, set ArgNum.
742  unsigned ArgNum = InvalidArgIndex;
743  unsigned BId = 0;
744  if ( (BId = FDecl->getMemoryFunctionKind()) )
745    switch(BId) {
746    case Builtin::BImemcpy:
747    case Builtin::BImemmove:
748    case Builtin::BIstrncpy:
749      ArgNum = 2;
750      break;
751    case Builtin::BIstrndup:
752      ArgNum = 1;
753      break;
754    default:
755      break;
756    };
757
758  if (ArgNum == InvalidArgIndex) {
759    if (C.isCLibraryFunction(FDecl, "malloc") ||
760        C.isCLibraryFunction(FDecl, "calloc") ||
761        C.isCLibraryFunction(FDecl, "alloca"))
762      ArgNum = 0;
763    else if (C.isCLibraryFunction(FDecl, "memccpy"))
764      ArgNum = 3;
765    else if (C.isCLibraryFunction(FDecl, "realloc"))
766      ArgNum = 1;
767    else if (C.isCLibraryFunction(FDecl, "bcopy"))
768      ArgNum = 2;
769  }
770
771  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
772         generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
773}
774
775void ento::registerGenericTaintChecker(CheckerManager &mgr) {
776  mgr.registerChecker<GenericTaintChecker>();
777}
778