GenericTaintChecker.cpp revision 234287
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/Basic/Builtins.h"
24#include <climits>
25
26using namespace clang;
27using namespace ento;
28
29namespace {
30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
31                                            check::PreStmt<CallExpr> > {
32public:
33  static void *getTag() { static int Tag; return &Tag; }
34
35  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44
45  mutable OwningPtr<BugType> BT;
46  inline void initBugType() const {
47    if (!BT)
48      BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
49  }
50
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67
68  /// \brief Given a pointer argument, get the symbol of the value it contains
69  /// (points to).
70  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71
72  /// Functions defining the attack surface.
73  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74                                                       CheckerContext &C) const;
75  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78
79  /// Taint the scanned input if the file is tainted.
80  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81
82  /// Check for CWE-134: Uncontrolled Format String.
83  static const char MsgUncontrolledFormatString[];
84  bool checkUncontrolledFormatString(const CallExpr *CE,
85                                     CheckerContext &C) const;
86
87  /// Check for:
88  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89  /// CWE-78, "Failure to Sanitize Data into an OS Command"
90  static const char MsgSanitizeSystemArgs[];
91  bool checkSystemCall(const CallExpr *CE, StringRef Name,
92                       CheckerContext &C) const;
93
94  /// Check if tainted data is used as a buffer size ins strn.. functions,
95  /// and allocators.
96  static const char MsgTaintedBufferSize[];
97  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98                              CheckerContext &C) const;
99
100  /// Generate a report if the expression is tainted or points to tainted data.
101  bool generateReportIfTainted(const Expr *E, const char Msg[],
102                               CheckerContext &C) const;
103
104
105  typedef llvm::SmallVector<unsigned, 2> ArgVector;
106
107  /// \brief A struct used to specify taint propagation rules for a function.
108  ///
109  /// If any of the possible taint source arguments is tainted, all of the
110  /// destination arguments should also be tainted. Use InvalidArgIndex in the
111  /// src list to specify that all of the arguments can introduce taint. Use
112  /// InvalidArgIndex in the dst arguments to signify that all the non-const
113  /// pointer and reference arguments might be tainted on return. If
114  /// ReturnValueIndex is added to the dst list, the return value will be
115  /// tainted.
116  struct TaintPropagationRule {
117    /// List of arguments which can be taint sources and should be checked.
118    ArgVector SrcArgs;
119    /// List of arguments which should be tainted on function return.
120    ArgVector DstArgs;
121    // TODO: Check if using other data structures would be more optimal.
122
123    TaintPropagationRule() {}
124
125    TaintPropagationRule(unsigned SArg,
126                         unsigned DArg, bool TaintRet = false) {
127      SrcArgs.push_back(SArg);
128      DstArgs.push_back(DArg);
129      if (TaintRet)
130        DstArgs.push_back(ReturnValueIndex);
131    }
132
133    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134                         unsigned DArg, bool TaintRet = false) {
135      SrcArgs.push_back(SArg1);
136      SrcArgs.push_back(SArg2);
137      DstArgs.push_back(DArg);
138      if (TaintRet)
139        DstArgs.push_back(ReturnValueIndex);
140    }
141
142    /// Get the propagation rule for a given function.
143    static TaintPropagationRule
144      getTaintPropagationRule(const FunctionDecl *FDecl,
145                              StringRef Name,
146                              CheckerContext &C);
147
148    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150
151    inline bool isNull() const { return SrcArgs.empty(); }
152
153    inline bool isDestinationArgument(unsigned ArgNum) const {
154      return (std::find(DstArgs.begin(),
155                        DstArgs.end(), ArgNum) != DstArgs.end());
156    }
157
158    static inline bool isTaintedOrPointsToTainted(const Expr *E,
159                                                  ProgramStateRef State,
160                                                  CheckerContext &C) {
161      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162              (E->getType().getTypePtr()->isPointerType() &&
163               State->isTainted(getPointedToSymbol(C, E))));
164    }
165
166    /// \brief Pre-process a function which propagates taint according to the
167    /// taint rule.
168    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169
170  };
171};
172
173const unsigned GenericTaintChecker::ReturnValueIndex;
174const unsigned GenericTaintChecker::InvalidArgIndex;
175
176const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177  "Untrusted data is used as a format string "
178  "(CWE-134: Uncontrolled Format String)";
179
180const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181  "Untrusted data is passed to a system call "
182  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183
184const char GenericTaintChecker::MsgTaintedBufferSize[] =
185  "Untrusted data is used to specify the buffer size "
186  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187  "character data and the null terminator)";
188
189} // end of anonymous namespace
190
191/// A set which is used to pass information from call pre-visit instruction
192/// to the call post-visit. The values are unsigned integers, which are either
193/// ReturnValueIndex, or indexes of the pointer/reference argument, which
194/// points to data, which should be tainted on return.
195namespace { struct TaintArgsOnPostVisit{}; }
196namespace clang { namespace ento {
197template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
198    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
199  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
200};
201}}
202
203GenericTaintChecker::TaintPropagationRule
204GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
205                                                     const FunctionDecl *FDecl,
206                                                     StringRef Name,
207                                                     CheckerContext &C) {
208  // TODO: Currently, we might loose precision here: we always mark a return
209  // value as tainted even if it's just a pointer, pointing to tainted data.
210
211  // Check for exact name match for functions without builtin substitutes.
212  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
213    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
214    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
215    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
216    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
217    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
218    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
219    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
220    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
221    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
222    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
223    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
224    .Case("read", TaintPropagationRule(0, 2, 1, true))
225    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
226    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
227    .Case("fgets", TaintPropagationRule(2, 0, true))
228    .Case("getline", TaintPropagationRule(2, 0))
229    .Case("getdelim", TaintPropagationRule(3, 0))
230    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
231    .Default(TaintPropagationRule());
232
233  if (!Rule.isNull())
234    return Rule;
235
236  // Check if it's one of the memory setting/copying functions.
237  // This check is specialized but faster then calling isCLibraryFunction.
238  unsigned BId = 0;
239  if ( (BId = FDecl->getMemoryFunctionKind()) )
240    switch(BId) {
241    case Builtin::BImemcpy:
242    case Builtin::BImemmove:
243    case Builtin::BIstrncpy:
244    case Builtin::BIstrncat:
245      return TaintPropagationRule(1, 2, 0, true);
246    case Builtin::BIstrlcpy:
247    case Builtin::BIstrlcat:
248      return TaintPropagationRule(1, 2, 0, false);
249    case Builtin::BIstrndup:
250      return TaintPropagationRule(0, 1, ReturnValueIndex);
251
252    default:
253      break;
254    };
255
256  // Process all other functions which could be defined as builtins.
257  if (Rule.isNull()) {
258    if (C.isCLibraryFunction(FDecl, "snprintf") ||
259        C.isCLibraryFunction(FDecl, "sprintf"))
260      return TaintPropagationRule(InvalidArgIndex, 0, true);
261    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
262             C.isCLibraryFunction(FDecl, "stpcpy") ||
263             C.isCLibraryFunction(FDecl, "strcat"))
264      return TaintPropagationRule(1, 0, true);
265    else if (C.isCLibraryFunction(FDecl, "bcopy"))
266      return TaintPropagationRule(0, 2, 1, false);
267    else if (C.isCLibraryFunction(FDecl, "strdup") ||
268             C.isCLibraryFunction(FDecl, "strdupa"))
269      return TaintPropagationRule(0, ReturnValueIndex);
270    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
271      return TaintPropagationRule(0, ReturnValueIndex);
272  }
273
274  // Skipping the following functions, since they might be used for cleansing
275  // or smart memory copy:
276  // - memccpy - copying untill hitting a special character.
277
278  return TaintPropagationRule();
279}
280
281void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
282                                       CheckerContext &C) const {
283  // Check for errors first.
284  if (checkPre(CE, C))
285    return;
286
287  // Add taint second.
288  addSourcesPre(CE, C);
289}
290
291void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
292                                        CheckerContext &C) const {
293  if (propagateFromPre(CE, C))
294    return;
295  addSourcesPost(CE, C);
296}
297
298void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
299                                        CheckerContext &C) const {
300  ProgramStateRef State = 0;
301  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
302  StringRef Name = C.getCalleeName(FDecl);
303  if (Name.empty())
304    return;
305
306  // First, try generating a propagation rule for this function.
307  TaintPropagationRule Rule =
308    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
309  if (!Rule.isNull()) {
310    State = Rule.process(CE, C);
311    if (!State)
312      return;
313    C.addTransition(State);
314    return;
315  }
316
317  // Otherwise, check if we have custom pre-processing implemented.
318  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
319    .Case("fscanf", &GenericTaintChecker::preFscanf)
320    .Default(0);
321  // Check and evaluate the call.
322  if (evalFunction)
323    State = (this->*evalFunction)(CE, C);
324  if (!State)
325    return;
326  C.addTransition(State);
327
328}
329
330bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
331                                           CheckerContext &C) const {
332  ProgramStateRef State = C.getState();
333
334  // Depending on what was tainted at pre-visit, we determined a set of
335  // arguments which should be tainted after the function returns. These are
336  // stored in the state as TaintArgsOnPostVisit set.
337  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
338  if (TaintArgs.isEmpty())
339    return false;
340
341  for (llvm::ImmutableSet<unsigned>::iterator
342         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
343    unsigned ArgNum  = *I;
344
345    // Special handling for the tainted return value.
346    if (ArgNum == ReturnValueIndex) {
347      State = State->addTaint(CE, C.getLocationContext());
348      continue;
349    }
350
351    // The arguments are pointer arguments. The data they are pointing at is
352    // tainted after the call.
353    if (CE->getNumArgs() < (ArgNum + 1))
354      return false;
355    const Expr* Arg = CE->getArg(ArgNum);
356    SymbolRef Sym = getPointedToSymbol(C, Arg);
357    if (Sym)
358      State = State->addTaint(Sym);
359  }
360
361  // Clear up the taint info from the state.
362  State = State->remove<TaintArgsOnPostVisit>();
363
364  if (State != C.getState()) {
365    C.addTransition(State);
366    return true;
367  }
368  return false;
369}
370
371void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
372                                         CheckerContext &C) const {
373  // Define the attack surface.
374  // Set the evaluation function by switching on the callee name.
375  StringRef Name = C.getCalleeName(CE);
376  if (Name.empty())
377    return;
378  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
379    .Case("scanf", &GenericTaintChecker::postScanf)
380    // TODO: Add support for vfscanf & family.
381    .Case("getchar", &GenericTaintChecker::postRetTaint)
382    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
383    .Case("getenv", &GenericTaintChecker::postRetTaint)
384    .Case("fopen", &GenericTaintChecker::postRetTaint)
385    .Case("fdopen", &GenericTaintChecker::postRetTaint)
386    .Case("freopen", &GenericTaintChecker::postRetTaint)
387    .Case("getch", &GenericTaintChecker::postRetTaint)
388    .Case("wgetch", &GenericTaintChecker::postRetTaint)
389    .Case("socket", &GenericTaintChecker::postSocket)
390    .Default(0);
391
392  // If the callee isn't defined, it is not of security concern.
393  // Check and evaluate the call.
394  ProgramStateRef State = 0;
395  if (evalFunction)
396    State = (this->*evalFunction)(CE, C);
397  if (!State)
398    return;
399
400  C.addTransition(State);
401}
402
403bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
404
405  if (checkUncontrolledFormatString(CE, C))
406    return true;
407
408  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
409  StringRef Name = C.getCalleeName(FDecl);
410  if (Name.empty())
411    return false;
412
413  if (checkSystemCall(CE, Name, C))
414    return true;
415
416  if (checkTaintedBufferSize(CE, FDecl, C))
417    return true;
418
419  return false;
420}
421
422SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
423                                                  const Expr* Arg) {
424  ProgramStateRef State = C.getState();
425  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
426  if (AddrVal.isUnknownOrUndef())
427    return 0;
428
429  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
430  if (!AddrLoc)
431    return 0;
432
433  const PointerType *ArgTy =
434    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
435  SVal Val = State->getSVal(*AddrLoc,
436                            ArgTy ? ArgTy->getPointeeType(): QualType());
437  return Val.getAsSymbol();
438}
439
440ProgramStateRef
441GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442                                                   CheckerContext &C) const {
443  ProgramStateRef State = C.getState();
444
445  // Check for taint in arguments.
446  bool IsTainted = false;
447  for (ArgVector::const_iterator I = SrcArgs.begin(),
448                                 E = SrcArgs.end(); I != E; ++I) {
449    unsigned ArgNum = *I;
450
451    if (ArgNum == InvalidArgIndex) {
452      // Check if any of the arguments is tainted, but skip the
453      // destination arguments.
454      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
455        if (isDestinationArgument(i))
456          continue;
457        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
458          break;
459      }
460      break;
461    }
462
463    if (CE->getNumArgs() < (ArgNum + 1))
464      return State;
465    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
466      break;
467  }
468  if (!IsTainted)
469    return State;
470
471  // Mark the arguments which should be tainted after the function returns.
472  for (ArgVector::const_iterator I = DstArgs.begin(),
473                                 E = DstArgs.end(); I != E; ++I) {
474    unsigned ArgNum = *I;
475
476    // Should we mark all arguments as tainted?
477    if (ArgNum == InvalidArgIndex) {
478      // For all pointer and references that were passed in:
479      //   If they are not pointing to const data, mark data as tainted.
480      //   TODO: So far we are just going one level down; ideally we'd need to
481      //         recurse here.
482      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
483        const Expr *Arg = CE->getArg(i);
484        // Process pointer argument.
485        const Type *ArgTy = Arg->getType().getTypePtr();
486        QualType PType = ArgTy->getPointeeType();
487        if ((!PType.isNull() && !PType.isConstQualified())
488            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
489          State = State->add<TaintArgsOnPostVisit>(i);
490      }
491      continue;
492    }
493
494    // Should mark the return value?
495    if (ArgNum == ReturnValueIndex) {
496      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
497      continue;
498    }
499
500    // Mark the given argument.
501    assert(ArgNum < CE->getNumArgs());
502    State = State->add<TaintArgsOnPostVisit>(ArgNum);
503  }
504
505  return State;
506}
507
508
509// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
510// and arg 1 should get taint.
511ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
512                                                   CheckerContext &C) const {
513  assert(CE->getNumArgs() >= 2);
514  ProgramStateRef State = C.getState();
515
516  // Check is the file descriptor is tainted.
517  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
518      isStdin(CE->getArg(0), C)) {
519    // All arguments except for the first two should get taint.
520    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
521        State = State->add<TaintArgsOnPostVisit>(i);
522    return State;
523  }
524
525  return 0;
526}
527
528
529// If argument 0(protocol domain) is network, the return value should get taint.
530ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
531                                                CheckerContext &C) const {
532  ProgramStateRef State = C.getState();
533  if (CE->getNumArgs() < 3)
534    return State;
535
536  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
537  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
538  // White list the internal communication protocols.
539  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
540      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
541    return State;
542  State = State->addTaint(CE, C.getLocationContext());
543  return State;
544}
545
546ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
547                                                   CheckerContext &C) const {
548  ProgramStateRef State = C.getState();
549  if (CE->getNumArgs() < 2)
550    return State;
551
552  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
553  // All arguments except for the very first one should get taint.
554  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
555    // The arguments are pointer arguments. The data they are pointing at is
556    // tainted after the call.
557    const Expr* Arg = CE->getArg(i);
558        SymbolRef Sym = getPointedToSymbol(C, Arg);
559    if (Sym)
560      State = State->addTaint(Sym);
561  }
562  return State;
563}
564
565ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
566                                                  CheckerContext &C) const {
567  return C.getState()->addTaint(CE, C.getLocationContext());
568}
569
570bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
571  ProgramStateRef State = C.getState();
572  SVal Val = State->getSVal(E, C.getLocationContext());
573
574  // stdin is a pointer, so it would be a region.
575  const MemRegion *MemReg = Val.getAsRegion();
576
577  // The region should be symbolic, we do not know it's value.
578  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
579  if (!SymReg)
580    return false;
581
582  // Get it's symbol and find the declaration region it's pointing to.
583  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
584  if (!Sm)
585    return false;
586  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
587  if (!DeclReg)
588    return false;
589
590  // This region corresponds to a declaration, find out if it's a global/extern
591  // variable named stdin with the proper type.
592  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
593    D = D->getCanonicalDecl();
594    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
595        if (const PointerType * PtrTy =
596              dyn_cast<PointerType>(D->getType().getTypePtr()))
597          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
598            return true;
599  }
600  return false;
601}
602
603static bool getPrintfFormatArgumentNum(const CallExpr *CE,
604                                       const CheckerContext &C,
605                                       unsigned int &ArgNum) {
606  // Find if the function contains a format string argument.
607  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
608  // vsnprintf, syslog, custom annotated functions.
609  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
610  if (!FDecl)
611    return false;
612  for (specific_attr_iterator<FormatAttr>
613         i = FDecl->specific_attr_begin<FormatAttr>(),
614         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
615
616    const FormatAttr *Format = *i;
617    ArgNum = Format->getFormatIdx() - 1;
618    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
619      return true;
620  }
621
622  // Or if a function is named setproctitle (this is a heuristic).
623  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624    ArgNum = 0;
625    return true;
626  }
627
628  return false;
629}
630
631bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632                                                  const char Msg[],
633                                                  CheckerContext &C) const {
634  assert(E);
635
636  // Check for taint.
637  ProgramStateRef State = C.getState();
638  if (!State->isTainted(getPointedToSymbol(C, E)) &&
639      !State->isTainted(E, C.getLocationContext()))
640    return false;
641
642  // Generate diagnostic.
643  if (ExplodedNode *N = C.addTransition()) {
644    initBugType();
645    BugReport *report = new BugReport(*BT, Msg, N);
646    report->addRange(E->getSourceRange());
647    C.EmitReport(report);
648    return true;
649  }
650  return false;
651}
652
653bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654                                                        CheckerContext &C) const{
655  // Check if the function contains a format string argument.
656  unsigned int ArgNum = 0;
657  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658    return false;
659
660  // If either the format string content or the pointer itself are tainted, warn.
661  if (generateReportIfTainted(CE->getArg(ArgNum),
662                              MsgUncontrolledFormatString, C))
663    return true;
664  return false;
665}
666
667bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
668                                          StringRef Name,
669                                          CheckerContext &C) const {
670  // TODO: It might make sense to run this check on demand. In some cases,
671  // we should check if the environment has been cleansed here. We also might
672  // need to know if the user was reset before these calls(seteuid).
673  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
674    .Case("system", 0)
675    .Case("popen", 0)
676    .Case("execl", 0)
677    .Case("execle", 0)
678    .Case("execlp", 0)
679    .Case("execv", 0)
680    .Case("execvp", 0)
681    .Case("execvP", 0)
682    .Case("execve", 0)
683    .Case("dlopen", 0)
684    .Default(UINT_MAX);
685
686  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
687    return false;
688
689  if (generateReportIfTainted(CE->getArg(ArgNum),
690                              MsgSanitizeSystemArgs, C))
691    return true;
692
693  return false;
694}
695
696// TODO: Should this check be a part of the CString checker?
697// If yes, should taint be a global setting?
698bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
699                                                 const FunctionDecl *FDecl,
700                                                 CheckerContext &C) const {
701  // If the function has a buffer size argument, set ArgNum.
702  unsigned ArgNum = InvalidArgIndex;
703  unsigned BId = 0;
704  if ( (BId = FDecl->getMemoryFunctionKind()) )
705    switch(BId) {
706    case Builtin::BImemcpy:
707    case Builtin::BImemmove:
708    case Builtin::BIstrncpy:
709      ArgNum = 2;
710      break;
711    case Builtin::BIstrndup:
712      ArgNum = 1;
713      break;
714    default:
715      break;
716    };
717
718  if (ArgNum == InvalidArgIndex) {
719    if (C.isCLibraryFunction(FDecl, "malloc") ||
720        C.isCLibraryFunction(FDecl, "calloc") ||
721        C.isCLibraryFunction(FDecl, "alloca"))
722      ArgNum = 0;
723    else if (C.isCLibraryFunction(FDecl, "memccpy"))
724      ArgNum = 3;
725    else if (C.isCLibraryFunction(FDecl, "realloc"))
726      ArgNum = 1;
727    else if (C.isCLibraryFunction(FDecl, "bcopy"))
728      ArgNum = 2;
729  }
730
731  if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
732      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
733    return true;
734
735  return false;
736}
737
738void ento::registerGenericTaintChecker(CheckerManager &mgr) {
739  mgr.registerChecker<GenericTaintChecker>();
740}
741