1234287Sdim//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2234287Sdim//
3234287Sdim//                     The LLVM Compiler Infrastructure
4234287Sdim//
5234287Sdim// This file is distributed under the University of Illinois Open Source
6234287Sdim// License. See LICENSE.TXT for details.
7234287Sdim//
8234287Sdim//===----------------------------------------------------------------------===//
9234287Sdim//
10234287Sdim// This checker defines the attack surface for generic taint propagation.
11234287Sdim//
12234287Sdim// The taint information produced by it might be useful to other checkers. For
13234287Sdim// example, checkers should report errors which involve tainted data more
14234287Sdim// aggressively, even if the involved symbols are under constrained.
15234287Sdim//
16234287Sdim//===----------------------------------------------------------------------===//
17234287Sdim#include "ClangSACheckers.h"
18249423Sdim#include "clang/AST/Attr.h"
19249423Sdim#include "clang/Basic/Builtins.h"
20249423Sdim#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21234287Sdim#include "clang/StaticAnalyzer/Core/Checker.h"
22234287Sdim#include "clang/StaticAnalyzer/Core/CheckerManager.h"
23234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25234287Sdim#include <climits>
26234287Sdim
27234287Sdimusing namespace clang;
28234287Sdimusing namespace ento;
29234287Sdim
30234287Sdimnamespace {
31234287Sdimclass GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32234287Sdim                                            check::PreStmt<CallExpr> > {
33234287Sdimpublic:
34234287Sdim  static void *getTag() { static int Tag; return &Tag; }
35234287Sdim
36234287Sdim  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37234287Sdim  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
38234287Sdim
39234287Sdim  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
40234287Sdim
41234287Sdimprivate:
42234287Sdim  static const unsigned InvalidArgIndex = UINT_MAX;
43234287Sdim  /// Denotes the return vale.
44234287Sdim  static const unsigned ReturnValueIndex = UINT_MAX - 1;
45234287Sdim
46234287Sdim  mutable OwningPtr<BugType> BT;
47234287Sdim  inline void initBugType() const {
48234287Sdim    if (!BT)
49234287Sdim      BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
50234287Sdim  }
51234287Sdim
52234287Sdim  /// \brief Catch taint related bugs. Check if tainted data is passed to a
53234287Sdim  /// system call etc.
54234287Sdim  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
55234287Sdim
56234287Sdim  /// \brief Add taint sources on a pre-visit.
57234287Sdim  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
58234287Sdim
59234287Sdim  /// \brief Propagate taint generated at pre-visit.
60234287Sdim  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
61234287Sdim
62234287Sdim  /// \brief Add taint sources on a post visit.
63234287Sdim  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
64234287Sdim
65234287Sdim  /// Check if the region the expression evaluates to is the standard input,
66234287Sdim  /// and thus, is tainted.
67234287Sdim  static bool isStdin(const Expr *E, CheckerContext &C);
68234287Sdim
69234287Sdim  /// \brief Given a pointer argument, get the symbol of the value it contains
70234287Sdim  /// (points to).
71234287Sdim  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
72234287Sdim
73234287Sdim  /// Functions defining the attack surface.
74234287Sdim  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
75234287Sdim                                                       CheckerContext &C) const;
76234287Sdim  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
77234287Sdim  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
78234287Sdim  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
79234287Sdim
80234287Sdim  /// Taint the scanned input if the file is tainted.
81234287Sdim  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
82234287Sdim
83234287Sdim  /// Check for CWE-134: Uncontrolled Format String.
84234287Sdim  static const char MsgUncontrolledFormatString[];
85234287Sdim  bool checkUncontrolledFormatString(const CallExpr *CE,
86234287Sdim                                     CheckerContext &C) const;
87234287Sdim
88234287Sdim  /// Check for:
89234287Sdim  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
90234287Sdim  /// CWE-78, "Failure to Sanitize Data into an OS Command"
91234287Sdim  static const char MsgSanitizeSystemArgs[];
92234287Sdim  bool checkSystemCall(const CallExpr *CE, StringRef Name,
93234287Sdim                       CheckerContext &C) const;
94234287Sdim
95234287Sdim  /// Check if tainted data is used as a buffer size ins strn.. functions,
96234287Sdim  /// and allocators.
97234287Sdim  static const char MsgTaintedBufferSize[];
98234287Sdim  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
99234287Sdim                              CheckerContext &C) const;
100234287Sdim
101234287Sdim  /// Generate a report if the expression is tainted or points to tainted data.
102234287Sdim  bool generateReportIfTainted(const Expr *E, const char Msg[],
103234287Sdim                               CheckerContext &C) const;
104234287Sdim
105234287Sdim
106249423Sdim  typedef SmallVector<unsigned, 2> ArgVector;
107234287Sdim
108234287Sdim  /// \brief A struct used to specify taint propagation rules for a function.
109234287Sdim  ///
110234287Sdim  /// If any of the possible taint source arguments is tainted, all of the
111234287Sdim  /// destination arguments should also be tainted. Use InvalidArgIndex in the
112234287Sdim  /// src list to specify that all of the arguments can introduce taint. Use
113234287Sdim  /// InvalidArgIndex in the dst arguments to signify that all the non-const
114234287Sdim  /// pointer and reference arguments might be tainted on return. If
115234287Sdim  /// ReturnValueIndex is added to the dst list, the return value will be
116234287Sdim  /// tainted.
117234287Sdim  struct TaintPropagationRule {
118234287Sdim    /// List of arguments which can be taint sources and should be checked.
119234287Sdim    ArgVector SrcArgs;
120234287Sdim    /// List of arguments which should be tainted on function return.
121234287Sdim    ArgVector DstArgs;
122234287Sdim    // TODO: Check if using other data structures would be more optimal.
123234287Sdim
124234287Sdim    TaintPropagationRule() {}
125234287Sdim
126234287Sdim    TaintPropagationRule(unsigned SArg,
127234287Sdim                         unsigned DArg, bool TaintRet = false) {
128234287Sdim      SrcArgs.push_back(SArg);
129234287Sdim      DstArgs.push_back(DArg);
130234287Sdim      if (TaintRet)
131234287Sdim        DstArgs.push_back(ReturnValueIndex);
132234287Sdim    }
133234287Sdim
134234287Sdim    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
135234287Sdim                         unsigned DArg, bool TaintRet = false) {
136234287Sdim      SrcArgs.push_back(SArg1);
137234287Sdim      SrcArgs.push_back(SArg2);
138234287Sdim      DstArgs.push_back(DArg);
139234287Sdim      if (TaintRet)
140234287Sdim        DstArgs.push_back(ReturnValueIndex);
141234287Sdim    }
142234287Sdim
143234287Sdim    /// Get the propagation rule for a given function.
144234287Sdim    static TaintPropagationRule
145234287Sdim      getTaintPropagationRule(const FunctionDecl *FDecl,
146234287Sdim                              StringRef Name,
147234287Sdim                              CheckerContext &C);
148234287Sdim
149234287Sdim    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
150234287Sdim    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
151234287Sdim
152234287Sdim    inline bool isNull() const { return SrcArgs.empty(); }
153234287Sdim
154234287Sdim    inline bool isDestinationArgument(unsigned ArgNum) const {
155234287Sdim      return (std::find(DstArgs.begin(),
156234287Sdim                        DstArgs.end(), ArgNum) != DstArgs.end());
157234287Sdim    }
158234287Sdim
159234287Sdim    static inline bool isTaintedOrPointsToTainted(const Expr *E,
160234287Sdim                                                  ProgramStateRef State,
161234287Sdim                                                  CheckerContext &C) {
162234287Sdim      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
163234287Sdim              (E->getType().getTypePtr()->isPointerType() &&
164234287Sdim               State->isTainted(getPointedToSymbol(C, E))));
165234287Sdim    }
166234287Sdim
167234287Sdim    /// \brief Pre-process a function which propagates taint according to the
168234287Sdim    /// taint rule.
169234287Sdim    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
170234287Sdim
171234287Sdim  };
172234287Sdim};
173234287Sdim
174234287Sdimconst unsigned GenericTaintChecker::ReturnValueIndex;
175234287Sdimconst unsigned GenericTaintChecker::InvalidArgIndex;
176234287Sdim
177234287Sdimconst char GenericTaintChecker::MsgUncontrolledFormatString[] =
178234287Sdim  "Untrusted data is used as a format string "
179234287Sdim  "(CWE-134: Uncontrolled Format String)";
180234287Sdim
181234287Sdimconst char GenericTaintChecker::MsgSanitizeSystemArgs[] =
182234287Sdim  "Untrusted data is passed to a system call "
183234287Sdim  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
184234287Sdim
185234287Sdimconst char GenericTaintChecker::MsgTaintedBufferSize[] =
186234287Sdim  "Untrusted data is used to specify the buffer size "
187234287Sdim  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
188234287Sdim  "character data and the null terminator)";
189234287Sdim
190234287Sdim} // end of anonymous namespace
191234287Sdim
/// A set used to pass information from the pre-visit of a call to its
/// post-visit. Each element is an unsigned integer: either ReturnValueIndex
/// or the index of a pointer/reference argument whose pointed-to data should
/// be tainted when the call returns.
196243830SdimREGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
197234287Sdim
198234287SdimGenericTaintChecker::TaintPropagationRule
199234287SdimGenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
200234287Sdim                                                     const FunctionDecl *FDecl,
201234287Sdim                                                     StringRef Name,
202234287Sdim                                                     CheckerContext &C) {
203234287Sdim  // TODO: Currently, we might loose precision here: we always mark a return
204234287Sdim  // value as tainted even if it's just a pointer, pointing to tainted data.
205234287Sdim
206234287Sdim  // Check for exact name match for functions without builtin substitutes.
207234287Sdim  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208234287Sdim    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209234287Sdim    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210234287Sdim    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
211234287Sdim    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
212234287Sdim    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
213234287Sdim    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
214234287Sdim    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
215234287Sdim    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
216234287Sdim    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
217234287Sdim    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
218234287Sdim    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
219234287Sdim    .Case("read", TaintPropagationRule(0, 2, 1, true))
220234287Sdim    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
221234287Sdim    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
222234287Sdim    .Case("fgets", TaintPropagationRule(2, 0, true))
223234287Sdim    .Case("getline", TaintPropagationRule(2, 0))
224234287Sdim    .Case("getdelim", TaintPropagationRule(3, 0))
225234287Sdim    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
226234287Sdim    .Default(TaintPropagationRule());
227234287Sdim
228234287Sdim  if (!Rule.isNull())
229234287Sdim    return Rule;
230234287Sdim
231234287Sdim  // Check if it's one of the memory setting/copying functions.
232234287Sdim  // This check is specialized but faster then calling isCLibraryFunction.
233234287Sdim  unsigned BId = 0;
234234287Sdim  if ( (BId = FDecl->getMemoryFunctionKind()) )
235234287Sdim    switch(BId) {
236234287Sdim    case Builtin::BImemcpy:
237234287Sdim    case Builtin::BImemmove:
238234287Sdim    case Builtin::BIstrncpy:
239234287Sdim    case Builtin::BIstrncat:
240234287Sdim      return TaintPropagationRule(1, 2, 0, true);
241234287Sdim    case Builtin::BIstrlcpy:
242234287Sdim    case Builtin::BIstrlcat:
243234287Sdim      return TaintPropagationRule(1, 2, 0, false);
244234287Sdim    case Builtin::BIstrndup:
245234287Sdim      return TaintPropagationRule(0, 1, ReturnValueIndex);
246234287Sdim
247234287Sdim    default:
248234287Sdim      break;
249234287Sdim    };
250234287Sdim
251234287Sdim  // Process all other functions which could be defined as builtins.
252234287Sdim  if (Rule.isNull()) {
253234287Sdim    if (C.isCLibraryFunction(FDecl, "snprintf") ||
254234287Sdim        C.isCLibraryFunction(FDecl, "sprintf"))
255234287Sdim      return TaintPropagationRule(InvalidArgIndex, 0, true);
256234287Sdim    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
257234287Sdim             C.isCLibraryFunction(FDecl, "stpcpy") ||
258234287Sdim             C.isCLibraryFunction(FDecl, "strcat"))
259234287Sdim      return TaintPropagationRule(1, 0, true);
260234287Sdim    else if (C.isCLibraryFunction(FDecl, "bcopy"))
261234287Sdim      return TaintPropagationRule(0, 2, 1, false);
262234287Sdim    else if (C.isCLibraryFunction(FDecl, "strdup") ||
263234287Sdim             C.isCLibraryFunction(FDecl, "strdupa"))
264234287Sdim      return TaintPropagationRule(0, ReturnValueIndex);
265234287Sdim    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
266234287Sdim      return TaintPropagationRule(0, ReturnValueIndex);
267234287Sdim  }
268234287Sdim
269234287Sdim  // Skipping the following functions, since they might be used for cleansing
270234287Sdim  // or smart memory copy:
271239462Sdim  // - memccpy - copying until hitting a special character.
272234287Sdim
273234287Sdim  return TaintPropagationRule();
274234287Sdim}
275234287Sdim
276234287Sdimvoid GenericTaintChecker::checkPreStmt(const CallExpr *CE,
277234287Sdim                                       CheckerContext &C) const {
278234287Sdim  // Check for errors first.
279234287Sdim  if (checkPre(CE, C))
280234287Sdim    return;
281234287Sdim
282234287Sdim  // Add taint second.
283234287Sdim  addSourcesPre(CE, C);
284234287Sdim}
285234287Sdim
286234287Sdimvoid GenericTaintChecker::checkPostStmt(const CallExpr *CE,
287234287Sdim                                        CheckerContext &C) const {
288234287Sdim  if (propagateFromPre(CE, C))
289234287Sdim    return;
290234287Sdim  addSourcesPost(CE, C);
291234287Sdim}
292234287Sdim
293234287Sdimvoid GenericTaintChecker::addSourcesPre(const CallExpr *CE,
294234287Sdim                                        CheckerContext &C) const {
295234287Sdim  ProgramStateRef State = 0;
296234287Sdim  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
297239462Sdim  if (!FDecl || FDecl->getKind() != Decl::Function)
298239462Sdim    return;
299239462Sdim
300234287Sdim  StringRef Name = C.getCalleeName(FDecl);
301234287Sdim  if (Name.empty())
302234287Sdim    return;
303234287Sdim
304234287Sdim  // First, try generating a propagation rule for this function.
305234287Sdim  TaintPropagationRule Rule =
306234287Sdim    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
307234287Sdim  if (!Rule.isNull()) {
308234287Sdim    State = Rule.process(CE, C);
309234287Sdim    if (!State)
310234287Sdim      return;
311234287Sdim    C.addTransition(State);
312234287Sdim    return;
313234287Sdim  }
314234287Sdim
315234287Sdim  // Otherwise, check if we have custom pre-processing implemented.
316234287Sdim  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
317234287Sdim    .Case("fscanf", &GenericTaintChecker::preFscanf)
318234287Sdim    .Default(0);
319234287Sdim  // Check and evaluate the call.
320234287Sdim  if (evalFunction)
321234287Sdim    State = (this->*evalFunction)(CE, C);
322234287Sdim  if (!State)
323234287Sdim    return;
324234287Sdim  C.addTransition(State);
325234287Sdim
326234287Sdim}
327234287Sdim
328234287Sdimbool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
329234287Sdim                                           CheckerContext &C) const {
330234287Sdim  ProgramStateRef State = C.getState();
331234287Sdim
332234287Sdim  // Depending on what was tainted at pre-visit, we determined a set of
333234287Sdim  // arguments which should be tainted after the function returns. These are
334234287Sdim  // stored in the state as TaintArgsOnPostVisit set.
335243830Sdim  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
336234287Sdim  if (TaintArgs.isEmpty())
337234287Sdim    return false;
338234287Sdim
339234287Sdim  for (llvm::ImmutableSet<unsigned>::iterator
340234287Sdim         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
341234287Sdim    unsigned ArgNum  = *I;
342234287Sdim
343234287Sdim    // Special handling for the tainted return value.
344234287Sdim    if (ArgNum == ReturnValueIndex) {
345234287Sdim      State = State->addTaint(CE, C.getLocationContext());
346234287Sdim      continue;
347234287Sdim    }
348234287Sdim
349234287Sdim    // The arguments are pointer arguments. The data they are pointing at is
350234287Sdim    // tainted after the call.
351234287Sdim    if (CE->getNumArgs() < (ArgNum + 1))
352234287Sdim      return false;
353234287Sdim    const Expr* Arg = CE->getArg(ArgNum);
354234287Sdim    SymbolRef Sym = getPointedToSymbol(C, Arg);
355234287Sdim    if (Sym)
356234287Sdim      State = State->addTaint(Sym);
357234287Sdim  }
358234287Sdim
359234287Sdim  // Clear up the taint info from the state.
360234287Sdim  State = State->remove<TaintArgsOnPostVisit>();
361234287Sdim
362234287Sdim  if (State != C.getState()) {
363234287Sdim    C.addTransition(State);
364234287Sdim    return true;
365234287Sdim  }
366234287Sdim  return false;
367234287Sdim}
368234287Sdim
369234287Sdimvoid GenericTaintChecker::addSourcesPost(const CallExpr *CE,
370234287Sdim                                         CheckerContext &C) const {
371234287Sdim  // Define the attack surface.
372234287Sdim  // Set the evaluation function by switching on the callee name.
373239462Sdim  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
374239462Sdim  if (!FDecl || FDecl->getKind() != Decl::Function)
375239462Sdim    return;
376239462Sdim
377239462Sdim  StringRef Name = C.getCalleeName(FDecl);
378234287Sdim  if (Name.empty())
379234287Sdim    return;
380234287Sdim  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
381234287Sdim    .Case("scanf", &GenericTaintChecker::postScanf)
382234287Sdim    // TODO: Add support for vfscanf & family.
383234287Sdim    .Case("getchar", &GenericTaintChecker::postRetTaint)
384234287Sdim    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
385234287Sdim    .Case("getenv", &GenericTaintChecker::postRetTaint)
386234287Sdim    .Case("fopen", &GenericTaintChecker::postRetTaint)
387234287Sdim    .Case("fdopen", &GenericTaintChecker::postRetTaint)
388234287Sdim    .Case("freopen", &GenericTaintChecker::postRetTaint)
389234287Sdim    .Case("getch", &GenericTaintChecker::postRetTaint)
390234287Sdim    .Case("wgetch", &GenericTaintChecker::postRetTaint)
391234287Sdim    .Case("socket", &GenericTaintChecker::postSocket)
392234287Sdim    .Default(0);
393234287Sdim
394234287Sdim  // If the callee isn't defined, it is not of security concern.
395234287Sdim  // Check and evaluate the call.
396234287Sdim  ProgramStateRef State = 0;
397234287Sdim  if (evalFunction)
398234287Sdim    State = (this->*evalFunction)(CE, C);
399234287Sdim  if (!State)
400234287Sdim    return;
401234287Sdim
402234287Sdim  C.addTransition(State);
403234287Sdim}
404234287Sdim
405234287Sdimbool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
406234287Sdim
407234287Sdim  if (checkUncontrolledFormatString(CE, C))
408234287Sdim    return true;
409234287Sdim
410234287Sdim  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
411239462Sdim  if (!FDecl || FDecl->getKind() != Decl::Function)
412239462Sdim    return false;
413239462Sdim
414234287Sdim  StringRef Name = C.getCalleeName(FDecl);
415234287Sdim  if (Name.empty())
416234287Sdim    return false;
417234287Sdim
418234287Sdim  if (checkSystemCall(CE, Name, C))
419234287Sdim    return true;
420234287Sdim
421234287Sdim  if (checkTaintedBufferSize(CE, FDecl, C))
422234287Sdim    return true;
423234287Sdim
424234287Sdim  return false;
425234287Sdim}
426234287Sdim
427234287SdimSymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
428234287Sdim                                                  const Expr* Arg) {
429234287Sdim  ProgramStateRef State = C.getState();
430234287Sdim  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
431234287Sdim  if (AddrVal.isUnknownOrUndef())
432234287Sdim    return 0;
433234287Sdim
434249423Sdim  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
435234287Sdim  if (!AddrLoc)
436234287Sdim    return 0;
437234287Sdim
438234287Sdim  const PointerType *ArgTy =
439234287Sdim    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
440234287Sdim  SVal Val = State->getSVal(*AddrLoc,
441234287Sdim                            ArgTy ? ArgTy->getPointeeType(): QualType());
442234287Sdim  return Val.getAsSymbol();
443234287Sdim}
444234287Sdim
445234287SdimProgramStateRef
446234287SdimGenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
447234287Sdim                                                   CheckerContext &C) const {
448234287Sdim  ProgramStateRef State = C.getState();
449234287Sdim
450234287Sdim  // Check for taint in arguments.
451234287Sdim  bool IsTainted = false;
452234287Sdim  for (ArgVector::const_iterator I = SrcArgs.begin(),
453234287Sdim                                 E = SrcArgs.end(); I != E; ++I) {
454234287Sdim    unsigned ArgNum = *I;
455234287Sdim
456234287Sdim    if (ArgNum == InvalidArgIndex) {
457234287Sdim      // Check if any of the arguments is tainted, but skip the
458234287Sdim      // destination arguments.
459234287Sdim      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
460234287Sdim        if (isDestinationArgument(i))
461234287Sdim          continue;
462234287Sdim        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
463234287Sdim          break;
464234287Sdim      }
465234287Sdim      break;
466234287Sdim    }
467234287Sdim
468234287Sdim    if (CE->getNumArgs() < (ArgNum + 1))
469234287Sdim      return State;
470234287Sdim    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
471234287Sdim      break;
472234287Sdim  }
473234287Sdim  if (!IsTainted)
474234287Sdim    return State;
475234287Sdim
476234287Sdim  // Mark the arguments which should be tainted after the function returns.
477234287Sdim  for (ArgVector::const_iterator I = DstArgs.begin(),
478234287Sdim                                 E = DstArgs.end(); I != E; ++I) {
479234287Sdim    unsigned ArgNum = *I;
480234287Sdim
481234287Sdim    // Should we mark all arguments as tainted?
482234287Sdim    if (ArgNum == InvalidArgIndex) {
483234287Sdim      // For all pointer and references that were passed in:
484234287Sdim      //   If they are not pointing to const data, mark data as tainted.
485234287Sdim      //   TODO: So far we are just going one level down; ideally we'd need to
486234287Sdim      //         recurse here.
487234287Sdim      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
488234287Sdim        const Expr *Arg = CE->getArg(i);
489234287Sdim        // Process pointer argument.
490234287Sdim        const Type *ArgTy = Arg->getType().getTypePtr();
491234287Sdim        QualType PType = ArgTy->getPointeeType();
492234287Sdim        if ((!PType.isNull() && !PType.isConstQualified())
493234287Sdim            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
494234287Sdim          State = State->add<TaintArgsOnPostVisit>(i);
495234287Sdim      }
496234287Sdim      continue;
497234287Sdim    }
498234287Sdim
499234287Sdim    // Should mark the return value?
500234287Sdim    if (ArgNum == ReturnValueIndex) {
501234287Sdim      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
502234287Sdim      continue;
503234287Sdim    }
504234287Sdim
505234287Sdim    // Mark the given argument.
506234287Sdim    assert(ArgNum < CE->getNumArgs());
507234287Sdim    State = State->add<TaintArgsOnPostVisit>(ArgNum);
508234287Sdim  }
509234287Sdim
510234287Sdim  return State;
511234287Sdim}
512234287Sdim
513234287Sdim
514234287Sdim// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515234287Sdim// and arg 1 should get taint.
516234287SdimProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
517234287Sdim                                                   CheckerContext &C) const {
518234287Sdim  assert(CE->getNumArgs() >= 2);
519234287Sdim  ProgramStateRef State = C.getState();
520234287Sdim
521234287Sdim  // Check is the file descriptor is tainted.
522234287Sdim  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
523234287Sdim      isStdin(CE->getArg(0), C)) {
524234287Sdim    // All arguments except for the first two should get taint.
525234287Sdim    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526234287Sdim        State = State->add<TaintArgsOnPostVisit>(i);
527234287Sdim    return State;
528234287Sdim  }
529234287Sdim
530234287Sdim  return 0;
531234287Sdim}
532234287Sdim
533234287Sdim
534234287Sdim// If argument 0(protocol domain) is network, the return value should get taint.
535234287SdimProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
536234287Sdim                                                CheckerContext &C) const {
537234287Sdim  ProgramStateRef State = C.getState();
538234287Sdim  if (CE->getNumArgs() < 3)
539234287Sdim    return State;
540234287Sdim
541234287Sdim  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
542234287Sdim  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
543234287Sdim  // White list the internal communication protocols.
544234287Sdim  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
545234287Sdim      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
546234287Sdim    return State;
547234287Sdim  State = State->addTaint(CE, C.getLocationContext());
548234287Sdim  return State;
549234287Sdim}
550234287Sdim
551234287SdimProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
552234287Sdim                                                   CheckerContext &C) const {
553234287Sdim  ProgramStateRef State = C.getState();
554234287Sdim  if (CE->getNumArgs() < 2)
555234287Sdim    return State;
556234287Sdim
557234287Sdim  // All arguments except for the very first one should get taint.
558234287Sdim  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
559234287Sdim    // The arguments are pointer arguments. The data they are pointing at is
560234287Sdim    // tainted after the call.
561234287Sdim    const Expr* Arg = CE->getArg(i);
562234287Sdim        SymbolRef Sym = getPointedToSymbol(C, Arg);
563234287Sdim    if (Sym)
564234287Sdim      State = State->addTaint(Sym);
565234287Sdim  }
566234287Sdim  return State;
567234287Sdim}
568234287Sdim
569234287SdimProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
570234287Sdim                                                  CheckerContext &C) const {
571234287Sdim  return C.getState()->addTaint(CE, C.getLocationContext());
572234287Sdim}
573234287Sdim
574234287Sdimbool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
575234287Sdim  ProgramStateRef State = C.getState();
576234287Sdim  SVal Val = State->getSVal(E, C.getLocationContext());
577234287Sdim
578234287Sdim  // stdin is a pointer, so it would be a region.
579234287Sdim  const MemRegion *MemReg = Val.getAsRegion();
580234287Sdim
581234287Sdim  // The region should be symbolic, we do not know it's value.
582234287Sdim  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
583234287Sdim  if (!SymReg)
584234287Sdim    return false;
585234287Sdim
586234287Sdim  // Get it's symbol and find the declaration region it's pointing to.
587234287Sdim  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588234287Sdim  if (!Sm)
589234287Sdim    return false;
590234287Sdim  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591234287Sdim  if (!DeclReg)
592234287Sdim    return false;
593234287Sdim
594234287Sdim  // This region corresponds to a declaration, find out if it's a global/extern
595234287Sdim  // variable named stdin with the proper type.
596234287Sdim  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597234287Sdim    D = D->getCanonicalDecl();
598234287Sdim    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
599234287Sdim        if (const PointerType * PtrTy =
600234287Sdim              dyn_cast<PointerType>(D->getType().getTypePtr()))
601234287Sdim          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
602234287Sdim            return true;
603234287Sdim  }
604234287Sdim  return false;
605234287Sdim}
606234287Sdim
607234287Sdimstatic bool getPrintfFormatArgumentNum(const CallExpr *CE,
608234287Sdim                                       const CheckerContext &C,
609234287Sdim                                       unsigned int &ArgNum) {
610234287Sdim  // Find if the function contains a format string argument.
611234287Sdim  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
612234287Sdim  // vsnprintf, syslog, custom annotated functions.
613234287Sdim  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
614234287Sdim  if (!FDecl)
615234287Sdim    return false;
616234287Sdim  for (specific_attr_iterator<FormatAttr>
617234287Sdim         i = FDecl->specific_attr_begin<FormatAttr>(),
618234287Sdim         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
619234287Sdim
620234287Sdim    const FormatAttr *Format = *i;
621234287Sdim    ArgNum = Format->getFormatIdx() - 1;
622263508Sdim    if ((Format->getType()->getName() == "printf") &&
623263508Sdim         CE->getNumArgs() > ArgNum)
624234287Sdim      return true;
625234287Sdim  }
626234287Sdim
627234287Sdim  // Or if a function is named setproctitle (this is a heuristic).
628234287Sdim  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
629234287Sdim    ArgNum = 0;
630234287Sdim    return true;
631234287Sdim  }
632234287Sdim
633234287Sdim  return false;
634234287Sdim}
635234287Sdim
636234287Sdimbool GenericTaintChecker::generateReportIfTainted(const Expr *E,
637234287Sdim                                                  const char Msg[],
638234287Sdim                                                  CheckerContext &C) const {
639234287Sdim  assert(E);
640234287Sdim
641234287Sdim  // Check for taint.
642234287Sdim  ProgramStateRef State = C.getState();
643234287Sdim  if (!State->isTainted(getPointedToSymbol(C, E)) &&
644234287Sdim      !State->isTainted(E, C.getLocationContext()))
645234287Sdim    return false;
646234287Sdim
647234287Sdim  // Generate diagnostic.
648234287Sdim  if (ExplodedNode *N = C.addTransition()) {
649234287Sdim    initBugType();
650234287Sdim    BugReport *report = new BugReport(*BT, Msg, N);
651234287Sdim    report->addRange(E->getSourceRange());
652243830Sdim    C.emitReport(report);
653234287Sdim    return true;
654234287Sdim  }
655234287Sdim  return false;
656234287Sdim}
657234287Sdim
658234287Sdimbool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
659234287Sdim                                                        CheckerContext &C) const{
660234287Sdim  // Check if the function contains a format string argument.
661234287Sdim  unsigned int ArgNum = 0;
662234287Sdim  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
663234287Sdim    return false;
664234287Sdim
665234287Sdim  // If either the format string content or the pointer itself are tainted, warn.
666234287Sdim  if (generateReportIfTainted(CE->getArg(ArgNum),
667234287Sdim                              MsgUncontrolledFormatString, C))
668234287Sdim    return true;
669234287Sdim  return false;
670234287Sdim}
671234287Sdim
672234287Sdimbool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
673234287Sdim                                          StringRef Name,
674234287Sdim                                          CheckerContext &C) const {
675234287Sdim  // TODO: It might make sense to run this check on demand. In some cases,
676234287Sdim  // we should check if the environment has been cleansed here. We also might
677234287Sdim  // need to know if the user was reset before these calls(seteuid).
678234287Sdim  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
679234287Sdim    .Case("system", 0)
680234287Sdim    .Case("popen", 0)
681234287Sdim    .Case("execl", 0)
682234287Sdim    .Case("execle", 0)
683234287Sdim    .Case("execlp", 0)
684234287Sdim    .Case("execv", 0)
685234287Sdim    .Case("execvp", 0)
686234287Sdim    .Case("execvP", 0)
687234287Sdim    .Case("execve", 0)
688234287Sdim    .Case("dlopen", 0)
689234287Sdim    .Default(UINT_MAX);
690234287Sdim
691234287Sdim  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
692234287Sdim    return false;
693234287Sdim
694234287Sdim  if (generateReportIfTainted(CE->getArg(ArgNum),
695234287Sdim                              MsgSanitizeSystemArgs, C))
696234287Sdim    return true;
697234287Sdim
698234287Sdim  return false;
699234287Sdim}
700234287Sdim
701234287Sdim// TODO: Should this check be a part of the CString checker?
702234287Sdim// If yes, should taint be a global setting?
703234287Sdimbool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
704234287Sdim                                                 const FunctionDecl *FDecl,
705234287Sdim                                                 CheckerContext &C) const {
706234287Sdim  // If the function has a buffer size argument, set ArgNum.
707234287Sdim  unsigned ArgNum = InvalidArgIndex;
708234287Sdim  unsigned BId = 0;
709234287Sdim  if ( (BId = FDecl->getMemoryFunctionKind()) )
710234287Sdim    switch(BId) {
711234287Sdim    case Builtin::BImemcpy:
712234287Sdim    case Builtin::BImemmove:
713234287Sdim    case Builtin::BIstrncpy:
714234287Sdim      ArgNum = 2;
715234287Sdim      break;
716234287Sdim    case Builtin::BIstrndup:
717234287Sdim      ArgNum = 1;
718234287Sdim      break;
719234287Sdim    default:
720234287Sdim      break;
721234287Sdim    };
722234287Sdim
723234287Sdim  if (ArgNum == InvalidArgIndex) {
724234287Sdim    if (C.isCLibraryFunction(FDecl, "malloc") ||
725234287Sdim        C.isCLibraryFunction(FDecl, "calloc") ||
726234287Sdim        C.isCLibraryFunction(FDecl, "alloca"))
727234287Sdim      ArgNum = 0;
728234287Sdim    else if (C.isCLibraryFunction(FDecl, "memccpy"))
729234287Sdim      ArgNum = 3;
730234287Sdim    else if (C.isCLibraryFunction(FDecl, "realloc"))
731234287Sdim      ArgNum = 1;
732234287Sdim    else if (C.isCLibraryFunction(FDecl, "bcopy"))
733234287Sdim      ArgNum = 2;
734234287Sdim  }
735234287Sdim
736234287Sdim  if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
737234287Sdim      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
738234287Sdim    return true;
739234287Sdim
740234287Sdim  return false;
741234287Sdim}
742234287Sdim
/// Register GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}
746