//== CStringSyntaxChecker.cpp - C string API syntax checker ---*- C++ -*-==//
2234287Sdim//
3234287Sdim//                     The LLVM Compiler Infrastructure
4234287Sdim//
5234287Sdim// This file is distributed under the University of Illinois Open Source
6234287Sdim// License. See LICENSE.TXT for details.
7234287Sdim//
8234287Sdim//===----------------------------------------------------------------------===//
9234287Sdim//
10234287Sdim// An AST checker that looks for common pitfalls when using C string APIs.
11234287Sdim//  - Identifies erroneous patterns in the last argument to strncat - the number
12234287Sdim//    of bytes to copy.
13234287Sdim//
14234287Sdim//===----------------------------------------------------------------------===//
15234287Sdim#include "ClangSACheckers.h"
16234287Sdim#include "clang/AST/Expr.h"
17234287Sdim#include "clang/AST/OperationKinds.h"
18234287Sdim#include "clang/AST/StmtVisitor.h"
19249423Sdim#include "clang/Analysis/AnalysisContext.h"
20234287Sdim#include "clang/Basic/TargetInfo.h"
21234287Sdim#include "clang/Basic/TypeTraits.h"
22249423Sdim#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
23234287Sdim#include "clang/StaticAnalyzer/Core/Checker.h"
24234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
25234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26234287Sdim#include "llvm/ADT/SmallString.h"
27234287Sdim#include "llvm/Support/raw_ostream.h"
28234287Sdim
29234287Sdimusing namespace clang;
30234287Sdimusing namespace ento;
31234287Sdim
32234287Sdimnamespace {
33234287Sdimclass WalkAST: public StmtVisitor<WalkAST> {
34234287Sdim  BugReporter &BR;
35234287Sdim  AnalysisDeclContext* AC;
36234287Sdim
37234287Sdim  /// Check if two expressions refer to the same declaration.
38234287Sdim  inline bool sameDecl(const Expr *A1, const Expr *A2) {
39234287Sdim    if (const DeclRefExpr *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))
40234287Sdim      if (const DeclRefExpr *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))
41234287Sdim        return D1->getDecl() == D2->getDecl();
42234287Sdim    return false;
43234287Sdim  }
44234287Sdim
45234287Sdim  /// Check if the expression E is a sizeof(WithArg).
46234287Sdim  inline bool isSizeof(const Expr *E, const Expr *WithArg) {
47234287Sdim    if (const UnaryExprOrTypeTraitExpr *UE =
48234287Sdim    dyn_cast<UnaryExprOrTypeTraitExpr>(E))
49234287Sdim      if (UE->getKind() == UETT_SizeOf)
50234287Sdim        return sameDecl(UE->getArgumentExpr(), WithArg);
51234287Sdim    return false;
52234287Sdim  }
53234287Sdim
54234287Sdim  /// Check if the expression E is a strlen(WithArg).
55234287Sdim  inline bool isStrlen(const Expr *E, const Expr *WithArg) {
56234287Sdim    if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
57234287Sdim      const FunctionDecl *FD = CE->getDirectCallee();
58234287Sdim      if (!FD)
59234287Sdim        return false;
60243830Sdim      return (CheckerContext::isCLibraryFunction(FD, "strlen") &&
61243830Sdim              sameDecl(CE->getArg(0), WithArg));
62234287Sdim    }
63234287Sdim    return false;
64234287Sdim  }
65234287Sdim
66234287Sdim  /// Check if the expression is an integer literal with value 1.
67234287Sdim  inline bool isOne(const Expr *E) {
68234287Sdim    if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
69234287Sdim      return (IL->getValue().isIntN(1));
70234287Sdim    return false;
71234287Sdim  }
72234287Sdim
73234287Sdim  inline StringRef getPrintableName(const Expr *E) {
74234287Sdim    if (const DeclRefExpr *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
75234287Sdim      return D->getDecl()->getName();
76234287Sdim    return StringRef();
77234287Sdim  }
78234287Sdim
79234287Sdim  /// Identify erroneous patterns in the last argument to strncat - the number
80234287Sdim  /// of bytes to copy.
81234287Sdim  bool containsBadStrncatPattern(const CallExpr *CE);
82234287Sdim
83234287Sdimpublic:
84234287Sdim  WalkAST(BugReporter &br, AnalysisDeclContext* ac) :
85243830Sdim      BR(br), AC(ac) {
86234287Sdim  }
87234287Sdim
88234287Sdim  // Statement visitor methods.
89234287Sdim  void VisitChildren(Stmt *S);
90234287Sdim  void VisitStmt(Stmt *S) {
91234287Sdim    VisitChildren(S);
92234287Sdim  }
93234287Sdim  void VisitCallExpr(CallExpr *CE);
94234287Sdim};
95234287Sdim} // end anonymous namespace
96234287Sdim
// The correct size argument should look like the following:
//   strncat(dst, src, sizeof(dst) - strlen(dst) - 1);
// We look for the following anti-patterns:
//   - strncat(dst, src, sizeof(dst) - strlen(dst));
//   - strncat(dst, src, sizeof(dst) - 1);
//   - strncat(dst, src, sizeof(dst));
//   - strncat(dst, src, sizeof(src));
103234287Sdimbool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {
104251662Sdim  if (CE->getNumArgs() != 3)
105251662Sdim    return false;
106234287Sdim  const Expr *DstArg = CE->getArg(0);
107234287Sdim  const Expr *SrcArg = CE->getArg(1);
108234287Sdim  const Expr *LenArg = CE->getArg(2);
109234287Sdim
110234287Sdim  // Identify wrong size expressions, which are commonly used instead.
111234287Sdim  if (const BinaryOperator *BE =
112234287Sdim              dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {
113234287Sdim    // - sizeof(dst) - strlen(dst)
114234287Sdim    if (BE->getOpcode() == BO_Sub) {
115234287Sdim      const Expr *L = BE->getLHS();
116234287Sdim      const Expr *R = BE->getRHS();
117234287Sdim      if (isSizeof(L, DstArg) && isStrlen(R, DstArg))
118234287Sdim        return true;
119234287Sdim
120234287Sdim      // - sizeof(dst) - 1
121234287Sdim      if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))
122234287Sdim        return true;
123234287Sdim    }
124234287Sdim  }
125234287Sdim  // - sizeof(dst)
126234287Sdim  if (isSizeof(LenArg, DstArg))
127234287Sdim    return true;
128234287Sdim
129234287Sdim  // - sizeof(src)
130234287Sdim  if (isSizeof(LenArg, SrcArg))
131234287Sdim    return true;
132234287Sdim  return false;
133234287Sdim}
134234287Sdim
135234287Sdimvoid WalkAST::VisitCallExpr(CallExpr *CE) {
136234287Sdim  const FunctionDecl *FD = CE->getDirectCallee();
137234287Sdim  if (!FD)
138234287Sdim    return;
139234287Sdim
140243830Sdim  if (CheckerContext::isCLibraryFunction(FD, "strncat")) {
141234287Sdim    if (containsBadStrncatPattern(CE)) {
142234287Sdim      const Expr *DstArg = CE->getArg(0);
143234287Sdim      const Expr *LenArg = CE->getArg(2);
144234287Sdim      PathDiagnosticLocation Loc =
145234287Sdim        PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);
146234287Sdim
147234287Sdim      StringRef DstName = getPrintableName(DstArg);
148234287Sdim
149234287Sdim      SmallString<256> S;
150234287Sdim      llvm::raw_svector_ostream os(S);
151234287Sdim      os << "Potential buffer overflow. ";
152234287Sdim      if (!DstName.empty()) {
153234287Sdim        os << "Replace with 'sizeof(" << DstName << ") "
154234287Sdim              "- strlen(" << DstName <<") - 1'";
155234287Sdim        os << " or u";
156234287Sdim      } else
157234287Sdim        os << "U";
158234287Sdim      os << "se a safer 'strlcat' API";
159234287Sdim
160234287Sdim      BR.EmitBasicReport(FD, "Anti-pattern in the argument", "C String API",
161263508Sdim                         os.str(), Loc, LenArg->getSourceRange());
162234287Sdim    }
163234287Sdim  }
164234287Sdim
165234287Sdim  // Recurse and check children.
166234287Sdim  VisitChildren(CE);
167234287Sdim}
168234287Sdim
169234287Sdimvoid WalkAST::VisitChildren(Stmt *S) {
170234287Sdim  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I != E;
171234287Sdim      ++I)
172234287Sdim    if (Stmt *child = *I)
173234287Sdim      Visit(child);
174234287Sdim}
175234287Sdim
176234287Sdimnamespace {
177234287Sdimclass CStringSyntaxChecker: public Checker<check::ASTCodeBody> {
178234287Sdimpublic:
179234287Sdim
180234287Sdim  void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
181234287Sdim      BugReporter &BR) const {
182234287Sdim    WalkAST walker(BR, Mgr.getAnalysisDeclContext(D));
183234287Sdim    walker.Visit(D->getBody());
184234287Sdim  }
185234287Sdim};
186234287Sdim}
187234287Sdim
188234287Sdimvoid ento::registerCStringSyntaxChecker(CheckerManager &mgr) {
189234287Sdim  mgr.registerChecker<CStringSyntaxChecker>();
190234287Sdim}
191234287Sdim
192