1234287Sdim//== CStringSyntaxChecker.cpp - CoreFoundation containers API *- C++ -*-==// 2234287Sdim// 3234287Sdim// The LLVM Compiler Infrastructure 4234287Sdim// 5234287Sdim// This file is distributed under the University of Illinois Open Source 6234287Sdim// License. See LICENSE.TXT for details. 7234287Sdim// 8234287Sdim//===----------------------------------------------------------------------===// 9234287Sdim// 10234287Sdim// An AST checker that looks for common pitfalls when using C string APIs. 11234287Sdim// - Identifies erroneous patterns in the last argument to strncat - the number 12234287Sdim// of bytes to copy. 13234287Sdim// 14234287Sdim//===----------------------------------------------------------------------===// 15234287Sdim#include "ClangSACheckers.h" 16234287Sdim#include "clang/AST/Expr.h" 17234287Sdim#include "clang/AST/OperationKinds.h" 18234287Sdim#include "clang/AST/StmtVisitor.h" 19249423Sdim#include "clang/Analysis/AnalysisContext.h" 20234287Sdim#include "clang/Basic/TargetInfo.h" 21234287Sdim#include "clang/Basic/TypeTraits.h" 22249423Sdim#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 23234287Sdim#include "clang/StaticAnalyzer/Core/Checker.h" 24234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" 25234287Sdim#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 26234287Sdim#include "llvm/ADT/SmallString.h" 27234287Sdim#include "llvm/Support/raw_ostream.h" 28234287Sdim 29234287Sdimusing namespace clang; 30234287Sdimusing namespace ento; 31234287Sdim 32234287Sdimnamespace { 33234287Sdimclass WalkAST: public StmtVisitor<WalkAST> { 34234287Sdim BugReporter &BR; 35234287Sdim AnalysisDeclContext* AC; 36234287Sdim 37234287Sdim /// Check if two expressions refer to the same declaration. 38234287Sdim inline bool sameDecl(const Expr *A1, const Expr *A2) { 39234287Sdim if (const DeclRefExpr *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts())) 40234287Sdim if (const DeclRefExpr *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts())) 41234287Sdim return D1->getDecl() == D2->getDecl(); 42234287Sdim return false; 43234287Sdim } 44234287Sdim 45234287Sdim /// Check if the expression E is a sizeof(WithArg). 46234287Sdim inline bool isSizeof(const Expr *E, const Expr *WithArg) { 47234287Sdim if (const UnaryExprOrTypeTraitExpr *UE = 48234287Sdim dyn_cast<UnaryExprOrTypeTraitExpr>(E)) 49234287Sdim if (UE->getKind() == UETT_SizeOf) 50234287Sdim return sameDecl(UE->getArgumentExpr(), WithArg); 51234287Sdim return false; 52234287Sdim } 53234287Sdim 54234287Sdim /// Check if the expression E is a strlen(WithArg). 55234287Sdim inline bool isStrlen(const Expr *E, const Expr *WithArg) { 56234287Sdim if (const CallExpr *CE = dyn_cast<CallExpr>(E)) { 57234287Sdim const FunctionDecl *FD = CE->getDirectCallee(); 58234287Sdim if (!FD) 59234287Sdim return false; 60243830Sdim return (CheckerContext::isCLibraryFunction(FD, "strlen") && 61243830Sdim sameDecl(CE->getArg(0), WithArg)); 62234287Sdim } 63234287Sdim return false; 64234287Sdim } 65234287Sdim 66234287Sdim /// Check if the expression is an integer literal with value 1. 67234287Sdim inline bool isOne(const Expr *E) { 68234287Sdim if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E)) 69234287Sdim return (IL->getValue().isIntN(1)); 70234287Sdim return false; 71234287Sdim } 72234287Sdim 73234287Sdim inline StringRef getPrintableName(const Expr *E) { 74234287Sdim if (const DeclRefExpr *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) 75234287Sdim return D->getDecl()->getName(); 76234287Sdim return StringRef(); 77234287Sdim } 78234287Sdim 79234287Sdim /// Identify erroneous patterns in the last argument to strncat - the number 80234287Sdim /// of bytes to copy. 81234287Sdim bool containsBadStrncatPattern(const CallExpr *CE); 82234287Sdim 83234287Sdimpublic: 84234287Sdim WalkAST(BugReporter &br, AnalysisDeclContext* ac) : 85243830Sdim BR(br), AC(ac) { 86234287Sdim } 87234287Sdim 88234287Sdim // Statement visitor methods. 89234287Sdim void VisitChildren(Stmt *S); 90234287Sdim void VisitStmt(Stmt *S) { 91234287Sdim VisitChildren(S); 92234287Sdim } 93234287Sdim void VisitCallExpr(CallExpr *CE); 94234287Sdim}; 95234287Sdim} // end anonymous namespace 96234287Sdim 97234287Sdim// The correct size argument should look like following: 98234287Sdim// strncat(dst, src, sizeof(dst) - strlen(dest) - 1); 99234287Sdim// We look for the following anti-patterns: 100234287Sdim// - strncat(dst, src, sizeof(dst) - strlen(dst)); 101234287Sdim// - strncat(dst, src, sizeof(dst) - 1); 102234287Sdim// - strncat(dst, src, sizeof(dst)); 103234287Sdimbool WalkAST::containsBadStrncatPattern(const CallExpr *CE) { 104251662Sdim if (CE->getNumArgs() != 3) 105251662Sdim return false; 106234287Sdim const Expr *DstArg = CE->getArg(0); 107234287Sdim const Expr *SrcArg = CE->getArg(1); 108234287Sdim const Expr *LenArg = CE->getArg(2); 109234287Sdim 110234287Sdim // Identify wrong size expressions, which are commonly used instead. 111234287Sdim if (const BinaryOperator *BE = 112234287Sdim dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) { 113234287Sdim // - sizeof(dst) - strlen(dst) 114234287Sdim if (BE->getOpcode() == BO_Sub) { 115234287Sdim const Expr *L = BE->getLHS(); 116234287Sdim const Expr *R = BE->getRHS(); 117234287Sdim if (isSizeof(L, DstArg) && isStrlen(R, DstArg)) 118234287Sdim return true; 119234287Sdim 120234287Sdim // - sizeof(dst) - 1 121234287Sdim if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts())) 122234287Sdim return true; 123234287Sdim } 124234287Sdim } 125234287Sdim // - sizeof(dst) 126234287Sdim if (isSizeof(LenArg, DstArg)) 127234287Sdim return true; 128234287Sdim 129234287Sdim // - sizeof(src) 130234287Sdim if (isSizeof(LenArg, SrcArg)) 131234287Sdim return true; 132234287Sdim return false; 133234287Sdim} 134234287Sdim 135234287Sdimvoid WalkAST::VisitCallExpr(CallExpr *CE) { 136234287Sdim const FunctionDecl *FD = CE->getDirectCallee(); 137234287Sdim if (!FD) 138234287Sdim return; 139234287Sdim 140243830Sdim if (CheckerContext::isCLibraryFunction(FD, "strncat")) { 141234287Sdim if (containsBadStrncatPattern(CE)) { 142234287Sdim const Expr *DstArg = CE->getArg(0); 143234287Sdim const Expr *LenArg = CE->getArg(2); 144234287Sdim PathDiagnosticLocation Loc = 145234287Sdim PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); 146234287Sdim 147234287Sdim StringRef DstName = getPrintableName(DstArg); 148234287Sdim 149234287Sdim SmallString<256> S; 150234287Sdim llvm::raw_svector_ostream os(S); 151234287Sdim os << "Potential buffer overflow. "; 152234287Sdim if (!DstName.empty()) { 153234287Sdim os << "Replace with 'sizeof(" << DstName << ") " 154234287Sdim "- strlen(" << DstName <<") - 1'"; 155234287Sdim os << " or u"; 156234287Sdim } else 157234287Sdim os << "U"; 158234287Sdim os << "se a safer 'strlcat' API"; 159234287Sdim 160234287Sdim BR.EmitBasicReport(FD, "Anti-pattern in the argument", "C String API", 161263508Sdim os.str(), Loc, LenArg->getSourceRange()); 162234287Sdim } 163234287Sdim } 164234287Sdim 165234287Sdim // Recurse and check children. 166234287Sdim VisitChildren(CE); 167234287Sdim} 168234287Sdim 169234287Sdimvoid WalkAST::VisitChildren(Stmt *S) { 170234287Sdim for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I != E; 171234287Sdim ++I) 172234287Sdim if (Stmt *child = *I) 173234287Sdim Visit(child); 174234287Sdim} 175234287Sdim 176234287Sdimnamespace { 177234287Sdimclass CStringSyntaxChecker: public Checker<check::ASTCodeBody> { 178234287Sdimpublic: 179234287Sdim 180234287Sdim void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, 181234287Sdim BugReporter &BR) const { 182234287Sdim WalkAST walker(BR, Mgr.getAnalysisDeclContext(D)); 183234287Sdim walker.Visit(D->getBody()); 184234287Sdim } 185234287Sdim}; 186234287Sdim} 187234287Sdim 188234287Sdimvoid ento::registerCStringSyntaxChecker(CheckerManager &mgr) { 189234287Sdim mgr.registerChecker<CStringSyntaxChecker>(); 190234287Sdim} 191234287Sdim 192