CStringChecker.cpp revision 296417
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This defines CStringChecker, which is an assortment of checks on calls
11// to functions in <string.h>.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "InterCheckerAPI.h"
17#include "clang/Basic/CharInfo.h"
18#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19#include "clang/StaticAnalyzer/Core/Checker.h"
20#include "clang/StaticAnalyzer/Core/CheckerManager.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringSwitch.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace clang;
29using namespace ento;
30
31namespace {
32class CStringChecker : public Checker< eval::Call,
33                                         check::PreStmt<DeclStmt>,
34                                         check::LiveSymbols,
35                                         check::DeadSymbols,
36                                         check::RegionChanges
37                                         > {
38  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39      BT_NotCString, BT_AdditionOverflow;
40
41  mutable const char *CurrentFunctionDescription;
42
43public:
44  /// The filter is used to filter out the diagnostics which are not enabled by
45  /// the user.
46  struct CStringChecksFilter {
47    DefaultBool CheckCStringNullArg;
48    DefaultBool CheckCStringOutOfBounds;
49    DefaultBool CheckCStringBufferOverlap;
50    DefaultBool CheckCStringNotNullTerm;
51
52    CheckName CheckNameCStringNullArg;
53    CheckName CheckNameCStringOutOfBounds;
54    CheckName CheckNameCStringBufferOverlap;
55    CheckName CheckNameCStringNotNullTerm;
56  };
57
58  CStringChecksFilter Filter;
59
60  static void *getTag() { static int tag; return &tag; }
61
62  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67
68  ProgramStateRef
69    checkRegionChanges(ProgramStateRef state,
70                       const InvalidatedSymbols *,
71                       ArrayRef<const MemRegion *> ExplicitRegions,
72                       ArrayRef<const MemRegion *> Regions,
73                       const CallEvent *Call) const;
74
75  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76                                          const CallExpr *) const;
77
78  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
82  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83                      ProgramStateRef state,
84                      const Expr *Size,
85                      const Expr *Source,
86                      const Expr *Dest,
87                      bool Restricted = false,
88                      bool IsMempcpy = false) const;
89
90  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91
92  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94  void evalstrLengthCommon(CheckerContext &C,
95                           const CallExpr *CE,
96                           bool IsStrnlen = false) const;
97
98  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101  void evalStrcpyCommon(CheckerContext &C,
102                        const CallExpr *CE,
103                        bool returnEnd,
104                        bool isBounded,
105                        bool isAppending) const;
106
107  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109
110  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114  void evalStrcmpCommon(CheckerContext &C,
115                        const CallExpr *CE,
116                        bool isBounded = false,
117                        bool ignoreCase = false) const;
118
119  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120
121  // Utility methods
122  std::pair<ProgramStateRef , ProgramStateRef >
123  static assumeZero(CheckerContext &C,
124                    ProgramStateRef state, SVal V, QualType Ty);
125
126  static ProgramStateRef setCStringLength(ProgramStateRef state,
127                                              const MemRegion *MR,
128                                              SVal strLength);
129  static SVal getCStringLengthForRegion(CheckerContext &C,
130                                        ProgramStateRef &state,
131                                        const Expr *Ex,
132                                        const MemRegion *MR,
133                                        bool hypothetical);
134  SVal getCStringLength(CheckerContext &C,
135                        ProgramStateRef &state,
136                        const Expr *Ex,
137                        SVal Buf,
138                        bool hypothetical = false) const;
139
140  const StringLiteral *getCStringLiteral(CheckerContext &C,
141                                         ProgramStateRef &state,
142                                         const Expr *expr,
143                                         SVal val) const;
144
145  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146                                          ProgramStateRef state,
147                                          const Expr *Ex, SVal V,
148                                          bool IsSourceBuffer,
149                                          const Expr *Size);
150
151  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
152                              const MemRegion *MR);
153
154  // Re-usable checks
155  ProgramStateRef checkNonNull(CheckerContext &C,
156                                   ProgramStateRef state,
157                                   const Expr *S,
158                                   SVal l) const;
159  ProgramStateRef CheckLocation(CheckerContext &C,
160                                    ProgramStateRef state,
161                                    const Expr *S,
162                                    SVal l,
163                                    const char *message = nullptr) const;
164  ProgramStateRef CheckBufferAccess(CheckerContext &C,
165                                        ProgramStateRef state,
166                                        const Expr *Size,
167                                        const Expr *FirstBuf,
168                                        const Expr *SecondBuf,
169                                        const char *firstMessage = nullptr,
170                                        const char *secondMessage = nullptr,
171                                        bool WarnAboutSize = false) const;
172
173  ProgramStateRef CheckBufferAccess(CheckerContext &C,
174                                        ProgramStateRef state,
175                                        const Expr *Size,
176                                        const Expr *Buf,
177                                        const char *message = nullptr,
178                                        bool WarnAboutSize = false) const {
179    // This is a convenience override.
180    return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
181                             WarnAboutSize);
182  }
183  ProgramStateRef CheckOverlap(CheckerContext &C,
184                                   ProgramStateRef state,
185                                   const Expr *Size,
186                                   const Expr *First,
187                                   const Expr *Second) const;
188  void emitOverlapBug(CheckerContext &C,
189                      ProgramStateRef state,
190                      const Stmt *First,
191                      const Stmt *Second) const;
192
193  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
194                                            ProgramStateRef state,
195                                            NonLoc left,
196                                            NonLoc right) const;
197
198  // Return true if the destination buffer of the copy function may be in bound.
199  // Expects SVal of Size to be positive and unsigned.
200  // Expects SVal of FirstBuf to be a FieldRegion.
201  static bool IsFirstBufInBound(CheckerContext &C,
202                                ProgramStateRef state,
203                                const Expr *FirstBuf,
204                                const Expr *Size);
205};
206
207} //end anonymous namespace
208
209REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
210
211//===----------------------------------------------------------------------===//
212// Individual checks and utility methods.
213//===----------------------------------------------------------------------===//
214
215std::pair<ProgramStateRef , ProgramStateRef >
216CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
217                           QualType Ty) {
218  Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
219  if (!val)
220    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
221
222  SValBuilder &svalBuilder = C.getSValBuilder();
223  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
224  return state->assume(svalBuilder.evalEQ(state, *val, zero));
225}
226
227ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
228                                            ProgramStateRef state,
229                                            const Expr *S, SVal l) const {
230  // If a previous check has failed, propagate the failure.
231  if (!state)
232    return nullptr;
233
234  ProgramStateRef stateNull, stateNonNull;
235  std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
236
237  if (stateNull && !stateNonNull) {
238    if (!Filter.CheckCStringNullArg)
239      return nullptr;
240
241    ExplodedNode *N = C.generateErrorNode(stateNull);
242    if (!N)
243      return nullptr;
244
245    if (!BT_Null)
246      BT_Null.reset(new BuiltinBug(
247          Filter.CheckNameCStringNullArg, categories::UnixAPI,
248          "Null pointer argument in call to byte string function"));
249
250    SmallString<80> buf;
251    llvm::raw_svector_ostream os(buf);
252    assert(CurrentFunctionDescription);
253    os << "Null pointer argument in call to " << CurrentFunctionDescription;
254
255    // Generate a report for this bug.
256    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
257    auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
258
259    report->addRange(S->getSourceRange());
260    bugreporter::trackNullOrUndefValue(N, S, *report);
261    C.emitReport(std::move(report));
262    return nullptr;
263  }
264
265  // From here on, assume that the value is non-null.
266  assert(stateNonNull);
267  return stateNonNull;
268}
269
270// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
271ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
272                                             ProgramStateRef state,
273                                             const Expr *S, SVal l,
274                                             const char *warningMsg) const {
275  // If a previous check has failed, propagate the failure.
276  if (!state)
277    return nullptr;
278
279  // Check for out of bound array element access.
280  const MemRegion *R = l.getAsRegion();
281  if (!R)
282    return state;
283
284  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
285  if (!ER)
286    return state;
287
288  assert(ER->getValueType() == C.getASTContext().CharTy &&
289    "CheckLocation should only be called with char* ElementRegions");
290
291  // Get the size of the array.
292  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
293  SValBuilder &svalBuilder = C.getSValBuilder();
294  SVal Extent =
295    svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
296  DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
297
298  // Get the index of the accessed element.
299  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
300
301  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
302  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
303  if (StOutBound && !StInBound) {
304    ExplodedNode *N = C.generateErrorNode(StOutBound);
305    if (!N)
306      return nullptr;
307
308    if (!BT_Bounds) {
309      BT_Bounds.reset(new BuiltinBug(
310          Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
311          "Byte string function accesses out-of-bound array element"));
312    }
313    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
314
315    // Generate a report for this bug.
316    std::unique_ptr<BugReport> report;
317    if (warningMsg) {
318      report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
319    } else {
320      assert(CurrentFunctionDescription);
321      assert(CurrentFunctionDescription[0] != '\0');
322
323      SmallString<80> buf;
324      llvm::raw_svector_ostream os(buf);
325      os << toUppercase(CurrentFunctionDescription[0])
326         << &CurrentFunctionDescription[1]
327         << " accesses out-of-bound array element";
328      report = llvm::make_unique<BugReport>(*BT, os.str(), N);
329    }
330
331    // FIXME: It would be nice to eventually make this diagnostic more clear,
332    // e.g., by referencing the original declaration or by saying *why* this
333    // reference is outside the range.
334
335    report->addRange(S->getSourceRange());
336    C.emitReport(std::move(report));
337    return nullptr;
338  }
339
340  // Array bound check succeeded.  From this point forward the array bound
341  // should always succeed.
342  return StInBound;
343}
344
345ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
346                                                 ProgramStateRef state,
347                                                 const Expr *Size,
348                                                 const Expr *FirstBuf,
349                                                 const Expr *SecondBuf,
350                                                 const char *firstMessage,
351                                                 const char *secondMessage,
352                                                 bool WarnAboutSize) const {
353  // If a previous check has failed, propagate the failure.
354  if (!state)
355    return nullptr;
356
357  SValBuilder &svalBuilder = C.getSValBuilder();
358  ASTContext &Ctx = svalBuilder.getContext();
359  const LocationContext *LCtx = C.getLocationContext();
360
361  QualType sizeTy = Size->getType();
362  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
363
364  // Check that the first buffer is non-null.
365  SVal BufVal = state->getSVal(FirstBuf, LCtx);
366  state = checkNonNull(C, state, FirstBuf, BufVal);
367  if (!state)
368    return nullptr;
369
370  // If out-of-bounds checking is turned off, skip the rest.
371  if (!Filter.CheckCStringOutOfBounds)
372    return state;
373
374  // Get the access length and make sure it is known.
375  // FIXME: This assumes the caller has already checked that the access length
376  // is positive. And that it's unsigned.
377  SVal LengthVal = state->getSVal(Size, LCtx);
378  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
379  if (!Length)
380    return state;
381
382  // Compute the offset of the last element to be accessed: size-1.
383  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
384  NonLoc LastOffset = svalBuilder
385      .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
386
387  // Check that the first buffer is sufficiently long.
388  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
389  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
390    const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
391
392    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
393                                          LastOffset, PtrTy);
394    state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
395
396    // If the buffer isn't large enough, abort.
397    if (!state)
398      return nullptr;
399  }
400
401  // If there's a second buffer, check it as well.
402  if (SecondBuf) {
403    BufVal = state->getSVal(SecondBuf, LCtx);
404    state = checkNonNull(C, state, SecondBuf, BufVal);
405    if (!state)
406      return nullptr;
407
408    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
409    if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
410      const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
411
412      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
413                                            LastOffset, PtrTy);
414      state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
415    }
416  }
417
418  // Large enough or not, return this state!
419  return state;
420}
421
422ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
423                                            ProgramStateRef state,
424                                            const Expr *Size,
425                                            const Expr *First,
426                                            const Expr *Second) const {
427  if (!Filter.CheckCStringBufferOverlap)
428    return state;
429
430  // Do a simple check for overlap: if the two arguments are from the same
431  // buffer, see if the end of the first is greater than the start of the second
432  // or vice versa.
433
434  // If a previous check has failed, propagate the failure.
435  if (!state)
436    return nullptr;
437
438  ProgramStateRef stateTrue, stateFalse;
439
440  // Get the buffer values and make sure they're known locations.
441  const LocationContext *LCtx = C.getLocationContext();
442  SVal firstVal = state->getSVal(First, LCtx);
443  SVal secondVal = state->getSVal(Second, LCtx);
444
445  Optional<Loc> firstLoc = firstVal.getAs<Loc>();
446  if (!firstLoc)
447    return state;
448
449  Optional<Loc> secondLoc = secondVal.getAs<Loc>();
450  if (!secondLoc)
451    return state;
452
453  // Are the two values the same?
454  SValBuilder &svalBuilder = C.getSValBuilder();
455  std::tie(stateTrue, stateFalse) =
456    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
457
458  if (stateTrue && !stateFalse) {
459    // If the values are known to be equal, that's automatically an overlap.
460    emitOverlapBug(C, stateTrue, First, Second);
461    return nullptr;
462  }
463
464  // assume the two expressions are not equal.
465  assert(stateFalse);
466  state = stateFalse;
467
468  // Which value comes first?
469  QualType cmpTy = svalBuilder.getConditionType();
470  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
471                                         *firstLoc, *secondLoc, cmpTy);
472  Optional<DefinedOrUnknownSVal> reverseTest =
473      reverse.getAs<DefinedOrUnknownSVal>();
474  if (!reverseTest)
475    return state;
476
477  std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
478  if (stateTrue) {
479    if (stateFalse) {
480      // If we don't know which one comes first, we can't perform this test.
481      return state;
482    } else {
483      // Switch the values so that firstVal is before secondVal.
484      std::swap(firstLoc, secondLoc);
485
486      // Switch the Exprs as well, so that they still correspond.
487      std::swap(First, Second);
488    }
489  }
490
491  // Get the length, and make sure it too is known.
492  SVal LengthVal = state->getSVal(Size, LCtx);
493  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
494  if (!Length)
495    return state;
496
497  // Convert the first buffer's start address to char*.
498  // Bail out if the cast fails.
499  ASTContext &Ctx = svalBuilder.getContext();
500  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
501  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
502                                         First->getType());
503  Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
504  if (!FirstStartLoc)
505    return state;
506
507  // Compute the end of the first buffer. Bail out if THAT fails.
508  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
509                                 *FirstStartLoc, *Length, CharPtrTy);
510  Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
511  if (!FirstEndLoc)
512    return state;
513
514  // Is the end of the first buffer past the start of the second buffer?
515  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
516                                *FirstEndLoc, *secondLoc, cmpTy);
517  Optional<DefinedOrUnknownSVal> OverlapTest =
518      Overlap.getAs<DefinedOrUnknownSVal>();
519  if (!OverlapTest)
520    return state;
521
522  std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
523
524  if (stateTrue && !stateFalse) {
525    // Overlap!
526    emitOverlapBug(C, stateTrue, First, Second);
527    return nullptr;
528  }
529
530  // assume the two expressions don't overlap.
531  assert(stateFalse);
532  return stateFalse;
533}
534
535void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
536                                  const Stmt *First, const Stmt *Second) const {
537  ExplodedNode *N = C.generateErrorNode(state);
538  if (!N)
539    return;
540
541  if (!BT_Overlap)
542    BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
543                                 categories::UnixAPI, "Improper arguments"));
544
545  // Generate a report for this bug.
546  auto report = llvm::make_unique<BugReport>(
547      *BT_Overlap, "Arguments must not be overlapping buffers", N);
548  report->addRange(First->getSourceRange());
549  report->addRange(Second->getSourceRange());
550
551  C.emitReport(std::move(report));
552}
553
554ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
555                                                     ProgramStateRef state,
556                                                     NonLoc left,
557                                                     NonLoc right) const {
558  // If out-of-bounds checking is turned off, skip the rest.
559  if (!Filter.CheckCStringOutOfBounds)
560    return state;
561
562  // If a previous check has failed, propagate the failure.
563  if (!state)
564    return nullptr;
565
566  SValBuilder &svalBuilder = C.getSValBuilder();
567  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
568
569  QualType sizeTy = svalBuilder.getContext().getSizeType();
570  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
571  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
572
573  SVal maxMinusRight;
574  if (right.getAs<nonloc::ConcreteInt>()) {
575    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
576                                                 sizeTy);
577  } else {
578    // Try switching the operands. (The order of these two assignments is
579    // important!)
580    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
581                                            sizeTy);
582    left = right;
583  }
584
585  if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
586    QualType cmpTy = svalBuilder.getConditionType();
587    // If left > max - right, we have an overflow.
588    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
589                                                *maxMinusRightNL, cmpTy);
590
591    ProgramStateRef stateOverflow, stateOkay;
592    std::tie(stateOverflow, stateOkay) =
593      state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
594
595    if (stateOverflow && !stateOkay) {
596      // We have an overflow. Emit a bug report.
597      ExplodedNode *N = C.generateErrorNode(stateOverflow);
598      if (!N)
599        return nullptr;
600
601      if (!BT_AdditionOverflow)
602        BT_AdditionOverflow.reset(
603            new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
604                           "Sum of expressions causes overflow"));
605
606      // This isn't a great error message, but this should never occur in real
607      // code anyway -- you'd have to create a buffer longer than a size_t can
608      // represent, which is sort of a contradiction.
609      const char *warning =
610        "This expression will create a string whose length is too big to "
611        "be represented as a size_t";
612
613      // Generate a report for this bug.
614      C.emitReport(
615          llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
616
617      return nullptr;
618    }
619
620    // From now on, assume an overflow didn't occur.
621    assert(stateOkay);
622    state = stateOkay;
623  }
624
625  return state;
626}
627
628ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
629                                                const MemRegion *MR,
630                                                SVal strLength) {
631  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
632
633  MR = MR->StripCasts();
634
635  switch (MR->getKind()) {
636  case MemRegion::StringRegionKind:
637    // FIXME: This can happen if we strcpy() into a string region. This is
638    // undefined [C99 6.4.5p6], but we should still warn about it.
639    return state;
640
641  case MemRegion::SymbolicRegionKind:
642  case MemRegion::AllocaRegionKind:
643  case MemRegion::VarRegionKind:
644  case MemRegion::FieldRegionKind:
645  case MemRegion::ObjCIvarRegionKind:
646    // These are the types we can currently track string lengths for.
647    break;
648
649  case MemRegion::ElementRegionKind:
650    // FIXME: Handle element regions by upper-bounding the parent region's
651    // string length.
652    return state;
653
654  default:
655    // Other regions (mostly non-data) can't have a reliable C string length.
656    // For now, just ignore the change.
657    // FIXME: These are rare but not impossible. We should output some kind of
658    // warning for things like strcpy((char[]){'a', 0}, "b");
659    return state;
660  }
661
662  if (strLength.isUnknown())
663    return state->remove<CStringLength>(MR);
664
665  return state->set<CStringLength>(MR, strLength);
666}
667
668SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
669                                               ProgramStateRef &state,
670                                               const Expr *Ex,
671                                               const MemRegion *MR,
672                                               bool hypothetical) {
673  if (!hypothetical) {
674    // If there's a recorded length, go ahead and return it.
675    const SVal *Recorded = state->get<CStringLength>(MR);
676    if (Recorded)
677      return *Recorded;
678  }
679
680  // Otherwise, get a new symbol and update the state.
681  SValBuilder &svalBuilder = C.getSValBuilder();
682  QualType sizeTy = svalBuilder.getContext().getSizeType();
683  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
684                                                    MR, Ex, sizeTy,
685                                                    C.blockCount());
686
687  if (!hypothetical) {
688    if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
689      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
690      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
691      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
692      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
693      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
694                                                        fourInt);
695      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
696      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
697                                                maxLength, sizeTy);
698      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
699    }
700    state = state->set<CStringLength>(MR, strLength);
701  }
702
703  return strLength;
704}
705
706SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
707                                      const Expr *Ex, SVal Buf,
708                                      bool hypothetical) const {
709  const MemRegion *MR = Buf.getAsRegion();
710  if (!MR) {
711    // If we can't get a region, see if it's something we /know/ isn't a
712    // C string. In the context of locations, the only time we can issue such
713    // a warning is for labels.
714    if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
715      if (!Filter.CheckCStringNotNullTerm)
716        return UndefinedVal();
717
718      if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
719        if (!BT_NotCString)
720          BT_NotCString.reset(new BuiltinBug(
721              Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
722              "Argument is not a null-terminated string."));
723
724        SmallString<120> buf;
725        llvm::raw_svector_ostream os(buf);
726        assert(CurrentFunctionDescription);
727        os << "Argument to " << CurrentFunctionDescription
728           << " is the address of the label '" << Label->getLabel()->getName()
729           << "', which is not a null-terminated string";
730
731        // Generate a report for this bug.
732        auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
733
734        report->addRange(Ex->getSourceRange());
735        C.emitReport(std::move(report));
736      }
737      return UndefinedVal();
738
739    }
740
741    // If it's not a region and not a label, give up.
742    return UnknownVal();
743  }
744
745  // If we have a region, strip casts from it and see if we can figure out
746  // its length. For anything we can't figure out, just return UnknownVal.
747  MR = MR->StripCasts();
748
749  switch (MR->getKind()) {
750  case MemRegion::StringRegionKind: {
751    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
752    // so we can assume that the byte length is the correct C string length.
753    SValBuilder &svalBuilder = C.getSValBuilder();
754    QualType sizeTy = svalBuilder.getContext().getSizeType();
755    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
756    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
757  }
758  case MemRegion::SymbolicRegionKind:
759  case MemRegion::AllocaRegionKind:
760  case MemRegion::VarRegionKind:
761  case MemRegion::FieldRegionKind:
762  case MemRegion::ObjCIvarRegionKind:
763    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
764  case MemRegion::CompoundLiteralRegionKind:
765    // FIXME: Can we track this? Is it necessary?
766    return UnknownVal();
767  case MemRegion::ElementRegionKind:
768    // FIXME: How can we handle this? It's not good enough to subtract the
769    // offset from the base string length; consider "123\x00567" and &a[5].
770    return UnknownVal();
771  default:
772    // Other regions (mostly non-data) can't have a reliable C string length.
773    // In this case, an error is emitted and UndefinedVal is returned.
774    // The caller should always be prepared to handle this case.
775    if (!Filter.CheckCStringNotNullTerm)
776      return UndefinedVal();
777
778    if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
779      if (!BT_NotCString)
780        BT_NotCString.reset(new BuiltinBug(
781            Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
782            "Argument is not a null-terminated string."));
783
784      SmallString<120> buf;
785      llvm::raw_svector_ostream os(buf);
786
787      assert(CurrentFunctionDescription);
788      os << "Argument to " << CurrentFunctionDescription << " is ";
789
790      if (SummarizeRegion(os, C.getASTContext(), MR))
791        os << ", which is not a null-terminated string";
792      else
793        os << "not a null-terminated string";
794
795      // Generate a report for this bug.
796      auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
797
798      report->addRange(Ex->getSourceRange());
799      C.emitReport(std::move(report));
800    }
801
802    return UndefinedVal();
803  }
804}
805
806const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
807  ProgramStateRef &state, const Expr *expr, SVal val) const {
808
809  // Get the memory region pointed to by the val.
810  const MemRegion *bufRegion = val.getAsRegion();
811  if (!bufRegion)
812    return nullptr;
813
814  // Strip casts off the memory region.
815  bufRegion = bufRegion->StripCasts();
816
817  // Cast the memory region to a string region.
818  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
819  if (!strRegion)
820    return nullptr;
821
822  // Return the actual string in the string region.
823  return strRegion->getStringLiteral();
824}
825
826bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
827                                       ProgramStateRef state,
828                                       const Expr *FirstBuf,
829                                       const Expr *Size) {
830  // If we do not know that the buffer is long enough we return 'true'.
831  // Otherwise the parent region of this field region would also get
832  // invalidated, which would lead to warnings based on an unknown state.
833
834  // Originally copied from CheckBufferAccess and CheckLocation.
835  SValBuilder &svalBuilder = C.getSValBuilder();
836  ASTContext &Ctx = svalBuilder.getContext();
837  const LocationContext *LCtx = C.getLocationContext();
838
839  QualType sizeTy = Size->getType();
840  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
841  SVal BufVal = state->getSVal(FirstBuf, LCtx);
842
843  SVal LengthVal = state->getSVal(Size, LCtx);
844  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
845  if (!Length)
846    return true; // cf top comment.
847
848  // Compute the offset of the last element to be accessed: size-1.
849  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
850  NonLoc LastOffset =
851      svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy)
852          .castAs<NonLoc>();
853
854  // Check that the first buffer is sufficiently long.
855  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
856  Optional<Loc> BufLoc = BufStart.getAs<Loc>();
857  if (!BufLoc)
858    return true; // cf top comment.
859
860  SVal BufEnd =
861      svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
862
863  // Check for out of bound array element access.
864  const MemRegion *R = BufEnd.getAsRegion();
865  if (!R)
866    return true; // cf top comment.
867
868  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
869  if (!ER)
870    return true; // cf top comment.
871
872  assert(ER->getValueType() == C.getASTContext().CharTy &&
873         "IsFirstBufInBound should only be called with char* ElementRegions");
874
875  // Get the size of the array.
876  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
877  SVal Extent =
878      svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
879  DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>();
880
881  // Get the index of the accessed element.
882  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
883
884  ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true);
885
886  return static_cast<bool>(StInBound);
887}
888
889ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
890                                                 ProgramStateRef state,
891                                                 const Expr *E, SVal V,
892                                                 bool IsSourceBuffer,
893                                                 const Expr *Size) {
894  Optional<Loc> L = V.getAs<Loc>();
895  if (!L)
896    return state;
897
898  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
899  // some assumptions about the value that CFRefCount can't. Even so, it should
900  // probably be refactored.
901  if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
902    const MemRegion *R = MR->getRegion()->StripCasts();
903
904    // Are we dealing with an ElementRegion?  If so, we should be invalidating
905    // the super-region.
906    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
907      R = ER->getSuperRegion();
908      // FIXME: What about layers of ElementRegions?
909    }
910
911    // Invalidate this region.
912    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
913
914    bool CausesPointerEscape = false;
915    RegionAndSymbolInvalidationTraits ITraits;
916    // Invalidate and escape only indirect regions accessible through the source
917    // buffer.
918    if (IsSourceBuffer) {
919      ITraits.setTrait(R,
920                       RegionAndSymbolInvalidationTraits::TK_PreserveContents);
921      ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
922      CausesPointerEscape = true;
923    } else {
924      const MemRegion::Kind& K = R->getKind();
925      if (K == MemRegion::FieldRegionKind)
926        if (Size && IsFirstBufInBound(C, state, E, Size)) {
927          // If destination buffer is a field region and access is in bound,
928          // do not invalidate its super region.
929          ITraits.setTrait(
930              R,
931              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
932        }
933    }
934
935    return state->invalidateRegions(R, E, C.blockCount(), LCtx,
936                                    CausesPointerEscape, nullptr, nullptr,
937                                    &ITraits);
938  }
939
940  // If we have a non-region value by chance, just remove the binding.
941  // FIXME: is this necessary or correct? This handles the non-Region
942  //  cases.  Is it ever valid to store to these?
943  return state->killBinding(*L);
944}
945
946bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
947                                     const MemRegion *MR) {
948  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
949
950  switch (MR->getKind()) {
951  case MemRegion::FunctionCodeRegionKind: {
952    const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl();
953    if (FD)
954      os << "the address of the function '" << *FD << '\'';
955    else
956      os << "the address of a function";
957    return true;
958  }
959  case MemRegion::BlockCodeRegionKind:
960    os << "block text";
961    return true;
962  case MemRegion::BlockDataRegionKind:
963    os << "a block";
964    return true;
965  case MemRegion::CXXThisRegionKind:
966  case MemRegion::CXXTempObjectRegionKind:
967    os << "a C++ temp object of type " << TVR->getValueType().getAsString();
968    return true;
969  case MemRegion::VarRegionKind:
970    os << "a variable of type" << TVR->getValueType().getAsString();
971    return true;
972  case MemRegion::FieldRegionKind:
973    os << "a field of type " << TVR->getValueType().getAsString();
974    return true;
975  case MemRegion::ObjCIvarRegionKind:
976    os << "an instance variable of type " << TVR->getValueType().getAsString();
977    return true;
978  default:
979    return false;
980  }
981}
982
983//===----------------------------------------------------------------------===//
984// evaluation of individual function calls.
985//===----------------------------------------------------------------------===//
986
987void CStringChecker::evalCopyCommon(CheckerContext &C,
988                                    const CallExpr *CE,
989                                    ProgramStateRef state,
990                                    const Expr *Size, const Expr *Dest,
991                                    const Expr *Source, bool Restricted,
992                                    bool IsMempcpy) const {
993  CurrentFunctionDescription = "memory copy function";
994
995  // See if the size argument is zero.
996  const LocationContext *LCtx = C.getLocationContext();
997  SVal sizeVal = state->getSVal(Size, LCtx);
998  QualType sizeTy = Size->getType();
999
1000  ProgramStateRef stateZeroSize, stateNonZeroSize;
1001  std::tie(stateZeroSize, stateNonZeroSize) =
1002    assumeZero(C, state, sizeVal, sizeTy);
1003
1004  // Get the value of the Dest.
1005  SVal destVal = state->getSVal(Dest, LCtx);
1006
1007  // If the size is zero, there won't be any actual memory access, so
1008  // just bind the return value to the destination buffer and return.
1009  if (stateZeroSize && !stateNonZeroSize) {
1010    stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1011    C.addTransition(stateZeroSize);
1012    return;
1013  }
1014
1015  // If the size can be nonzero, we have to check the other arguments.
1016  if (stateNonZeroSize) {
1017    state = stateNonZeroSize;
1018
1019    // Ensure the destination is not null. If it is NULL there will be a
1020    // NULL pointer dereference.
1021    state = checkNonNull(C, state, Dest, destVal);
1022    if (!state)
1023      return;
1024
1025    // Get the value of the Src.
1026    SVal srcVal = state->getSVal(Source, LCtx);
1027
1028    // Ensure the source is not null. If it is NULL there will be a
1029    // NULL pointer dereference.
1030    state = checkNonNull(C, state, Source, srcVal);
1031    if (!state)
1032      return;
1033
1034    // Ensure the accesses are valid and that the buffers do not overlap.
1035    const char * const writeWarning =
1036      "Memory copy function overflows destination buffer";
1037    state = CheckBufferAccess(C, state, Size, Dest, Source,
1038                              writeWarning, /* sourceWarning = */ nullptr);
1039    if (Restricted)
1040      state = CheckOverlap(C, state, Size, Dest, Source);
1041
1042    if (!state)
1043      return;
1044
1045    // If this is mempcpy, get the byte after the last byte copied and
1046    // bind the expr.
1047    if (IsMempcpy) {
1048      loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
1049
1050      // Get the length to copy.
1051      if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
1052        // Get the byte after the last byte copied.
1053        SValBuilder &SvalBuilder = C.getSValBuilder();
1054        ASTContext &Ctx = SvalBuilder.getContext();
1055        QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
1056        loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
1057          CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>();
1058        SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
1059                                                          DestRegCharVal,
1060                                                          *lenValNonLoc,
1061                                                          Dest->getType());
1062
1063        // The byte after the last byte copied is the return value.
1064        state = state->BindExpr(CE, LCtx, lastElement);
1065      } else {
1066        // If we don't know how much we copied, we can at least
1067        // conjure a return value for later.
1068        SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1069                                                          C.blockCount());
1070        state = state->BindExpr(CE, LCtx, result);
1071      }
1072
1073    } else {
1074      // All other copies return the destination buffer.
1075      // (Well, bcopy() has a void return type, but this won't hurt.)
1076      state = state->BindExpr(CE, LCtx, destVal);
1077    }
1078
1079    // Invalidate the destination (regular invalidation without pointer-escaping
1080    // the address of the top-level region).
1081    // FIXME: Even if we can't perfectly model the copy, we should see if we
1082    // can use LazyCompoundVals to copy the source values into the destination.
1083    // This would probably remove any existing bindings past the end of the
1084    // copied region, but that's still an improvement over blank invalidation.
1085    state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1086                             /*IsSourceBuffer*/false, Size);
1087
1088    // Invalidate the source (const-invalidation without const-pointer-escaping
1089    // the address of the top-level region).
1090    state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1091                             /*IsSourceBuffer*/true, nullptr);
1092
1093    C.addTransition(state);
1094  }
1095}
1096
1097
1098void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1099  if (CE->getNumArgs() < 3)
1100    return;
1101
1102  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1103  // The return value is the address of the destination buffer.
1104  const Expr *Dest = CE->getArg(0);
1105  ProgramStateRef state = C.getState();
1106
1107  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1108}
1109
1110void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1111  if (CE->getNumArgs() < 3)
1112    return;
1113
1114  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1115  // The return value is a pointer to the byte following the last written byte.
1116  const Expr *Dest = CE->getArg(0);
1117  ProgramStateRef state = C.getState();
1118
1119  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1120}
1121
1122void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1123  if (CE->getNumArgs() < 3)
1124    return;
1125
1126  // void *memmove(void *dst, const void *src, size_t n);
1127  // The return value is the address of the destination buffer.
1128  const Expr *Dest = CE->getArg(0);
1129  ProgramStateRef state = C.getState();
1130
1131  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1132}
1133
1134void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1135  if (CE->getNumArgs() < 3)
1136    return;
1137
1138  // void bcopy(const void *src, void *dst, size_t n);
1139  evalCopyCommon(C, CE, C.getState(),
1140                 CE->getArg(2), CE->getArg(1), CE->getArg(0));
1141}
1142
1143void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1144  if (CE->getNumArgs() < 3)
1145    return;
1146
1147  // int memcmp(const void *s1, const void *s2, size_t n);
1148  CurrentFunctionDescription = "memory comparison function";
1149
1150  const Expr *Left = CE->getArg(0);
1151  const Expr *Right = CE->getArg(1);
1152  const Expr *Size = CE->getArg(2);
1153
1154  ProgramStateRef state = C.getState();
1155  SValBuilder &svalBuilder = C.getSValBuilder();
1156
1157  // See if the size argument is zero.
1158  const LocationContext *LCtx = C.getLocationContext();
1159  SVal sizeVal = state->getSVal(Size, LCtx);
1160  QualType sizeTy = Size->getType();
1161
1162  ProgramStateRef stateZeroSize, stateNonZeroSize;
1163  std::tie(stateZeroSize, stateNonZeroSize) =
1164    assumeZero(C, state, sizeVal, sizeTy);
1165
1166  // If the size can be zero, the result will be 0 in that case, and we don't
1167  // have to check either of the buffers.
1168  if (stateZeroSize) {
1169    state = stateZeroSize;
1170    state = state->BindExpr(CE, LCtx,
1171                            svalBuilder.makeZeroVal(CE->getType()));
1172    C.addTransition(state);
1173  }
1174
1175  // If the size can be nonzero, we have to check the other arguments.
1176  if (stateNonZeroSize) {
1177    state = stateNonZeroSize;
1178    // If we know the two buffers are the same, we know the result is 0.
1179    // First, get the two buffers' addresses. Another checker will have already
1180    // made sure they're not undefined.
1181    DefinedOrUnknownSVal LV =
1182        state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1183    DefinedOrUnknownSVal RV =
1184        state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1185
1186    // See if they are the same.
1187    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1188    ProgramStateRef StSameBuf, StNotSameBuf;
1189    std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1190
1191    // If the two arguments might be the same buffer, we know the result is 0,
1192    // and we only need to check one size.
1193    if (StSameBuf) {
1194      state = StSameBuf;
1195      state = CheckBufferAccess(C, state, Size, Left);
1196      if (state) {
1197        state = StSameBuf->BindExpr(CE, LCtx,
1198                                    svalBuilder.makeZeroVal(CE->getType()));
1199        C.addTransition(state);
1200      }
1201    }
1202
1203    // If the two arguments might be different buffers, we have to check the
1204    // size of both of them.
1205    if (StNotSameBuf) {
1206      state = StNotSameBuf;
1207      state = CheckBufferAccess(C, state, Size, Left, Right);
1208      if (state) {
1209        // The return value is the comparison result, which we don't know.
1210        SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1211                                                 C.blockCount());
1212        state = state->BindExpr(CE, LCtx, CmpV);
1213        C.addTransition(state);
1214      }
1215    }
1216  }
1217}
1218
1219void CStringChecker::evalstrLength(CheckerContext &C,
1220                                   const CallExpr *CE) const {
1221  if (CE->getNumArgs() < 1)
1222    return;
1223
1224  // size_t strlen(const char *s);
1225  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1226}
1227
1228void CStringChecker::evalstrnLength(CheckerContext &C,
1229                                    const CallExpr *CE) const {
1230  if (CE->getNumArgs() < 2)
1231    return;
1232
1233  // size_t strnlen(const char *s, size_t maxlen);
1234  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1235}
1236
1237void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1238                                         bool IsStrnlen) const {
1239  CurrentFunctionDescription = "string length function";
1240  ProgramStateRef state = C.getState();
1241  const LocationContext *LCtx = C.getLocationContext();
1242
1243  if (IsStrnlen) {
1244    const Expr *maxlenExpr = CE->getArg(1);
1245    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1246
1247    ProgramStateRef stateZeroSize, stateNonZeroSize;
1248    std::tie(stateZeroSize, stateNonZeroSize) =
1249      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1250
1251    // If the size can be zero, the result will be 0 in that case, and we don't
1252    // have to check the string itself.
1253    if (stateZeroSize) {
1254      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1255      stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1256      C.addTransition(stateZeroSize);
1257    }
1258
1259    // If the size is GUARANTEED to be zero, we're done!
1260    if (!stateNonZeroSize)
1261      return;
1262
1263    // Otherwise, record the assumption that the size is nonzero.
1264    state = stateNonZeroSize;
1265  }
1266
1267  // Check that the string argument is non-null.
1268  const Expr *Arg = CE->getArg(0);
1269  SVal ArgVal = state->getSVal(Arg, LCtx);
1270
1271  state = checkNonNull(C, state, Arg, ArgVal);
1272
1273  if (!state)
1274    return;
1275
1276  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1277
1278  // If the argument isn't a valid C string, there's no valid state to
1279  // transition to.
1280  if (strLength.isUndef())
1281    return;
1282
1283  DefinedOrUnknownSVal result = UnknownVal();
1284
1285  // If the check is for strnlen() then bind the return value to no more than
1286  // the maxlen value.
1287  if (IsStrnlen) {
1288    QualType cmpTy = C.getSValBuilder().getConditionType();
1289
1290    // It's a little unfortunate to be getting this again,
1291    // but it's not that expensive...
1292    const Expr *maxlenExpr = CE->getArg(1);
1293    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1294
1295    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1296    Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1297
1298    if (strLengthNL && maxlenValNL) {
1299      ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1300
1301      // Check if the strLength is greater than the maxlen.
1302      std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1303          C.getSValBuilder()
1304              .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1305              .castAs<DefinedOrUnknownSVal>());
1306
1307      if (stateStringTooLong && !stateStringNotTooLong) {
1308        // If the string is longer than maxlen, return maxlen.
1309        result = *maxlenValNL;
1310      } else if (stateStringNotTooLong && !stateStringTooLong) {
1311        // If the string is shorter than maxlen, return its length.
1312        result = *strLengthNL;
1313      }
1314    }
1315
1316    if (result.isUnknown()) {
1317      // If we don't have enough information for a comparison, there's
1318      // no guarantee the full string length will actually be returned.
1319      // All we know is the return value is the min of the string length
1320      // and the limit. This is better than nothing.
1321      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1322                                                   C.blockCount());
1323      NonLoc resultNL = result.castAs<NonLoc>();
1324
1325      if (strLengthNL) {
1326        state = state->assume(C.getSValBuilder().evalBinOpNN(
1327                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
1328                                  .castAs<DefinedOrUnknownSVal>(), true);
1329      }
1330
1331      if (maxlenValNL) {
1332        state = state->assume(C.getSValBuilder().evalBinOpNN(
1333                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1334                                  .castAs<DefinedOrUnknownSVal>(), true);
1335      }
1336    }
1337
1338  } else {
1339    // This is a plain strlen(), not strnlen().
1340    result = strLength.castAs<DefinedOrUnknownSVal>();
1341
1342    // If we don't know the length of the string, conjure a return
1343    // value, so it can be used in constraints, at least.
1344    if (result.isUnknown()) {
1345      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1346                                                   C.blockCount());
1347    }
1348  }
1349
1350  // Bind the return value.
1351  assert(!result.isUnknown() && "Should have conjured a value by now");
1352  state = state->BindExpr(CE, LCtx, result);
1353  C.addTransition(state);
1354}
1355
1356void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1357  if (CE->getNumArgs() < 2)
1358    return;
1359
1360  // char *strcpy(char *restrict dst, const char *restrict src);
1361  evalStrcpyCommon(C, CE,
1362                   /* returnEnd = */ false,
1363                   /* isBounded = */ false,
1364                   /* isAppending = */ false);
1365}
1366
1367void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1368  if (CE->getNumArgs() < 3)
1369    return;
1370
1371  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1372  evalStrcpyCommon(C, CE,
1373                   /* returnEnd = */ false,
1374                   /* isBounded = */ true,
1375                   /* isAppending = */ false);
1376}
1377
1378void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1379  if (CE->getNumArgs() < 2)
1380    return;
1381
1382  // char *stpcpy(char *restrict dst, const char *restrict src);
1383  evalStrcpyCommon(C, CE,
1384                   /* returnEnd = */ true,
1385                   /* isBounded = */ false,
1386                   /* isAppending = */ false);
1387}
1388
1389void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1390  if (CE->getNumArgs() < 2)
1391    return;
1392
1393  //char *strcat(char *restrict s1, const char *restrict s2);
1394  evalStrcpyCommon(C, CE,
1395                   /* returnEnd = */ false,
1396                   /* isBounded = */ false,
1397                   /* isAppending = */ true);
1398}
1399
1400void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1401  if (CE->getNumArgs() < 3)
1402    return;
1403
1404  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1405  evalStrcpyCommon(C, CE,
1406                   /* returnEnd = */ false,
1407                   /* isBounded = */ true,
1408                   /* isAppending = */ true);
1409}
1410
1411void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1412                                      bool returnEnd, bool isBounded,
1413                                      bool isAppending) const {
1414  CurrentFunctionDescription = "string copy function";
1415  ProgramStateRef state = C.getState();
1416  const LocationContext *LCtx = C.getLocationContext();
1417
1418  // Check that the destination is non-null.
1419  const Expr *Dst = CE->getArg(0);
1420  SVal DstVal = state->getSVal(Dst, LCtx);
1421
1422  state = checkNonNull(C, state, Dst, DstVal);
1423  if (!state)
1424    return;
1425
1426  // Check that the source is non-null.
1427  const Expr *srcExpr = CE->getArg(1);
1428  SVal srcVal = state->getSVal(srcExpr, LCtx);
1429  state = checkNonNull(C, state, srcExpr, srcVal);
1430  if (!state)
1431    return;
1432
1433  // Get the string length of the source.
1434  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1435
1436  // If the source isn't a valid C string, give up.
1437  if (strLength.isUndef())
1438    return;
1439
1440  SValBuilder &svalBuilder = C.getSValBuilder();
1441  QualType cmpTy = svalBuilder.getConditionType();
1442  QualType sizeTy = svalBuilder.getContext().getSizeType();
1443
1444  // These two values allow checking two kinds of errors:
1445  // - actual overflows caused by a source that doesn't fit in the destination
1446  // - potential overflows caused by a bound that could exceed the destination
1447  SVal amountCopied = UnknownVal();
1448  SVal maxLastElementIndex = UnknownVal();
1449  const char *boundWarning = nullptr;
1450
1451  // If the function is strncpy, strncat, etc... it is bounded.
1452  if (isBounded) {
1453    // Get the max number of characters to copy.
1454    const Expr *lenExpr = CE->getArg(2);
1455    SVal lenVal = state->getSVal(lenExpr, LCtx);
1456
1457    // Protect against misdeclared strncpy().
1458    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1459
1460    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1461    Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1462
1463    // If we know both values, we might be able to figure out how much
1464    // we're copying.
1465    if (strLengthNL && lenValNL) {
1466      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1467
1468      // Check if the max number to copy is less than the length of the src.
1469      // If the bound is equal to the source length, strncpy won't null-
1470      // terminate the result!
1471      std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1472          svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1473              .castAs<DefinedOrUnknownSVal>());
1474
1475      if (stateSourceTooLong && !stateSourceNotTooLong) {
1476        // Max number to copy is less than the length of the src, so the actual
1477        // strLength copied is the max number arg.
1478        state = stateSourceTooLong;
1479        amountCopied = lenVal;
1480
1481      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1482        // The source buffer entirely fits in the bound.
1483        state = stateSourceNotTooLong;
1484        amountCopied = strLength;
1485      }
1486    }
1487
1488    // We still want to know if the bound is known to be too large.
1489    if (lenValNL) {
1490      if (isAppending) {
1491        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1492
1493        // Get the string length of the destination. If the destination is
1494        // memory that can't have a string length, we shouldn't be copying
1495        // into it anyway.
1496        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1497        if (dstStrLength.isUndef())
1498          return;
1499
1500        if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1501          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1502                                                        *lenValNL,
1503                                                        *dstStrLengthNL,
1504                                                        sizeTy);
1505          boundWarning = "Size argument is greater than the free space in the "
1506                         "destination buffer";
1507        }
1508
1509      } else {
1510        // For strncpy, this is just checking that lenVal <= sizeof(dst)
1511        // (Yes, strncpy and strncat differ in how they treat termination.
1512        // strncat ALWAYS terminates, but strncpy doesn't.)
1513
1514        // We need a special case for when the copy size is zero, in which
1515        // case strncpy will do no work at all. Our bounds check uses n-1
1516        // as the last element accessed, so n == 0 is problematic.
1517        ProgramStateRef StateZeroSize, StateNonZeroSize;
1518        std::tie(StateZeroSize, StateNonZeroSize) =
1519          assumeZero(C, state, *lenValNL, sizeTy);
1520
1521        // If the size is known to be zero, we're done.
1522        if (StateZeroSize && !StateNonZeroSize) {
1523          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1524          C.addTransition(StateZeroSize);
1525          return;
1526        }
1527
1528        // Otherwise, go ahead and figure out the last element we'll touch.
1529        // We don't record the non-zero assumption here because we can't
1530        // be sure. We won't warn on a possible zero.
1531        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1532        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1533                                                      one, sizeTy);
1534        boundWarning = "Size argument is greater than the length of the "
1535                       "destination buffer";
1536      }
1537    }
1538
1539    // If we couldn't pin down the copy length, at least bound it.
1540    // FIXME: We should actually run this code path for append as well, but
1541    // right now it creates problems with constraints (since we can end up
1542    // trying to pass constraints from symbol to symbol).
1543    if (amountCopied.isUnknown() && !isAppending) {
1544      // Try to get a "hypothetical" string length symbol, which we can later
1545      // set as a real value if that turns out to be the case.
1546      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1547      assert(!amountCopied.isUndef());
1548
1549      if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1550        if (lenValNL) {
1551          // amountCopied <= lenVal
1552          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1553                                                             *amountCopiedNL,
1554                                                             *lenValNL,
1555                                                             cmpTy);
1556          state = state->assume(
1557              copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1558          if (!state)
1559            return;
1560        }
1561
1562        if (strLengthNL) {
1563          // amountCopied <= strlen(source)
1564          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1565                                                           *amountCopiedNL,
1566                                                           *strLengthNL,
1567                                                           cmpTy);
1568          state = state->assume(
1569              copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1570          if (!state)
1571            return;
1572        }
1573      }
1574    }
1575
1576  } else {
1577    // The function isn't bounded. The amount copied should match the length
1578    // of the source buffer.
1579    amountCopied = strLength;
1580  }
1581
1582  assert(state);
1583
1584  // This represents the number of characters copied into the destination
1585  // buffer. (It may not actually be the strlen if the destination buffer
1586  // is not terminated.)
1587  SVal finalStrLength = UnknownVal();
1588
1589  // If this is an appending function (strcat, strncat...) then set the
1590  // string length to strlen(src) + strlen(dst) since the buffer will
1591  // ultimately contain both.
1592  if (isAppending) {
1593    // Get the string length of the destination. If the destination is memory
1594    // that can't have a string length, we shouldn't be copying into it anyway.
1595    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1596    if (dstStrLength.isUndef())
1597      return;
1598
1599    Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1600    Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1601
1602    // If we know both string lengths, we might know the final string length.
1603    if (srcStrLengthNL && dstStrLengthNL) {
1604      // Make sure the two lengths together don't overflow a size_t.
1605      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1606      if (!state)
1607        return;
1608
1609      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1610                                               *dstStrLengthNL, sizeTy);
1611    }
1612
1613    // If we couldn't get a single value for the final string length,
1614    // we can at least bound it by the individual lengths.
1615    if (finalStrLength.isUnknown()) {
1616      // Try to get a "hypothetical" string length symbol, which we can later
1617      // set as a real value if that turns out to be the case.
1618      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1619      assert(!finalStrLength.isUndef());
1620
1621      if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1622        if (srcStrLengthNL) {
1623          // finalStrLength >= srcStrLength
1624          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1625                                                        *finalStrLengthNL,
1626                                                        *srcStrLengthNL,
1627                                                        cmpTy);
1628          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1629                                true);
1630          if (!state)
1631            return;
1632        }
1633
1634        if (dstStrLengthNL) {
1635          // finalStrLength >= dstStrLength
1636          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1637                                                      *finalStrLengthNL,
1638                                                      *dstStrLengthNL,
1639                                                      cmpTy);
1640          state =
1641              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1642          if (!state)
1643            return;
1644        }
1645      }
1646    }
1647
1648  } else {
1649    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1650    // the final string length will match the input string length.
1651    finalStrLength = amountCopied;
1652  }
1653
1654  // The final result of the function will either be a pointer past the last
1655  // copied element, or a pointer to the start of the destination buffer.
1656  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1657
1658  assert(state);
1659
1660  // If the destination is a MemRegion, try to check for a buffer overflow and
1661  // record the new string length.
1662  if (Optional<loc::MemRegionVal> dstRegVal =
1663          DstVal.getAs<loc::MemRegionVal>()) {
1664    QualType ptrTy = Dst->getType();
1665
1666    // If we have an exact value on a bounded copy, use that to check for
1667    // overflows, rather than our estimate about how much is actually copied.
1668    if (boundWarning) {
1669      if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1670        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1671                                                      *maxLastNL, ptrTy);
1672        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1673                              boundWarning);
1674        if (!state)
1675          return;
1676      }
1677    }
1678
1679    // Then, if the final length is known...
1680    if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1681      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1682                                                 *knownStrLength, ptrTy);
1683
1684      // ...and we haven't checked the bound, we'll check the actual copy.
1685      if (!boundWarning) {
1686        const char * const warningMsg =
1687          "String copy function overflows destination buffer";
1688        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1689        if (!state)
1690          return;
1691      }
1692
1693      // If this is a stpcpy-style copy, the last element is the return value.
1694      if (returnEnd)
1695        Result = lastElement;
1696    }
1697
1698    // Invalidate the destination (regular invalidation without pointer-escaping
1699    // the address of the top-level region). This must happen before we set the
1700    // C string length because invalidation will clear the length.
1701    // FIXME: Even if we can't perfectly model the copy, we should see if we
1702    // can use LazyCompoundVals to copy the source values into the destination.
1703    // This would probably remove any existing bindings past the end of the
1704    // string, but that's still an improvement over blank invalidation.
1705    state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1706                             /*IsSourceBuffer*/false, nullptr);
1707
1708    // Invalidate the source (const-invalidation without const-pointer-escaping
1709    // the address of the top-level region).
1710    state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true,
1711                             nullptr);
1712
1713    // Set the C string length of the destination, if we know it.
1714    if (isBounded && !isAppending) {
1715      // strncpy is annoying in that it doesn't guarantee to null-terminate
1716      // the result string. If the original string didn't fit entirely inside
1717      // the bound (including the null-terminator), we don't know how long the
1718      // result is.
1719      if (amountCopied != strLength)
1720        finalStrLength = UnknownVal();
1721    }
1722    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1723  }
1724
1725  assert(state);
1726
1727  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1728  // overflow, we still need a result. Conjure a return value.
1729  if (returnEnd && Result.isUnknown()) {
1730    Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1731  }
1732
1733  // Set the return value.
1734  state = state->BindExpr(CE, LCtx, Result);
1735  C.addTransition(state);
1736}
1737
1738void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1739  if (CE->getNumArgs() < 2)
1740    return;
1741
1742  //int strcmp(const char *s1, const char *s2);
1743  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1744}
1745
1746void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1747  if (CE->getNumArgs() < 3)
1748    return;
1749
1750  //int strncmp(const char *s1, const char *s2, size_t n);
1751  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1752}
1753
1754void CStringChecker::evalStrcasecmp(CheckerContext &C,
1755                                    const CallExpr *CE) const {
1756  if (CE->getNumArgs() < 2)
1757    return;
1758
1759  //int strcasecmp(const char *s1, const char *s2);
1760  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1761}
1762
1763void CStringChecker::evalStrncasecmp(CheckerContext &C,
1764                                     const CallExpr *CE) const {
1765  if (CE->getNumArgs() < 3)
1766    return;
1767
1768  //int strncasecmp(const char *s1, const char *s2, size_t n);
1769  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1770}
1771
1772void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1773                                      bool isBounded, bool ignoreCase) const {
1774  CurrentFunctionDescription = "string comparison function";
1775  ProgramStateRef state = C.getState();
1776  const LocationContext *LCtx = C.getLocationContext();
1777
1778  // Check that the first string is non-null
1779  const Expr *s1 = CE->getArg(0);
1780  SVal s1Val = state->getSVal(s1, LCtx);
1781  state = checkNonNull(C, state, s1, s1Val);
1782  if (!state)
1783    return;
1784
1785  // Check that the second string is non-null.
1786  const Expr *s2 = CE->getArg(1);
1787  SVal s2Val = state->getSVal(s2, LCtx);
1788  state = checkNonNull(C, state, s2, s2Val);
1789  if (!state)
1790    return;
1791
1792  // Get the string length of the first string or give up.
1793  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1794  if (s1Length.isUndef())
1795    return;
1796
1797  // Get the string length of the second string or give up.
1798  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1799  if (s2Length.isUndef())
1800    return;
1801
1802  // If we know the two buffers are the same, we know the result is 0.
1803  // First, get the two buffers' addresses. Another checker will have already
1804  // made sure they're not undefined.
1805  DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1806  DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1807
1808  // See if they are the same.
1809  SValBuilder &svalBuilder = C.getSValBuilder();
1810  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1811  ProgramStateRef StSameBuf, StNotSameBuf;
1812  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1813
1814  // If the two arguments might be the same buffer, we know the result is 0,
1815  // and we only need to check one size.
1816  if (StSameBuf) {
1817    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1818                                    svalBuilder.makeZeroVal(CE->getType()));
1819    C.addTransition(StSameBuf);
1820
1821    // If the two arguments are GUARANTEED to be the same, we're done!
1822    if (!StNotSameBuf)
1823      return;
1824  }
1825
1826  assert(StNotSameBuf);
1827  state = StNotSameBuf;
1828
1829  // At this point we can go about comparing the two buffers.
1830  // For now, we only do this if they're both known string literals.
1831
1832  // Attempt to extract string literals from both expressions.
1833  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1834  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1835  bool canComputeResult = false;
1836
1837  if (s1StrLiteral && s2StrLiteral) {
1838    StringRef s1StrRef = s1StrLiteral->getString();
1839    StringRef s2StrRef = s2StrLiteral->getString();
1840
1841    if (isBounded) {
1842      // Get the max number of characters to compare.
1843      const Expr *lenExpr = CE->getArg(2);
1844      SVal lenVal = state->getSVal(lenExpr, LCtx);
1845
1846      // If the length is known, we can get the right substrings.
1847      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1848        // Create substrings of each to compare the prefix.
1849        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1850        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1851        canComputeResult = true;
1852      }
1853    } else {
1854      // This is a normal, unbounded strcmp.
1855      canComputeResult = true;
1856    }
1857
1858    if (canComputeResult) {
1859      // Real strcmp stops at null characters.
1860      size_t s1Term = s1StrRef.find('\0');
1861      if (s1Term != StringRef::npos)
1862        s1StrRef = s1StrRef.substr(0, s1Term);
1863
1864      size_t s2Term = s2StrRef.find('\0');
1865      if (s2Term != StringRef::npos)
1866        s2StrRef = s2StrRef.substr(0, s2Term);
1867
1868      // Use StringRef's comparison methods to compute the actual result.
1869      int result;
1870
1871      if (ignoreCase) {
1872        // Compare string 1 to string 2 the same way strcasecmp() does.
1873        result = s1StrRef.compare_lower(s2StrRef);
1874      } else {
1875        // Compare string 1 to string 2 the same way strcmp() does.
1876        result = s1StrRef.compare(s2StrRef);
1877      }
1878
1879      // Build the SVal of the comparison and bind the return value.
1880      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1881      state = state->BindExpr(CE, LCtx, resultVal);
1882    }
1883  }
1884
1885  if (!canComputeResult) {
1886    // Conjure a symbolic value. It's the best we can do.
1887    SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1888                                                  C.blockCount());
1889    state = state->BindExpr(CE, LCtx, resultVal);
1890  }
1891
1892  // Record this as a possible path.
1893  C.addTransition(state);
1894}
1895
1896void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1897  //char *strsep(char **stringp, const char *delim);
1898  if (CE->getNumArgs() < 2)
1899    return;
1900
1901  // Sanity: does the search string parameter match the return type?
1902  const Expr *SearchStrPtr = CE->getArg(0);
1903  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1904  if (CharPtrTy.isNull() ||
1905      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1906    return;
1907
1908  CurrentFunctionDescription = "strsep()";
1909  ProgramStateRef State = C.getState();
1910  const LocationContext *LCtx = C.getLocationContext();
1911
1912  // Check that the search string pointer is non-null (though it may point to
1913  // a null string).
1914  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1915  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1916  if (!State)
1917    return;
1918
1919  // Check that the delimiter string is non-null.
1920  const Expr *DelimStr = CE->getArg(1);
1921  SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1922  State = checkNonNull(C, State, DelimStr, DelimStrVal);
1923  if (!State)
1924    return;
1925
1926  SValBuilder &SVB = C.getSValBuilder();
1927  SVal Result;
1928  if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1929    // Get the current value of the search string pointer, as a char*.
1930    Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1931
1932    // Invalidate the search string, representing the change of one delimiter
1933    // character to NUL.
1934    State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1935                             /*IsSourceBuffer*/false, nullptr);
1936
1937    // Overwrite the search string pointer. The new value is either an address
1938    // further along in the same string, or NULL if there are no more tokens.
1939    State = State->bindLoc(*SearchStrLoc,
1940                           SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1941                                                C.blockCount()));
1942  } else {
1943    assert(SearchStrVal.isUnknown());
1944    // Conjure a symbolic value. It's the best we can do.
1945    Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1946  }
1947
1948  // Set the return value, and finish.
1949  State = State->BindExpr(CE, LCtx, Result);
1950  C.addTransition(State);
1951}
1952
1953
1954//===----------------------------------------------------------------------===//
1955// The driver method, and other Checker callbacks.
1956//===----------------------------------------------------------------------===//
1957
1958bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1959  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1960
1961  if (!FDecl)
1962    return false;
1963
1964  // FIXME: Poorly-factored string switches are slow.
1965  FnCheck evalFunction = nullptr;
1966  if (C.isCLibraryFunction(FDecl, "memcpy"))
1967    evalFunction =  &CStringChecker::evalMemcpy;
1968  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1969    evalFunction =  &CStringChecker::evalMempcpy;
1970  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1971    evalFunction =  &CStringChecker::evalMemcmp;
1972  else if (C.isCLibraryFunction(FDecl, "memmove"))
1973    evalFunction =  &CStringChecker::evalMemmove;
1974  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1975    evalFunction =  &CStringChecker::evalStrcpy;
1976  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1977    evalFunction =  &CStringChecker::evalStrncpy;
1978  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1979    evalFunction =  &CStringChecker::evalStpcpy;
1980  else if (C.isCLibraryFunction(FDecl, "strcat"))
1981    evalFunction =  &CStringChecker::evalStrcat;
1982  else if (C.isCLibraryFunction(FDecl, "strncat"))
1983    evalFunction =  &CStringChecker::evalStrncat;
1984  else if (C.isCLibraryFunction(FDecl, "strlen"))
1985    evalFunction =  &CStringChecker::evalstrLength;
1986  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1987    evalFunction =  &CStringChecker::evalstrnLength;
1988  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1989    evalFunction =  &CStringChecker::evalStrcmp;
1990  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1991    evalFunction =  &CStringChecker::evalStrncmp;
1992  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1993    evalFunction =  &CStringChecker::evalStrcasecmp;
1994  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1995    evalFunction =  &CStringChecker::evalStrncasecmp;
1996  else if (C.isCLibraryFunction(FDecl, "strsep"))
1997    evalFunction =  &CStringChecker::evalStrsep;
1998  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1999    evalFunction =  &CStringChecker::evalBcopy;
2000  else if (C.isCLibraryFunction(FDecl, "bcmp"))
2001    evalFunction =  &CStringChecker::evalMemcmp;
2002
2003  // If the callee isn't a string function, let another checker handle it.
2004  if (!evalFunction)
2005    return false;
2006
2007  // Check and evaluate the call.
2008  (this->*evalFunction)(C, CE);
2009
2010  // If the evaluate call resulted in no change, chain to the next eval call
2011  // handler.
2012  // Note, the custom CString evaluation calls assume that basic safety
2013  // properties are held. However, if the user chooses to turn off some of these
2014  // checks, we ignore the issues and leave the call evaluation to a generic
2015  // handler.
2016  return C.isDifferent();
2017}
2018
2019void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2020  // Record string length for char a[] = "abc";
2021  ProgramStateRef state = C.getState();
2022
2023  for (const auto *I : DS->decls()) {
2024    const VarDecl *D = dyn_cast<VarDecl>(I);
2025    if (!D)
2026      continue;
2027
2028    // FIXME: Handle array fields of structs.
2029    if (!D->getType()->isArrayType())
2030      continue;
2031
2032    const Expr *Init = D->getInit();
2033    if (!Init)
2034      continue;
2035    if (!isa<StringLiteral>(Init))
2036      continue;
2037
2038    Loc VarLoc = state->getLValue(D, C.getLocationContext());
2039    const MemRegion *MR = VarLoc.getAsRegion();
2040    if (!MR)
2041      continue;
2042
2043    SVal StrVal = state->getSVal(Init, C.getLocationContext());
2044    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2045    DefinedOrUnknownSVal strLength =
2046        getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2047
2048    state = state->set<CStringLength>(MR, strLength);
2049  }
2050
2051  C.addTransition(state);
2052}
2053
2054bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
2055  CStringLengthTy Entries = state->get<CStringLength>();
2056  return !Entries.isEmpty();
2057}
2058
2059ProgramStateRef
2060CStringChecker::checkRegionChanges(ProgramStateRef state,
2061                                   const InvalidatedSymbols *,
2062                                   ArrayRef<const MemRegion *> ExplicitRegions,
2063                                   ArrayRef<const MemRegion *> Regions,
2064                                   const CallEvent *Call) const {
2065  CStringLengthTy Entries = state->get<CStringLength>();
2066  if (Entries.isEmpty())
2067    return state;
2068
2069  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2070  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2071
2072  // First build sets for the changed regions and their super-regions.
2073  for (ArrayRef<const MemRegion *>::iterator
2074       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
2075    const MemRegion *MR = *I;
2076    Invalidated.insert(MR);
2077
2078    SuperRegions.insert(MR);
2079    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2080      MR = SR->getSuperRegion();
2081      SuperRegions.insert(MR);
2082    }
2083  }
2084
2085  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2086
2087  // Then loop over the entries in the current state.
2088  for (CStringLengthTy::iterator I = Entries.begin(),
2089       E = Entries.end(); I != E; ++I) {
2090    const MemRegion *MR = I.getKey();
2091
2092    // Is this entry for a super-region of a changed region?
2093    if (SuperRegions.count(MR)) {
2094      Entries = F.remove(Entries, MR);
2095      continue;
2096    }
2097
2098    // Is this entry for a sub-region of a changed region?
2099    const MemRegion *Super = MR;
2100    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2101      Super = SR->getSuperRegion();
2102      if (Invalidated.count(Super)) {
2103        Entries = F.remove(Entries, MR);
2104        break;
2105      }
2106    }
2107  }
2108
2109  return state->set<CStringLength>(Entries);
2110}
2111
2112void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2113                                      SymbolReaper &SR) const {
2114  // Mark all symbols in our string length map as valid.
2115  CStringLengthTy Entries = state->get<CStringLength>();
2116
2117  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2118       I != E; ++I) {
2119    SVal Len = I.getData();
2120
2121    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2122                                  se = Len.symbol_end(); si != se; ++si)
2123      SR.markInUse(*si);
2124  }
2125}
2126
2127void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2128                                      CheckerContext &C) const {
2129  if (!SR.hasDeadSymbols())
2130    return;
2131
2132  ProgramStateRef state = C.getState();
2133  CStringLengthTy Entries = state->get<CStringLength>();
2134  if (Entries.isEmpty())
2135    return;
2136
2137  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2138  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2139       I != E; ++I) {
2140    SVal Len = I.getData();
2141    if (SymbolRef Sym = Len.getAsSymbol()) {
2142      if (SR.isDead(Sym))
2143        Entries = F.remove(Entries, I.getKey());
2144    }
2145  }
2146
2147  state = state->set<CStringLength>(Entries);
2148  C.addTransition(state);
2149}
2150
2151#define REGISTER_CHECKER(name)                                                 \
2152  void ento::register##name(CheckerManager &mgr) {                             \
2153    CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2154    checker->Filter.Check##name = true;                                        \
2155    checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2156  }
2157
2158REGISTER_CHECKER(CStringNullArg)
2159REGISTER_CHECKER(CStringOutOfBounds)
2160REGISTER_CHECKER(CStringBufferOverlap)
2161REGISTER_CHECKER(CStringNotNullTerm)
2162
2163void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2164  registerCStringNullArg(Mgr);
2165}
2166