1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
15#include "clang/Basic/Builtins.h"
16#include "clang/Basic/CharInfo.h"
17#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19#include "clang/StaticAnalyzer/Core/Checker.h"
20#include "clang/StaticAnalyzer/Core/CheckerManager.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/Support/raw_ostream.h"
30#include <functional>
31#include <optional>
32
33using namespace clang;
34using namespace ento;
35using namespace std::placeholders;
36
37namespace {
38struct AnyArgExpr {
39  const Expr *Expression;
40  unsigned ArgumentIndex;
41};
42struct SourceArgExpr : AnyArgExpr {};
43struct DestinationArgExpr : AnyArgExpr {};
44struct SizeArgExpr : AnyArgExpr {};
45
46using ErrorMessage = SmallString<128>;
47enum class AccessKind { write, read };
48
49static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
50                                             AccessKind Access) {
51  ErrorMessage Message;
52  llvm::raw_svector_ostream Os(Message);
53
54  // Function classification like: Memory copy function
55  Os << toUppercase(FunctionDescription.front())
56     << &FunctionDescription.data()[1];
57
58  if (Access == AccessKind::write) {
59    Os << " overflows the destination buffer";
60  } else { // read access
61    Os << " accesses out-of-bound array element";
62  }
63
64  return Message;
65}
66
/// Which concatenation function, if any, a string-copy model stands for.
enum class ConcatFnKind {
  none = 0,
  strcat = 1,
  strlcat = 2,
};

/// Character width handled by a modeled function: plain or wide characters.
enum class CharKind {
  Regular = 0,
  Wide = 1,
};

// Short spellings of the two character kinds.
constexpr auto CK_Regular = CharKind::Regular;
constexpr auto CK_Wide = CharKind::Wide;
72
73static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
74  return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
75                                                    : Ctx.WideCharTy);
76}
77
78class CStringChecker : public Checker< eval::Call,
79                                         check::PreStmt<DeclStmt>,
80                                         check::LiveSymbols,
81                                         check::DeadSymbols,
82                                         check::RegionChanges
83                                         > {
84  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
85      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
86
87  mutable const char *CurrentFunctionDescription = nullptr;
88
89public:
90  /// The filter is used to filter out the diagnostics which are not enabled by
91  /// the user.
92  struct CStringChecksFilter {
93    bool CheckCStringNullArg = false;
94    bool CheckCStringOutOfBounds = false;
95    bool CheckCStringBufferOverlap = false;
96    bool CheckCStringNotNullTerm = false;
97    bool CheckCStringUninitializedRead = false;
98
99    CheckerNameRef CheckNameCStringNullArg;
100    CheckerNameRef CheckNameCStringOutOfBounds;
101    CheckerNameRef CheckNameCStringBufferOverlap;
102    CheckerNameRef CheckNameCStringNotNullTerm;
103    CheckerNameRef CheckNameCStringUninitializedRead;
104  };
105
106  CStringChecksFilter Filter;
107
108  static void *getTag() { static int tag; return &tag; }
109
110  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
111  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
112  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
113  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
114
115  ProgramStateRef
116    checkRegionChanges(ProgramStateRef state,
117                       const InvalidatedSymbols *,
118                       ArrayRef<const MemRegion *> ExplicitRegions,
119                       ArrayRef<const MemRegion *> Regions,
120                       const LocationContext *LCtx,
121                       const CallEvent *Call) const;
122
123  using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
124                                     const CallEvent &)>;
125
126  CallDescriptionMap<FnCheck> Callbacks = {
127      {{CDF_MaybeBuiltin, {"memcpy"}, 3},
128       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
129      {{CDF_MaybeBuiltin, {"wmemcpy"}, 3},
130       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
131      {{CDF_MaybeBuiltin, {"mempcpy"}, 3},
132       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
133      {{CDF_None, {"wmempcpy"}, 3},
134       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
135      {{CDF_MaybeBuiltin, {"memcmp"}, 3},
136       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
137      {{CDF_MaybeBuiltin, {"wmemcmp"}, 3},
138       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
139      {{CDF_MaybeBuiltin, {"memmove"}, 3},
140       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
141      {{CDF_MaybeBuiltin, {"wmemmove"}, 3},
142       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
143      {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset},
144      {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
145      {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy},
146      {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy},
147      {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy},
148      {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy},
149      {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat},
150      {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat},
151      {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat},
152      {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength},
153      {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
154      {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength},
155      {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
156      {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
157      {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
158      {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
159      {{CDF_MaybeBuiltin, {"strncasecmp"}, 3},
160       &CStringChecker::evalStrncasecmp},
161      {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep},
162      {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
163      {{CDF_MaybeBuiltin, {"bcmp"}, 3},
164       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
165      {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero},
166      {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero},
167      {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf},
168      {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf},
169  };
170
171  // These require a bit of special handling.
172  CallDescription StdCopy{{"std", "copy"}, 3},
173      StdCopyBackward{{"std", "copy_backward"}, 3};
174
175  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
176  void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
177  void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
178  void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
179  void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
180  void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
181                      ProgramStateRef state, SizeArgExpr Size,
182                      DestinationArgExpr Dest, SourceArgExpr Source,
183                      bool Restricted, bool IsMempcpy, CharKind CK) const;
184
185  void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
186
187  void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
188  void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
189  void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
190                           bool IsStrnlen = false) const;
191
192  void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
193  void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
194  void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
195  void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
196  void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
197                        bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
198                        bool returnPtr = true) const;
199
200  void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
201  void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
202  void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
203
204  void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
205  void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
206  void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
207  void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
208  void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
209                        bool IsBounded = false, bool IgnoreCase = false) const;
210
211  void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
212
213  void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
214  void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
215  void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
216  void evalMemset(CheckerContext &C, const CallEvent &Call) const;
217  void evalBzero(CheckerContext &C, const CallEvent &Call) const;
218
219  void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
220  void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
221  void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
222                         bool IsBounded, bool IsBuiltin) const;
223
224  // Utility methods
225  std::pair<ProgramStateRef , ProgramStateRef >
226  static assumeZero(CheckerContext &C,
227                    ProgramStateRef state, SVal V, QualType Ty);
228
229  static ProgramStateRef setCStringLength(ProgramStateRef state,
230                                              const MemRegion *MR,
231                                              SVal strLength);
232  static SVal getCStringLengthForRegion(CheckerContext &C,
233                                        ProgramStateRef &state,
234                                        const Expr *Ex,
235                                        const MemRegion *MR,
236                                        bool hypothetical);
237  SVal getCStringLength(CheckerContext &C,
238                        ProgramStateRef &state,
239                        const Expr *Ex,
240                        SVal Buf,
241                        bool hypothetical = false) const;
242
243  const StringLiteral *getCStringLiteral(CheckerContext &C,
244                                         ProgramStateRef &state,
245                                         const Expr *expr,
246                                         SVal val) const;
247
248  /// Invalidate the destination buffer determined by characters copied.
249  static ProgramStateRef
250  invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
251                                    const Expr *BufE, SVal BufV, SVal SizeV,
252                                    QualType SizeTy);
253
254  /// Operation never overflows, do not invalidate the super region.
255  static ProgramStateRef invalidateDestinationBufferNeverOverflows(
256      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
257
258  /// We do not know whether the operation can overflow (e.g. size is unknown),
259  /// invalidate the super region and escape related pointers.
260  static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
261      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
262
263  /// Invalidate the source buffer for escaping pointers.
264  static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
265                                                ProgramStateRef S,
266                                                const Expr *BufE, SVal BufV);
267
268  /// @param InvalidationTraitOperations Determine how to invlidate the
269  /// MemRegion by setting the invalidation traits. Return true to cause pointer
270  /// escape, or false otherwise.
271  static ProgramStateRef invalidateBufferAux(
272      CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
273      llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
274                              const MemRegion *)>
275          InvalidationTraitOperations);
276
277  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
278                              const MemRegion *MR);
279
280  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
281                        const Expr *Size, CheckerContext &C,
282                        ProgramStateRef &State);
283
284  // Re-usable checks
285  ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
286                               AnyArgExpr Arg, SVal l) const;
287  ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
288                                AnyArgExpr Buffer, SVal Element,
289                                AccessKind Access,
290                                CharKind CK = CharKind::Regular) const;
291  ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
292                                    AnyArgExpr Buffer, SizeArgExpr Size,
293                                    AccessKind Access,
294                                    CharKind CK = CharKind::Regular) const;
295  ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
296                               SizeArgExpr Size, AnyArgExpr First,
297                               AnyArgExpr Second,
298                               CharKind CK = CharKind::Regular) const;
299  void emitOverlapBug(CheckerContext &C,
300                      ProgramStateRef state,
301                      const Stmt *First,
302                      const Stmt *Second) const;
303
304  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
305                      StringRef WarningMsg) const;
306  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
307                          const Stmt *S, StringRef WarningMsg) const;
308  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
309                         const Stmt *S, StringRef WarningMsg) const;
310  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
311  void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
312                             const Expr *E) const;
313  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
314                                            ProgramStateRef state,
315                                            NonLoc left,
316                                            NonLoc right) const;
317
318  // Return true if the destination buffer of the copy function may be in bound.
319  // Expects SVal of Size to be positive and unsigned.
320  // Expects SVal of FirstBuf to be a FieldRegion.
321  static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
322                                SVal BufVal, QualType BufTy, SVal LengthVal,
323                                QualType LengthTy);
324};
325
326} //end anonymous namespace
327
// Program-state map named CStringLength, keyed by memory region and holding an
// SVal per region.
// NOTE(review): presumably the known length of the string stored in that
// region (see setCStringLength / getCStringLength) - confirm against users.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
329
330//===----------------------------------------------------------------------===//
331// Individual checks and utility methods.
332//===----------------------------------------------------------------------===//
333
334std::pair<ProgramStateRef , ProgramStateRef >
335CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
336                           QualType Ty) {
337  std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
338  if (!val)
339    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
340
341  SValBuilder &svalBuilder = C.getSValBuilder();
342  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
343  return state->assume(svalBuilder.evalEQ(state, *val, zero));
344}
345
346ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
347                                             ProgramStateRef State,
348                                             AnyArgExpr Arg, SVal l) const {
349  // If a previous check has failed, propagate the failure.
350  if (!State)
351    return nullptr;
352
353  ProgramStateRef stateNull, stateNonNull;
354  std::tie(stateNull, stateNonNull) =
355      assumeZero(C, State, l, Arg.Expression->getType());
356
357  if (stateNull && !stateNonNull) {
358    if (Filter.CheckCStringNullArg) {
359      SmallString<80> buf;
360      llvm::raw_svector_ostream OS(buf);
361      assert(CurrentFunctionDescription);
362      OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
363         << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
364         << CurrentFunctionDescription;
365
366      emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
367    }
368    return nullptr;
369  }
370
371  // From here on, assume that the value is non-null.
372  assert(stateNonNull);
373  return stateNonNull;
374}
375
376// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
377ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
378                                              ProgramStateRef state,
379                                              AnyArgExpr Buffer, SVal Element,
380                                              AccessKind Access,
381                                              CharKind CK) const {
382
383  // If a previous check has failed, propagate the failure.
384  if (!state)
385    return nullptr;
386
387  // Check for out of bound array element access.
388  const MemRegion *R = Element.getAsRegion();
389  if (!R)
390    return state;
391
392  const auto *ER = dyn_cast<ElementRegion>(R);
393  if (!ER)
394    return state;
395
396  SValBuilder &svalBuilder = C.getSValBuilder();
397  ASTContext &Ctx = svalBuilder.getContext();
398
399  // Get the index of the accessed element.
400  NonLoc Idx = ER->getIndex();
401
402  if (CK == CharKind::Regular) {
403    if (ER->getValueType() != Ctx.CharTy)
404      return state;
405  } else {
406    if (ER->getValueType() != Ctx.WideCharTy)
407      return state;
408
409    QualType SizeTy = Ctx.getSizeType();
410    NonLoc WideSize =
411        svalBuilder
412            .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
413                        SizeTy)
414            .castAs<NonLoc>();
415    SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
416    if (Offset.isUnknown())
417      return state;
418    Idx = Offset.castAs<NonLoc>();
419  }
420
421  // Get the size of the array.
422  const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
423  DefinedOrUnknownSVal Size =
424      getDynamicExtent(state, superReg, C.getSValBuilder());
425
426  ProgramStateRef StInBound, StOutBound;
427  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
428  if (StOutBound && !StInBound) {
429    // These checks are either enabled by the CString out-of-bounds checker
430    // explicitly or implicitly by the Malloc checker.
431    // In the latter case we only do modeling but do not emit warning.
432    if (!Filter.CheckCStringOutOfBounds)
433      return nullptr;
434
435    // Emit a bug report.
436    ErrorMessage Message =
437        createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
438    emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
439    return nullptr;
440  }
441
442  // Ensure that we wouldn't read uninitialized value.
443  if (Access == AccessKind::read) {
444    if (Filter.CheckCStringUninitializedRead &&
445        StInBound->getSVal(ER).isUndef()) {
446      emitUninitializedReadBug(C, StInBound, Buffer.Expression);
447      return nullptr;
448    }
449  }
450
451  // Array bound check succeeded.  From this point forward the array bound
452  // should always succeed.
453  return StInBound;
454}
455
456ProgramStateRef
457CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
458                                  AnyArgExpr Buffer, SizeArgExpr Size,
459                                  AccessKind Access, CharKind CK) const {
460  // If a previous check has failed, propagate the failure.
461  if (!State)
462    return nullptr;
463
464  SValBuilder &svalBuilder = C.getSValBuilder();
465  ASTContext &Ctx = svalBuilder.getContext();
466
467  QualType SizeTy = Size.Expression->getType();
468  QualType PtrTy = getCharPtrType(Ctx, CK);
469
470  // Check that the first buffer is non-null.
471  SVal BufVal = C.getSVal(Buffer.Expression);
472  State = checkNonNull(C, State, Buffer, BufVal);
473  if (!State)
474    return nullptr;
475
476  // If out-of-bounds checking is turned off, skip the rest.
477  if (!Filter.CheckCStringOutOfBounds)
478    return State;
479
480  SVal BufStart =
481      svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
482
483  // Check if the first byte of the buffer is accessible.
484  State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
485  if (!State)
486    return nullptr;
487
488  // Get the access length and make sure it is known.
489  // FIXME: This assumes the caller has already checked that the access length
490  // is positive. And that it's unsigned.
491  SVal LengthVal = C.getSVal(Size.Expression);
492  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
493  if (!Length)
494    return State;
495
496  // Compute the offset of the last element to be accessed: size-1.
497  NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
498  SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
499  if (Offset.isUnknown())
500    return nullptr;
501  NonLoc LastOffset = Offset.castAs<NonLoc>();
502
503  // Check that the first buffer is sufficiently long.
504  if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
505
506    SVal BufEnd =
507        svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
508    State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
509
510    // If the buffer isn't large enough, abort.
511    if (!State)
512      return nullptr;
513  }
514
515  // Large enough or not, return this state!
516  return State;
517}
518
519ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
520                                             ProgramStateRef state,
521                                             SizeArgExpr Size, AnyArgExpr First,
522                                             AnyArgExpr Second,
523                                             CharKind CK) const {
524  if (!Filter.CheckCStringBufferOverlap)
525    return state;
526
527  // Do a simple check for overlap: if the two arguments are from the same
528  // buffer, see if the end of the first is greater than the start of the second
529  // or vice versa.
530
531  // If a previous check has failed, propagate the failure.
532  if (!state)
533    return nullptr;
534
535  ProgramStateRef stateTrue, stateFalse;
536
537  // Assume different address spaces cannot overlap.
538  if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
539      Second.Expression->getType()->getPointeeType().getAddressSpace())
540    return state;
541
542  // Get the buffer values and make sure they're known locations.
543  const LocationContext *LCtx = C.getLocationContext();
544  SVal firstVal = state->getSVal(First.Expression, LCtx);
545  SVal secondVal = state->getSVal(Second.Expression, LCtx);
546
547  std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
548  if (!firstLoc)
549    return state;
550
551  std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
552  if (!secondLoc)
553    return state;
554
555  // Are the two values the same?
556  SValBuilder &svalBuilder = C.getSValBuilder();
557  std::tie(stateTrue, stateFalse) =
558      state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
559
560  if (stateTrue && !stateFalse) {
561    // If the values are known to be equal, that's automatically an overlap.
562    emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
563    return nullptr;
564  }
565
566  // assume the two expressions are not equal.
567  assert(stateFalse);
568  state = stateFalse;
569
570  // Which value comes first?
571  QualType cmpTy = svalBuilder.getConditionType();
572  SVal reverse =
573      svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
574  std::optional<DefinedOrUnknownSVal> reverseTest =
575      reverse.getAs<DefinedOrUnknownSVal>();
576  if (!reverseTest)
577    return state;
578
579  std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
580  if (stateTrue) {
581    if (stateFalse) {
582      // If we don't know which one comes first, we can't perform this test.
583      return state;
584    } else {
585      // Switch the values so that firstVal is before secondVal.
586      std::swap(firstLoc, secondLoc);
587
588      // Switch the Exprs as well, so that they still correspond.
589      std::swap(First, Second);
590    }
591  }
592
593  // Get the length, and make sure it too is known.
594  SVal LengthVal = state->getSVal(Size.Expression, LCtx);
595  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
596  if (!Length)
597    return state;
598
599  // Convert the first buffer's start address to char*.
600  // Bail out if the cast fails.
601  ASTContext &Ctx = svalBuilder.getContext();
602  QualType CharPtrTy = getCharPtrType(Ctx, CK);
603  SVal FirstStart =
604      svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
605  std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
606  if (!FirstStartLoc)
607    return state;
608
609  // Compute the end of the first buffer. Bail out if THAT fails.
610  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
611                                          *Length, CharPtrTy);
612  std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
613  if (!FirstEndLoc)
614    return state;
615
616  // Is the end of the first buffer past the start of the second buffer?
617  SVal Overlap =
618      svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
619  std::optional<DefinedOrUnknownSVal> OverlapTest =
620      Overlap.getAs<DefinedOrUnknownSVal>();
621  if (!OverlapTest)
622    return state;
623
624  std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
625
626  if (stateTrue && !stateFalse) {
627    // Overlap!
628    emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
629    return nullptr;
630  }
631
632  // assume the two expressions don't overlap.
633  assert(stateFalse);
634  return stateFalse;
635}
636
637void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
638                                  const Stmt *First, const Stmt *Second) const {
639  ExplodedNode *N = C.generateErrorNode(state);
640  if (!N)
641    return;
642
643  if (!BT_Overlap)
644    BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
645                                 categories::UnixAPI, "Improper arguments"));
646
647  // Generate a report for this bug.
648  auto report = std::make_unique<PathSensitiveBugReport>(
649      *BT_Overlap, "Arguments must not be overlapping buffers", N);
650  report->addRange(First->getSourceRange());
651  report->addRange(Second->getSourceRange());
652
653  C.emitReport(std::move(report));
654}
655
656void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
657                                    const Stmt *S, StringRef WarningMsg) const {
658  if (ExplodedNode *N = C.generateErrorNode(State)) {
659    if (!BT_Null) {
660      // FIXME: This call uses the string constant 'categories::UnixAPI' as the
661      // description of the bug; it should be replaced by a real description.
662      BT_Null.reset(
663          new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
664    }
665
666    auto Report =
667        std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
668    Report->addRange(S->getSourceRange());
669    if (const auto *Ex = dyn_cast<Expr>(S))
670      bugreporter::trackExpressionValue(N, Ex, *Report);
671    C.emitReport(std::move(Report));
672  }
673}
674
675void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
676                                              ProgramStateRef State,
677                                              const Expr *E) const {
678  if (ExplodedNode *N = C.generateErrorNode(State)) {
679    const char *Msg =
680        "Bytes string function accesses uninitialized/garbage values";
681    if (!BT_UninitRead)
682      BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
683                                      "Accessing unitialized/garbage values"));
684
685    auto Report =
686        std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
687    Report->addRange(E->getSourceRange());
688    bugreporter::trackExpressionValue(N, E, *Report);
689    C.emitReport(std::move(Report));
690  }
691}
692
693void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
694                                        ProgramStateRef State, const Stmt *S,
695                                        StringRef WarningMsg) const {
696  if (ExplodedNode *N = C.generateErrorNode(State)) {
697    if (!BT_Bounds)
698      BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
699                                      ? Filter.CheckNameCStringOutOfBounds
700                                      : Filter.CheckNameCStringNullArg,
701                                  "Out-of-bound array access"));
702
703    // FIXME: It would be nice to eventually make this diagnostic more clear,
704    // e.g., by referencing the original declaration or by saying *why* this
705    // reference is outside the range.
706    auto Report =
707        std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
708    Report->addRange(S->getSourceRange());
709    C.emitReport(std::move(Report));
710  }
711}
712
713void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
714                                       const Stmt *S,
715                                       StringRef WarningMsg) const {
716  if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
717    if (!BT_NotCString) {
718      // FIXME: This call uses the string constant 'categories::UnixAPI' as the
719      // description of the bug; it should be replaced by a real description.
720      BT_NotCString.reset(
721          new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
722    }
723
724    auto Report =
725        std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
726
727    Report->addRange(S->getSourceRange());
728    C.emitReport(std::move(Report));
729  }
730}
731
732void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
733                                             ProgramStateRef State) const {
734  if (ExplodedNode *N = C.generateErrorNode(State)) {
735    if (!BT_AdditionOverflow) {
736      // FIXME: This call uses the word "API" as the description of the bug;
737      // it should be replaced by a better error message (if this unlikely
738      // situation continues to exist as a separate bug type).
739      BT_AdditionOverflow.reset(
740          new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
741    }
742
743    // This isn't a great error message, but this should never occur in real
744    // code anyway -- you'd have to create a buffer longer than a size_t can
745    // represent, which is sort of a contradiction.
746    const char *WarningMsg =
747        "This expression will create a string whose length is too big to "
748        "be represented as a size_t";
749
750    auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
751                                                           WarningMsg, N);
752    C.emitReport(std::move(Report));
753  }
754}
755
756ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
757                                                     ProgramStateRef state,
758                                                     NonLoc left,
759                                                     NonLoc right) const {
760  // If out-of-bounds checking is turned off, skip the rest.
761  if (!Filter.CheckCStringOutOfBounds)
762    return state;
763
764  // If a previous check has failed, propagate the failure.
765  if (!state)
766    return nullptr;
767
768  SValBuilder &svalBuilder = C.getSValBuilder();
769  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
770
771  QualType sizeTy = svalBuilder.getContext().getSizeType();
772  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
773  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
774
775  SVal maxMinusRight;
776  if (isa<nonloc::ConcreteInt>(right)) {
777    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
778                                                 sizeTy);
779  } else {
780    // Try switching the operands. (The order of these two assignments is
781    // important!)
782    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
783                                            sizeTy);
784    left = right;
785  }
786
787  if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
788    QualType cmpTy = svalBuilder.getConditionType();
789    // If left > max - right, we have an overflow.
790    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
791                                                *maxMinusRightNL, cmpTy);
792
793    ProgramStateRef stateOverflow, stateOkay;
794    std::tie(stateOverflow, stateOkay) =
795      state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
796
797    if (stateOverflow && !stateOkay) {
798      // We have an overflow. Emit a bug report.
799      emitAdditionOverflowBug(C, stateOverflow);
800      return nullptr;
801    }
802
803    // From now on, assume an overflow didn't occur.
804    assert(stateOkay);
805    state = stateOkay;
806  }
807
808  return state;
809}
810
811ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
812                                                const MemRegion *MR,
813                                                SVal strLength) {
814  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
815
816  MR = MR->StripCasts();
817
818  switch (MR->getKind()) {
819  case MemRegion::StringRegionKind:
820    // FIXME: This can happen if we strcpy() into a string region. This is
821    // undefined [C99 6.4.5p6], but we should still warn about it.
822    return state;
823
824  case MemRegion::SymbolicRegionKind:
825  case MemRegion::AllocaRegionKind:
826  case MemRegion::NonParamVarRegionKind:
827  case MemRegion::ParamVarRegionKind:
828  case MemRegion::FieldRegionKind:
829  case MemRegion::ObjCIvarRegionKind:
830    // These are the types we can currently track string lengths for.
831    break;
832
833  case MemRegion::ElementRegionKind:
834    // FIXME: Handle element regions by upper-bounding the parent region's
835    // string length.
836    return state;
837
838  default:
839    // Other regions (mostly non-data) can't have a reliable C string length.
840    // For now, just ignore the change.
841    // FIXME: These are rare but not impossible. We should output some kind of
842    // warning for things like strcpy((char[]){'a', 0}, "b");
843    return state;
844  }
845
846  if (strLength.isUnknown())
847    return state->remove<CStringLength>(MR);
848
849  return state->set<CStringLength>(MR, strLength);
850}
851
852SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
853                                               ProgramStateRef &state,
854                                               const Expr *Ex,
855                                               const MemRegion *MR,
856                                               bool hypothetical) {
857  if (!hypothetical) {
858    // If there's a recorded length, go ahead and return it.
859    const SVal *Recorded = state->get<CStringLength>(MR);
860    if (Recorded)
861      return *Recorded;
862  }
863
864  // Otherwise, get a new symbol and update the state.
865  SValBuilder &svalBuilder = C.getSValBuilder();
866  QualType sizeTy = svalBuilder.getContext().getSizeType();
867  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
868                                                    MR, Ex, sizeTy,
869                                                    C.getLocationContext(),
870                                                    C.blockCount());
871
872  if (!hypothetical) {
873    if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
874      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
875      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
876      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
877      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
878      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
879                                                        fourInt);
880      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
881      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
882                                                svalBuilder.getConditionType());
883      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
884    }
885    state = state->set<CStringLength>(MR, strLength);
886  }
887
888  return strLength;
889}
890
891SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
892                                      const Expr *Ex, SVal Buf,
893                                      bool hypothetical) const {
894  const MemRegion *MR = Buf.getAsRegion();
895  if (!MR) {
896    // If we can't get a region, see if it's something we /know/ isn't a
897    // C string. In the context of locations, the only time we can issue such
898    // a warning is for labels.
899    if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
900      if (Filter.CheckCStringNotNullTerm) {
901        SmallString<120> buf;
902        llvm::raw_svector_ostream os(buf);
903        assert(CurrentFunctionDescription);
904        os << "Argument to " << CurrentFunctionDescription
905           << " is the address of the label '" << Label->getLabel()->getName()
906           << "', which is not a null-terminated string";
907
908        emitNotCStringBug(C, state, Ex, os.str());
909      }
910      return UndefinedVal();
911    }
912
913    // If it's not a region and not a label, give up.
914    return UnknownVal();
915  }
916
917  // If we have a region, strip casts from it and see if we can figure out
918  // its length. For anything we can't figure out, just return UnknownVal.
919  MR = MR->StripCasts();
920
921  switch (MR->getKind()) {
922  case MemRegion::StringRegionKind: {
923    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
924    // so we can assume that the byte length is the correct C string length.
925    SValBuilder &svalBuilder = C.getSValBuilder();
926    QualType sizeTy = svalBuilder.getContext().getSizeType();
927    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
928    return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
929  }
930  case MemRegion::NonParamVarRegionKind: {
931    // If we have a global constant with a string literal initializer,
932    // compute the initializer's length.
933    const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
934    if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
935      if (const Expr *Init = Decl->getInit()) {
936        if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
937          SValBuilder &SvalBuilder = C.getSValBuilder();
938          QualType SizeTy = SvalBuilder.getContext().getSizeType();
939          return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
940        }
941      }
942    }
943    [[fallthrough]];
944  }
945  case MemRegion::SymbolicRegionKind:
946  case MemRegion::AllocaRegionKind:
947  case MemRegion::ParamVarRegionKind:
948  case MemRegion::FieldRegionKind:
949  case MemRegion::ObjCIvarRegionKind:
950    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
951  case MemRegion::CompoundLiteralRegionKind:
952    // FIXME: Can we track this? Is it necessary?
953    return UnknownVal();
954  case MemRegion::ElementRegionKind:
955    // FIXME: How can we handle this? It's not good enough to subtract the
956    // offset from the base string length; consider "123\x00567" and &a[5].
957    return UnknownVal();
958  default:
959    // Other regions (mostly non-data) can't have a reliable C string length.
960    // In this case, an error is emitted and UndefinedVal is returned.
961    // The caller should always be prepared to handle this case.
962    if (Filter.CheckCStringNotNullTerm) {
963      SmallString<120> buf;
964      llvm::raw_svector_ostream os(buf);
965
966      assert(CurrentFunctionDescription);
967      os << "Argument to " << CurrentFunctionDescription << " is ";
968
969      if (SummarizeRegion(os, C.getASTContext(), MR))
970        os << ", which is not a null-terminated string";
971      else
972        os << "not a null-terminated string";
973
974      emitNotCStringBug(C, state, Ex, os.str());
975    }
976    return UndefinedVal();
977  }
978}
979
980const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
981  ProgramStateRef &state, const Expr *expr, SVal val) const {
982
983  // Get the memory region pointed to by the val.
984  const MemRegion *bufRegion = val.getAsRegion();
985  if (!bufRegion)
986    return nullptr;
987
988  // Strip casts off the memory region.
989  bufRegion = bufRegion->StripCasts();
990
991  // Cast the memory region to a string region.
992  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
993  if (!strRegion)
994    return nullptr;
995
996  // Return the actual string in the string region.
997  return strRegion->getStringLiteral();
998}
999
1000bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1001                                       SVal BufVal, QualType BufTy,
1002                                       SVal LengthVal, QualType LengthTy) {
1003  // If we do not know that the buffer is long enough we return 'true'.
1004  // Otherwise the parent region of this field region would also get
1005  // invalidated, which would lead to warnings based on an unknown state.
1006
1007  if (LengthVal.isUnknown())
1008    return false;
1009
1010  // Originally copied from CheckBufferAccess and CheckLocation.
1011  SValBuilder &SB = C.getSValBuilder();
1012  ASTContext &Ctx = C.getASTContext();
1013
1014  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1015
1016  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1017  if (!Length)
1018    return true; // cf top comment.
1019
1020  // Compute the offset of the last element to be accessed: size-1.
1021  NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1022  SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1023  if (Offset.isUnknown())
1024    return true; // cf top comment
1025  NonLoc LastOffset = Offset.castAs<NonLoc>();
1026
1027  // Check that the first buffer is sufficiently long.
1028  SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1029  std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1030  if (!BufLoc)
1031    return true; // cf top comment.
1032
1033  SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1034
1035  // Check for out of bound array element access.
1036  const MemRegion *R = BufEnd.getAsRegion();
1037  if (!R)
1038    return true; // cf top comment.
1039
1040  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1041  if (!ER)
1042    return true; // cf top comment.
1043
1044  // FIXME: Does this crash when a non-standard definition
1045  // of a library function is encountered?
1046  assert(ER->getValueType() == C.getASTContext().CharTy &&
1047         "isFirstBufInBound should only be called with char* ElementRegions");
1048
1049  // Get the size of the array.
1050  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1051  DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1052
1053  // Get the index of the accessed element.
1054  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1055
1056  ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1057
1058  return static_cast<bool>(StInBound);
1059}
1060
1061ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1062    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1063    SVal SizeV, QualType SizeTy) {
1064  auto InvalidationTraitOperations =
1065      [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1066       SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1067        // If destination buffer is a field region and access is in bound, do
1068        // not invalidate its super region.
1069        if (MemRegion::FieldRegionKind == R->getKind() &&
1070            isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1071          ITraits.setTrait(
1072              R,
1073              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1074        }
1075        return false;
1076      };
1077
1078  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1079}
1080
1081ProgramStateRef
1082CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1083    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1084  auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1085                                        const MemRegion *R) {
1086    return isa<FieldRegion>(R);
1087  };
1088
1089  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1090}
1091
1092ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1093    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1094  auto InvalidationTraitOperations =
1095      [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1096        if (MemRegion::FieldRegionKind == R->getKind())
1097          ITraits.setTrait(
1098              R,
1099              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1100        return false;
1101      };
1102
1103  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1104}
1105
1106ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1107                                                       ProgramStateRef S,
1108                                                       const Expr *BufE,
1109                                                       SVal BufV) {
1110  auto InvalidationTraitOperations =
1111      [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1112        ITraits.setTrait(
1113            R->getBaseRegion(),
1114            RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1115        ITraits.setTrait(R,
1116                         RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1117        return true;
1118      };
1119
1120  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1121}
1122
1123ProgramStateRef CStringChecker::invalidateBufferAux(
1124    CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1125    llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1126                            const MemRegion *)>
1127        InvalidationTraitOperations) {
1128  std::optional<Loc> L = V.getAs<Loc>();
1129  if (!L)
1130    return State;
1131
1132  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1133  // some assumptions about the value that CFRefCount can't. Even so, it should
1134  // probably be refactored.
1135  if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1136    const MemRegion *R = MR->getRegion()->StripCasts();
1137
1138    // Are we dealing with an ElementRegion?  If so, we should be invalidating
1139    // the super-region.
1140    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1141      R = ER->getSuperRegion();
1142      // FIXME: What about layers of ElementRegions?
1143    }
1144
1145    // Invalidate this region.
1146    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1147    RegionAndSymbolInvalidationTraits ITraits;
1148    bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1149
1150    return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1151                                    CausesPointerEscape, nullptr, nullptr,
1152                                    &ITraits);
1153  }
1154
1155  // If we have a non-region value by chance, just remove the binding.
1156  // FIXME: is this necessary or correct? This handles the non-Region
1157  //  cases.  Is it ever valid to store to these?
1158  return State->killBinding(*L);
1159}
1160
1161bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1162                                     const MemRegion *MR) {
1163  switch (MR->getKind()) {
1164  case MemRegion::FunctionCodeRegionKind: {
1165    if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1166      os << "the address of the function '" << *FD << '\'';
1167    else
1168      os << "the address of a function";
1169    return true;
1170  }
1171  case MemRegion::BlockCodeRegionKind:
1172    os << "block text";
1173    return true;
1174  case MemRegion::BlockDataRegionKind:
1175    os << "a block";
1176    return true;
1177  case MemRegion::CXXThisRegionKind:
1178  case MemRegion::CXXTempObjectRegionKind:
1179    os << "a C++ temp object of type "
1180       << cast<TypedValueRegion>(MR)->getValueType();
1181    return true;
1182  case MemRegion::NonParamVarRegionKind:
1183    os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1184    return true;
1185  case MemRegion::ParamVarRegionKind:
1186    os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1187    return true;
1188  case MemRegion::FieldRegionKind:
1189    os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1190    return true;
1191  case MemRegion::ObjCIvarRegionKind:
1192    os << "an instance variable of type "
1193       << cast<TypedValueRegion>(MR)->getValueType();
1194    return true;
1195  default:
1196    return false;
1197  }
1198}
1199
1200bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1201                               const Expr *Size, CheckerContext &C,
1202                               ProgramStateRef &State) {
1203  SVal MemVal = C.getSVal(DstBuffer);
1204  SVal SizeVal = C.getSVal(Size);
1205  const MemRegion *MR = MemVal.getAsRegion();
1206  if (!MR)
1207    return false;
1208
1209  // We're about to model memset by producing a "default binding" in the Store.
1210  // Our current implementation - RegionStore - doesn't support default bindings
1211  // that don't cover the whole base region. So we should first get the offset
1212  // and the base region to figure out whether the offset of buffer is 0.
1213  RegionOffset Offset = MR->getAsOffset();
1214  const MemRegion *BR = Offset.getRegion();
1215
1216  std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1217  if (!SizeNL)
1218    return false;
1219
1220  SValBuilder &svalBuilder = C.getSValBuilder();
1221  ASTContext &Ctx = C.getASTContext();
1222
1223  // void *memset(void *dest, int ch, size_t count);
1224  // For now we can only handle the case of offset is 0 and concrete char value.
1225  if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1226      Offset.getOffset() == 0) {
1227    // Get the base region's size.
1228    DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1229
1230    ProgramStateRef StateWholeReg, StateNotWholeReg;
1231    std::tie(StateWholeReg, StateNotWholeReg) =
1232        State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1233
1234    // With the semantic of 'memset()', we should convert the CharVal to
1235    // unsigned char.
1236    CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1237
1238    ProgramStateRef StateNullChar, StateNonNullChar;
1239    std::tie(StateNullChar, StateNonNullChar) =
1240        assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1241
1242    if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1243        !StateNonNullChar) {
1244      // If the 'memset()' acts on the whole region of destination buffer and
1245      // the value of the second argument of 'memset()' is zero, bind the second
1246      // argument's value to the destination buffer with 'default binding'.
1247      // FIXME: Since there is no perfect way to bind the non-zero character, we
1248      // can only deal with zero value here. In the future, we need to deal with
1249      // the binding of non-zero value in the case of whole region.
1250      State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1251                                     C.getLocationContext());
1252    } else {
1253      // If the destination buffer's extent is not equal to the value of
1254      // third argument, just invalidate buffer.
1255      State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1256                                                SizeVal, Size->getType());
1257    }
1258
1259    if (StateNullChar && !StateNonNullChar) {
1260      // If the value of the second argument of 'memset()' is zero, set the
1261      // string length of destination buffer to 0 directly.
1262      State = setCStringLength(State, MR,
1263                               svalBuilder.makeZeroVal(Ctx.getSizeType()));
1264    } else if (!StateNullChar && StateNonNullChar) {
1265      SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1266          CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1267          C.getLocationContext(), C.blockCount());
1268
1269      // If the value of second argument is not zero, then the string length
1270      // is at least the size argument.
1271      SVal NewStrLenGESize = svalBuilder.evalBinOp(
1272          State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1273
1274      State = setCStringLength(
1275          State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1276          MR, NewStrLen);
1277    }
1278  } else {
1279    // If the offset is not zero and char value is not concrete, we can do
1280    // nothing but invalidate the buffer.
1281    State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1282                                              SizeVal, Size->getType());
1283  }
1284  return true;
1285}
1286
1287//===----------------------------------------------------------------------===//
1288// evaluation of individual function calls.
1289//===----------------------------------------------------------------------===//
1290
1291void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1292                                    ProgramStateRef state, SizeArgExpr Size,
1293                                    DestinationArgExpr Dest,
1294                                    SourceArgExpr Source, bool Restricted,
1295                                    bool IsMempcpy, CharKind CK) const {
1296  CurrentFunctionDescription = "memory copy function";
1297
1298  // See if the size argument is zero.
1299  const LocationContext *LCtx = C.getLocationContext();
1300  SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1301  QualType sizeTy = Size.Expression->getType();
1302
1303  ProgramStateRef stateZeroSize, stateNonZeroSize;
1304  std::tie(stateZeroSize, stateNonZeroSize) =
1305      assumeZero(C, state, sizeVal, sizeTy);
1306
1307  // Get the value of the Dest.
1308  SVal destVal = state->getSVal(Dest.Expression, LCtx);
1309
1310  // If the size is zero, there won't be any actual memory access, so
1311  // just bind the return value to the destination buffer and return.
1312  if (stateZeroSize && !stateNonZeroSize) {
1313    stateZeroSize =
1314        stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1315    C.addTransition(stateZeroSize);
1316    return;
1317  }
1318
1319  // If the size can be nonzero, we have to check the other arguments.
1320  if (stateNonZeroSize) {
1321    state = stateNonZeroSize;
1322
1323    // Ensure the destination is not null. If it is NULL there will be a
1324    // NULL pointer dereference.
1325    state = checkNonNull(C, state, Dest, destVal);
1326    if (!state)
1327      return;
1328
1329    // Get the value of the Src.
1330    SVal srcVal = state->getSVal(Source.Expression, LCtx);
1331
1332    // Ensure the source is not null. If it is NULL there will be a
1333    // NULL pointer dereference.
1334    state = checkNonNull(C, state, Source, srcVal);
1335    if (!state)
1336      return;
1337
1338    // Ensure the accesses are valid and that the buffers do not overlap.
1339    state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1340    state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1341
1342    if (Restricted)
1343      state = CheckOverlap(C, state, Size, Dest, Source, CK);
1344
1345    if (!state)
1346      return;
1347
1348    // If this is mempcpy, get the byte after the last byte copied and
1349    // bind the expr.
1350    if (IsMempcpy) {
1351      // Get the byte after the last byte copied.
1352      SValBuilder &SvalBuilder = C.getSValBuilder();
1353      ASTContext &Ctx = SvalBuilder.getContext();
1354      QualType CharPtrTy = getCharPtrType(Ctx, CK);
1355      SVal DestRegCharVal =
1356          SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1357      SVal lastElement = C.getSValBuilder().evalBinOp(
1358          state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1359      // If we don't know how much we copied, we can at least
1360      // conjure a return value for later.
1361      if (lastElement.isUnknown())
1362        lastElement = C.getSValBuilder().conjureSymbolVal(
1363            nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1364
1365      // The byte after the last byte copied is the return value.
1366      state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1367    } else {
1368      // All other copies return the destination buffer.
1369      // (Well, bcopy() has a void return type, but this won't hurt.)
1370      state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1371    }
1372
1373    // Invalidate the destination (regular invalidation without pointer-escaping
1374    // the address of the top-level region).
1375    // FIXME: Even if we can't perfectly model the copy, we should see if we
1376    // can use LazyCompoundVals to copy the source values into the destination.
1377    // This would probably remove any existing bindings past the end of the
1378    // copied region, but that's still an improvement over blank invalidation.
1379    state = invalidateDestinationBufferBySize(
1380        C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1381        Size.Expression->getType());
1382
1383    // Invalidate the source (const-invalidation without const-pointer-escaping
1384    // the address of the top-level region).
1385    state = invalidateSourceBuffer(C, state, Source.Expression,
1386                                   C.getSVal(Source.Expression));
1387
1388    C.addTransition(state);
1389  }
1390}
1391
1392void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1393                                CharKind CK) const {
1394  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1395  // The return value is the address of the destination buffer.
1396  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1397  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1398  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1399
1400  ProgramStateRef State = C.getState();
1401
1402  constexpr bool IsRestricted = true;
1403  constexpr bool IsMempcpy = false;
1404  evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1405}
1406
1407void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1408                                 CharKind CK) const {
1409  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1410  // The return value is a pointer to the byte following the last written byte.
1411  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1412  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1413  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1414
1415  constexpr bool IsRestricted = true;
1416  constexpr bool IsMempcpy = true;
1417  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1418                 IsMempcpy, CK);
1419}
1420
1421void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1422                                 CharKind CK) const {
1423  // void *memmove(void *dst, const void *src, size_t n);
1424  // The return value is the address of the destination buffer.
1425  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1426  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1427  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1428
1429  constexpr bool IsRestricted = false;
1430  constexpr bool IsMempcpy = false;
1431  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1432                 IsMempcpy, CK);
1433}
1434
1435void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1436  // void bcopy(const void *src, void *dst, size_t n);
1437  SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1438  DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1439  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1440
1441  constexpr bool IsRestricted = false;
1442  constexpr bool IsMempcpy = false;
1443  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1444                 IsMempcpy, CharKind::Regular);
1445}
1446
1447void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1448                                CharKind CK) const {
1449  // int memcmp(const void *s1, const void *s2, size_t n);
1450  CurrentFunctionDescription = "memory comparison function";
1451
1452  AnyArgExpr Left = {Call.getArgExpr(0), 0};
1453  AnyArgExpr Right = {Call.getArgExpr(1), 1};
1454  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1455
1456  ProgramStateRef State = C.getState();
1457  SValBuilder &Builder = C.getSValBuilder();
1458  const LocationContext *LCtx = C.getLocationContext();
1459
1460  // See if the size argument is zero.
1461  SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1462  QualType sizeTy = Size.Expression->getType();
1463
1464  ProgramStateRef stateZeroSize, stateNonZeroSize;
1465  std::tie(stateZeroSize, stateNonZeroSize) =
1466      assumeZero(C, State, sizeVal, sizeTy);
1467
1468  // If the size can be zero, the result will be 0 in that case, and we don't
1469  // have to check either of the buffers.
1470  if (stateZeroSize) {
1471    State = stateZeroSize;
1472    State = State->BindExpr(Call.getOriginExpr(), LCtx,
1473                            Builder.makeZeroVal(Call.getResultType()));
1474    C.addTransition(State);
1475  }
1476
1477  // If the size can be nonzero, we have to check the other arguments.
1478  if (stateNonZeroSize) {
1479    State = stateNonZeroSize;
1480    // If we know the two buffers are the same, we know the result is 0.
1481    // First, get the two buffers' addresses. Another checker will have already
1482    // made sure they're not undefined.
1483    DefinedOrUnknownSVal LV =
1484        State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1485    DefinedOrUnknownSVal RV =
1486        State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1487
1488    // See if they are the same.
1489    ProgramStateRef SameBuffer, NotSameBuffer;
1490    std::tie(SameBuffer, NotSameBuffer) =
1491        State->assume(Builder.evalEQ(State, LV, RV));
1492
1493    // If the two arguments are the same buffer, we know the result is 0,
1494    // and we only need to check one size.
1495    if (SameBuffer && !NotSameBuffer) {
1496      State = SameBuffer;
1497      State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1498      if (State) {
1499        State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1500                                     Builder.makeZeroVal(Call.getResultType()));
1501        C.addTransition(State);
1502      }
1503      return;
1504    }
1505
1506    // If the two arguments might be different buffers, we have to check
1507    // the size of both of them.
1508    assert(NotSameBuffer);
1509    State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1510    State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1511    if (State) {
1512      // The return value is the comparison result, which we don't know.
1513      SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
1514                                           C.blockCount());
1515      State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1516      C.addTransition(State);
1517    }
1518  }
1519}
1520
1521void CStringChecker::evalstrLength(CheckerContext &C,
1522                                   const CallEvent &Call) const {
1523  // size_t strlen(const char *s);
1524  evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1525}
1526
1527void CStringChecker::evalstrnLength(CheckerContext &C,
1528                                    const CallEvent &Call) const {
1529  // size_t strnlen(const char *s, size_t maxlen);
1530  evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1531}
1532
1533void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1534                                         const CallEvent &Call,
1535                                         bool IsStrnlen) const {
1536  CurrentFunctionDescription = "string length function";
1537  ProgramStateRef state = C.getState();
1538  const LocationContext *LCtx = C.getLocationContext();
1539
1540  if (IsStrnlen) {
1541    const Expr *maxlenExpr = Call.getArgExpr(1);
1542    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1543
1544    ProgramStateRef stateZeroSize, stateNonZeroSize;
1545    std::tie(stateZeroSize, stateNonZeroSize) =
1546      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1547
1548    // If the size can be zero, the result will be 0 in that case, and we don't
1549    // have to check the string itself.
1550    if (stateZeroSize) {
1551      SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1552      stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1553      C.addTransition(stateZeroSize);
1554    }
1555
1556    // If the size is GUARANTEED to be zero, we're done!
1557    if (!stateNonZeroSize)
1558      return;
1559
1560    // Otherwise, record the assumption that the size is nonzero.
1561    state = stateNonZeroSize;
1562  }
1563
1564  // Check that the string argument is non-null.
1565  AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1566  SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1567  state = checkNonNull(C, state, Arg, ArgVal);
1568
1569  if (!state)
1570    return;
1571
1572  SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1573
1574  // If the argument isn't a valid C string, there's no valid state to
1575  // transition to.
1576  if (strLength.isUndef())
1577    return;
1578
1579  DefinedOrUnknownSVal result = UnknownVal();
1580
1581  // If the check is for strnlen() then bind the return value to no more than
1582  // the maxlen value.
1583  if (IsStrnlen) {
1584    QualType cmpTy = C.getSValBuilder().getConditionType();
1585
1586    // It's a little unfortunate to be getting this again,
1587    // but it's not that expensive...
1588    const Expr *maxlenExpr = Call.getArgExpr(1);
1589    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1590
1591    std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1592    std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1593
1594    if (strLengthNL && maxlenValNL) {
1595      ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1596
1597      // Check if the strLength is greater than the maxlen.
1598      std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1599          C.getSValBuilder()
1600              .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1601              .castAs<DefinedOrUnknownSVal>());
1602
1603      if (stateStringTooLong && !stateStringNotTooLong) {
1604        // If the string is longer than maxlen, return maxlen.
1605        result = *maxlenValNL;
1606      } else if (stateStringNotTooLong && !stateStringTooLong) {
1607        // If the string is shorter than maxlen, return its length.
1608        result = *strLengthNL;
1609      }
1610    }
1611
1612    if (result.isUnknown()) {
1613      // If we don't have enough information for a comparison, there's
1614      // no guarantee the full string length will actually be returned.
1615      // All we know is the return value is the min of the string length
1616      // and the limit. This is better than nothing.
1617      result = C.getSValBuilder().conjureSymbolVal(
1618          nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1619      NonLoc resultNL = result.castAs<NonLoc>();
1620
1621      if (strLengthNL) {
1622        state = state->assume(C.getSValBuilder().evalBinOpNN(
1623                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
1624                                  .castAs<DefinedOrUnknownSVal>(), true);
1625      }
1626
1627      if (maxlenValNL) {
1628        state = state->assume(C.getSValBuilder().evalBinOpNN(
1629                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1630                                  .castAs<DefinedOrUnknownSVal>(), true);
1631      }
1632    }
1633
1634  } else {
1635    // This is a plain strlen(), not strnlen().
1636    result = strLength.castAs<DefinedOrUnknownSVal>();
1637
1638    // If we don't know the length of the string, conjure a return
1639    // value, so it can be used in constraints, at least.
1640    if (result.isUnknown()) {
1641      result = C.getSValBuilder().conjureSymbolVal(
1642          nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1643    }
1644  }
1645
1646  // Bind the return value.
1647  assert(!result.isUnknown() && "Should have conjured a value by now");
1648  state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1649  C.addTransition(state);
1650}
1651
1652void CStringChecker::evalStrcpy(CheckerContext &C,
1653                                const CallEvent &Call) const {
1654  // char *strcpy(char *restrict dst, const char *restrict src);
1655  evalStrcpyCommon(C, Call,
1656                   /* ReturnEnd = */ false,
1657                   /* IsBounded = */ false,
1658                   /* appendK = */ ConcatFnKind::none);
1659}
1660
1661void CStringChecker::evalStrncpy(CheckerContext &C,
1662                                 const CallEvent &Call) const {
1663  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1664  evalStrcpyCommon(C, Call,
1665                   /* ReturnEnd = */ false,
1666                   /* IsBounded = */ true,
1667                   /* appendK = */ ConcatFnKind::none);
1668}
1669
1670void CStringChecker::evalStpcpy(CheckerContext &C,
1671                                const CallEvent &Call) const {
1672  // char *stpcpy(char *restrict dst, const char *restrict src);
1673  evalStrcpyCommon(C, Call,
1674                   /* ReturnEnd = */ true,
1675                   /* IsBounded = */ false,
1676                   /* appendK = */ ConcatFnKind::none);
1677}
1678
1679void CStringChecker::evalStrlcpy(CheckerContext &C,
1680                                 const CallEvent &Call) const {
1681  // size_t strlcpy(char *dest, const char *src, size_t size);
1682  evalStrcpyCommon(C, Call,
1683                   /* ReturnEnd = */ true,
1684                   /* IsBounded = */ true,
1685                   /* appendK = */ ConcatFnKind::none,
1686                   /* returnPtr = */ false);
1687}
1688
1689void CStringChecker::evalStrcat(CheckerContext &C,
1690                                const CallEvent &Call) const {
1691  // char *strcat(char *restrict s1, const char *restrict s2);
1692  evalStrcpyCommon(C, Call,
1693                   /* ReturnEnd = */ false,
1694                   /* IsBounded = */ false,
1695                   /* appendK = */ ConcatFnKind::strcat);
1696}
1697
1698void CStringChecker::evalStrncat(CheckerContext &C,
1699                                 const CallEvent &Call) const {
1700  // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1701  evalStrcpyCommon(C, Call,
1702                   /* ReturnEnd = */ false,
1703                   /* IsBounded = */ true,
1704                   /* appendK = */ ConcatFnKind::strcat);
1705}
1706
1707void CStringChecker::evalStrlcat(CheckerContext &C,
1708                                 const CallEvent &Call) const {
1709  // size_t strlcat(char *dst, const char *src, size_t size);
1710  // It will append at most size - strlen(dst) - 1 bytes,
1711  // NULL-terminating the result.
1712  evalStrcpyCommon(C, Call,
1713                   /* ReturnEnd = */ false,
1714                   /* IsBounded = */ true,
1715                   /* appendK = */ ConcatFnKind::strlcat,
1716                   /* returnPtr = */ false);
1717}
1718
// Shared model for the string copy and concatenation functions.
//
//   ReturnEnd - when the call yields a pointer, selects a pointer to
//               dst + final length instead of dst itself.
//   IsBounded - argument 2 is a size that limits how much is copied.
//   appendK   - ConcatFnKind::none for copy-over functions; strcat or strlcat
//               for functions that append to the destination string.
//   returnPtr - when true the call evaluates to a pointer; when false it
//               evaluates to a size (the strlcpy/strlcat flavour).
//
// The routine checks both pointers for null, checks the buffers for overlap,
// works out (where possible) how many characters are copied, checks the
// destination for overflow, invalidates destination and source, records the
// destination's new string length, and finally binds the return value.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Set only when a bound-based check is prepared below; a non-null value
  // later suppresses the check that is based on the actual amount copied.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //  Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {
      {srcExpr.Expression, srcExpr.ArgumentIndex}};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
      Dst, srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        // If both outcomes are feasible, amountCopied stays unknown.
        break;
      }
      case ConcatFnKind::strlcat:
        // The computation below needs the current length of the destination
        // string; without it no transition is added for this call.
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        // Both outcomes are feasible: the amount copied is not known.
        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For the bounded copy functions (strncpy, strlcpy) and strlcat, this
        // is just checking that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            // Pointer-returning functions yield the destination here.
            StateZeroSize =
                StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
                                                      LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize =
                  StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Return value for the size-returning functions (returnPtr == false).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src), i.e. the source string length computed
    // above.
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    // strlcat returns strlen(src) + strlen(dst) when both are known.
    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength =
          getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this block is only entered when
        // appendK != ConcatFnKind::none, so the condition below can never
        // hold as written - confirm whether that is intended.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    // For ReturnEnd the value starts out unknown; it is filled in below once
    // the final length is known, or conjured just before binding.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      // strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      // Check if the first byte of the destination is writable.
      state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
      if (!state)
        return;
      // Check if the last byte of the destination is writable.
      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
          *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        // Check if the first byte of the destination is writable.
        state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
        if (!state)
          return;
        // Check if the last byte of the destination is writable.
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
                                            C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
  C.addTransition(state);
}
2117
2118void CStringChecker::evalStrcmp(CheckerContext &C,
2119                                const CallEvent &Call) const {
2120  //int strcmp(const char *s1, const char *s2);
2121  evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2122}
2123
2124void CStringChecker::evalStrncmp(CheckerContext &C,
2125                                 const CallEvent &Call) const {
2126  //int strncmp(const char *s1, const char *s2, size_t n);
2127  evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2128}
2129
2130void CStringChecker::evalStrcasecmp(CheckerContext &C,
2131                                    const CallEvent &Call) const {
2132  //int strcasecmp(const char *s1, const char *s2);
2133  evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2134}
2135
2136void CStringChecker::evalStrncasecmp(CheckerContext &C,
2137                                     const CallEvent &Call) const {
2138  //int strncasecmp(const char *s1, const char *s2, size_t n);
2139  evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2140}
2141
2142void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2143                                      bool IsBounded, bool IgnoreCase) const {
2144  CurrentFunctionDescription = "string comparison function";
2145  ProgramStateRef state = C.getState();
2146  const LocationContext *LCtx = C.getLocationContext();
2147
2148  // Check that the first string is non-null
2149  AnyArgExpr Left = {Call.getArgExpr(0), 0};
2150  SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2151  state = checkNonNull(C, state, Left, LeftVal);
2152  if (!state)
2153    return;
2154
2155  // Check that the second string is non-null.
2156  AnyArgExpr Right = {Call.getArgExpr(1), 1};
2157  SVal RightVal = state->getSVal(Right.Expression, LCtx);
2158  state = checkNonNull(C, state, Right, RightVal);
2159  if (!state)
2160    return;
2161
2162  // Get the string length of the first string or give up.
2163  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2164  if (LeftLength.isUndef())
2165    return;
2166
2167  // Get the string length of the second string or give up.
2168  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2169  if (RightLength.isUndef())
2170    return;
2171
2172  // If we know the two buffers are the same, we know the result is 0.
2173  // First, get the two buffers' addresses. Another checker will have already
2174  // made sure they're not undefined.
2175  DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2176  DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2177
2178  // See if they are the same.
2179  SValBuilder &svalBuilder = C.getSValBuilder();
2180  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2181  ProgramStateRef StSameBuf, StNotSameBuf;
2182  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2183
2184  // If the two arguments might be the same buffer, we know the result is 0,
2185  // and we only need to check one size.
2186  if (StSameBuf) {
2187    StSameBuf =
2188        StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2189                            svalBuilder.makeZeroVal(Call.getResultType()));
2190    C.addTransition(StSameBuf);
2191
2192    // If the two arguments are GUARANTEED to be the same, we're done!
2193    if (!StNotSameBuf)
2194      return;
2195  }
2196
2197  assert(StNotSameBuf);
2198  state = StNotSameBuf;
2199
2200  // At this point we can go about comparing the two buffers.
2201  // For now, we only do this if they're both known string literals.
2202
2203  // Attempt to extract string literals from both expressions.
2204  const StringLiteral *LeftStrLiteral =
2205      getCStringLiteral(C, state, Left.Expression, LeftVal);
2206  const StringLiteral *RightStrLiteral =
2207      getCStringLiteral(C, state, Right.Expression, RightVal);
2208  bool canComputeResult = false;
2209  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(),
2210                                                LCtx, C.blockCount());
2211
2212  if (LeftStrLiteral && RightStrLiteral) {
2213    StringRef LeftStrRef = LeftStrLiteral->getString();
2214    StringRef RightStrRef = RightStrLiteral->getString();
2215
2216    if (IsBounded) {
2217      // Get the max number of characters to compare.
2218      const Expr *lenExpr = Call.getArgExpr(2);
2219      SVal lenVal = state->getSVal(lenExpr, LCtx);
2220
2221      // If the length is known, we can get the right substrings.
2222      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2223        // Create substrings of each to compare the prefix.
2224        LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2225        RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2226        canComputeResult = true;
2227      }
2228    } else {
2229      // This is a normal, unbounded strcmp.
2230      canComputeResult = true;
2231    }
2232
2233    if (canComputeResult) {
2234      // Real strcmp stops at null characters.
2235      size_t s1Term = LeftStrRef.find('\0');
2236      if (s1Term != StringRef::npos)
2237        LeftStrRef = LeftStrRef.substr(0, s1Term);
2238
2239      size_t s2Term = RightStrRef.find('\0');
2240      if (s2Term != StringRef::npos)
2241        RightStrRef = RightStrRef.substr(0, s2Term);
2242
2243      // Use StringRef's comparison methods to compute the actual result.
2244      int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2245                                  : LeftStrRef.compare(RightStrRef);
2246
2247      // The strcmp function returns an integer greater than, equal to, or less
2248      // than zero, [c11, p7.24.4.2].
2249      if (compareRes == 0) {
2250        resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2251      }
2252      else {
2253        DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2254        // Constrain strcmp's result range based on the result of StringRef's
2255        // comparison methods.
2256        BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2257        SVal compareWithZero =
2258          svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2259              svalBuilder.getConditionType());
2260        DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2261        state = state->assume(compareWithZeroVal, true);
2262      }
2263    }
2264  }
2265
2266  state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2267
2268  // Record this as a possible path.
2269  C.addTransition(state);
2270}
2271
2272void CStringChecker::evalStrsep(CheckerContext &C,
2273                                const CallEvent &Call) const {
2274  // char *strsep(char **stringp, const char *delim);
2275  // Verify whether the search string parameter matches the return type.
2276  SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2277
2278  QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2279  if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2280                                CharPtrTy.getUnqualifiedType())
2281    return;
2282
2283  CurrentFunctionDescription = "strsep()";
2284  ProgramStateRef State = C.getState();
2285  const LocationContext *LCtx = C.getLocationContext();
2286
2287  // Check that the search string pointer is non-null (though it may point to
2288  // a null string).
2289  SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2290  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2291  if (!State)
2292    return;
2293
2294  // Check that the delimiter string is non-null.
2295  AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2296  SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2297  State = checkNonNull(C, State, DelimStr, DelimStrVal);
2298  if (!State)
2299    return;
2300
2301  SValBuilder &SVB = C.getSValBuilder();
2302  SVal Result;
2303  if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2304    // Get the current value of the search string pointer, as a char*.
2305    Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2306
2307    // Invalidate the search string, representing the change of one delimiter
2308    // character to NUL.
2309    // As the replacement never overflows, do not invalidate its super region.
2310    State = invalidateDestinationBufferNeverOverflows(
2311        C, State, SearchStrPtr.Expression, Result);
2312
2313    // Overwrite the search string pointer. The new value is either an address
2314    // further along in the same string, or NULL if there are no more tokens.
2315    State =
2316        State->bindLoc(*SearchStrLoc,
2317                       SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(),
2318                                            LCtx, CharPtrTy, C.blockCount()),
2319                       LCtx);
2320  } else {
2321    assert(SearchStrVal.isUnknown());
2322    // Conjure a symbolic value. It's the best we can do.
2323    Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2324                                  C.blockCount());
2325  }
2326
2327  // Set the return value, and finish.
2328  State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2329  C.addTransition(State);
2330}
2331
2332// These should probably be moved into a C++ standard library checker.
2333void CStringChecker::evalStdCopy(CheckerContext &C,
2334                                 const CallEvent &Call) const {
2335  evalStdCopyCommon(C, Call);
2336}
2337
2338void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2339                                         const CallEvent &Call) const {
2340  evalStdCopyCommon(C, Call);
2341}
2342
2343void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2344                                       const CallEvent &Call) const {
2345  if (!Call.getArgExpr(2)->getType()->isPointerType())
2346    return;
2347
2348  ProgramStateRef State = C.getState();
2349
2350  const LocationContext *LCtx = C.getLocationContext();
2351
2352  // template <class _InputIterator, class _OutputIterator>
2353  // _OutputIterator
2354  // copy(_InputIterator __first, _InputIterator __last,
2355  //        _OutputIterator __result)
2356
2357  // Invalidate the destination buffer
2358  const Expr *Dst = Call.getArgExpr(2);
2359  SVal DstVal = State->getSVal(Dst, LCtx);
2360  // FIXME: As we do not know how many items are copied, we also invalidate the
2361  // super region containing the target location.
2362  State =
2363      invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2364
2365  SValBuilder &SVB = C.getSValBuilder();
2366
2367  SVal ResultVal =
2368      SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
2369  State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2370
2371  C.addTransition(State);
2372}
2373
2374void CStringChecker::evalMemset(CheckerContext &C,
2375                                const CallEvent &Call) const {
2376  // void *memset(void *s, int c, size_t n);
2377  CurrentFunctionDescription = "memory set function";
2378
2379  DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2380  AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2381  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2382
2383  ProgramStateRef State = C.getState();
2384
2385  // See if the size argument is zero.
2386  const LocationContext *LCtx = C.getLocationContext();
2387  SVal SizeVal = C.getSVal(Size.Expression);
2388  QualType SizeTy = Size.Expression->getType();
2389
2390  ProgramStateRef ZeroSize, NonZeroSize;
2391  std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2392
2393  // Get the value of the memory area.
2394  SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2395
2396  // If the size is zero, there won't be any actual memory access, so
2397  // just bind the return value to the buffer and return.
2398  if (ZeroSize && !NonZeroSize) {
2399    ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2400    C.addTransition(ZeroSize);
2401    return;
2402  }
2403
2404  // Ensure the memory area is not null.
2405  // If it is NULL there will be a NULL pointer dereference.
2406  State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2407  if (!State)
2408    return;
2409
2410  State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2411  if (!State)
2412    return;
2413
2414  // According to the values of the arguments, bind the value of the second
2415  // argument to the destination buffer and set string length, or just
2416  // invalidate the destination buffer.
2417  if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2418                 Size.Expression, C, State))
2419    return;
2420
2421  State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2422  C.addTransition(State);
2423}
2424
2425void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2426  CurrentFunctionDescription = "memory clearance function";
2427
2428  DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2429  SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2430  SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2431
2432  ProgramStateRef State = C.getState();
2433
2434  // See if the size argument is zero.
2435  SVal SizeVal = C.getSVal(Size.Expression);
2436  QualType SizeTy = Size.Expression->getType();
2437
2438  ProgramStateRef StateZeroSize, StateNonZeroSize;
2439  std::tie(StateZeroSize, StateNonZeroSize) =
2440    assumeZero(C, State, SizeVal, SizeTy);
2441
2442  // If the size is zero, there won't be any actual memory access,
2443  // In this case we just return.
2444  if (StateZeroSize && !StateNonZeroSize) {
2445    C.addTransition(StateZeroSize);
2446    return;
2447  }
2448
2449  // Get the value of the memory area.
2450  SVal MemVal = C.getSVal(Buffer.Expression);
2451
2452  // Ensure the memory area is not null.
2453  // If it is NULL there will be a NULL pointer dereference.
2454  State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2455  if (!State)
2456    return;
2457
2458  State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2459  if (!State)
2460    return;
2461
2462  if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2463    return;
2464
2465  C.addTransition(State);
2466}
2467
2468void CStringChecker::evalSprintf(CheckerContext &C,
2469                                 const CallEvent &Call) const {
2470  CurrentFunctionDescription = "'sprintf'";
2471  const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2472  bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk;
2473  evalSprintfCommon(C, Call, /* IsBounded */ false, IsBI);
2474}
2475
2476void CStringChecker::evalSnprintf(CheckerContext &C,
2477                                  const CallEvent &Call) const {
2478  CurrentFunctionDescription = "'snprintf'";
2479  const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2480  bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk;
2481  evalSprintfCommon(C, Call, /* IsBounded */ true, IsBI);
2482}
2483
2484void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2485                                       bool IsBounded, bool IsBuiltin) const {
2486  ProgramStateRef State = C.getState();
2487  const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2488  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2489
2490  const auto NumParams = Call.parameters().size();
2491  assert(CE->getNumArgs() >= NumParams);
2492
2493  const auto AllArguments =
2494      llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2495  const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2496
2497  for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2498    // We consider only string buffers
2499    if (const QualType type = ArgExpr->getType();
2500        !type->isAnyPointerType() ||
2501        !type->getPointeeType()->isAnyCharacterType())
2502      continue;
2503    SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2504
2505    // Ensure the buffers do not overlap.
2506    SizeArgExpr SrcExprAsSizeDummy = {
2507        {Source.Expression, Source.ArgumentIndex}};
2508    State = CheckOverlap(
2509        C, State,
2510        (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2511        Dest, Source);
2512    if (!State)
2513      return;
2514  }
2515
2516  C.addTransition(State);
2517}
2518
2519//===----------------------------------------------------------------------===//
2520// The driver method, and other Checker callbacks.
2521//===----------------------------------------------------------------------===//
2522
2523CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2524                                                     CheckerContext &C) const {
2525  const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2526  if (!CE)
2527    return nullptr;
2528
2529  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2530  if (!FD)
2531    return nullptr;
2532
2533  if (StdCopy.matches(Call))
2534    return &CStringChecker::evalStdCopy;
2535  if (StdCopyBackward.matches(Call))
2536    return &CStringChecker::evalStdCopyBackward;
2537
2538  // Pro-actively check that argument types are safe to do arithmetic upon.
2539  // We do not want to crash if someone accidentally passes a structure
2540  // into, say, a C++ overload of any of these functions. We could not check
2541  // that for std::copy because they may have arguments of other types.
2542  for (auto I : CE->arguments()) {
2543    QualType T = I->getType();
2544    if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2545      return nullptr;
2546  }
2547
2548  const FnCheck *Callback = Callbacks.lookup(Call);
2549  if (Callback)
2550    return *Callback;
2551
2552  return nullptr;
2553}
2554
2555bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2556  FnCheck Callback = identifyCall(Call, C);
2557
2558  // If the callee isn't a string function, let another checker handle it.
2559  if (!Callback)
2560    return false;
2561
2562  // Check and evaluate the call.
2563  assert(isa<CallExpr>(Call.getOriginExpr()));
2564  Callback(this, C, Call);
2565
2566  // If the evaluate call resulted in no change, chain to the next eval call
2567  // handler.
2568  // Note, the custom CString evaluation calls assume that basic safety
2569  // properties are held. However, if the user chooses to turn off some of these
2570  // checks, we ignore the issues and leave the call evaluation to a generic
2571  // handler.
2572  return C.isDifferent();
2573}
2574
2575void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2576  // Record string length for char a[] = "abc";
2577  ProgramStateRef state = C.getState();
2578
2579  for (const auto *I : DS->decls()) {
2580    const VarDecl *D = dyn_cast<VarDecl>(I);
2581    if (!D)
2582      continue;
2583
2584    // FIXME: Handle array fields of structs.
2585    if (!D->getType()->isArrayType())
2586      continue;
2587
2588    const Expr *Init = D->getInit();
2589    if (!Init)
2590      continue;
2591    if (!isa<StringLiteral>(Init))
2592      continue;
2593
2594    Loc VarLoc = state->getLValue(D, C.getLocationContext());
2595    const MemRegion *MR = VarLoc.getAsRegion();
2596    if (!MR)
2597      continue;
2598
2599    SVal StrVal = C.getSVal(Init);
2600    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2601    DefinedOrUnknownSVal strLength =
2602      getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2603
2604    state = state->set<CStringLength>(MR, strLength);
2605  }
2606
2607  C.addTransition(state);
2608}
2609
2610ProgramStateRef
2611CStringChecker::checkRegionChanges(ProgramStateRef state,
2612    const InvalidatedSymbols *,
2613    ArrayRef<const MemRegion *> ExplicitRegions,
2614    ArrayRef<const MemRegion *> Regions,
2615    const LocationContext *LCtx,
2616    const CallEvent *Call) const {
2617  CStringLengthTy Entries = state->get<CStringLength>();
2618  if (Entries.isEmpty())
2619    return state;
2620
2621  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2622  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2623
2624  // First build sets for the changed regions and their super-regions.
2625  for (const MemRegion *MR : Regions) {
2626    Invalidated.insert(MR);
2627
2628    SuperRegions.insert(MR);
2629    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2630      MR = SR->getSuperRegion();
2631      SuperRegions.insert(MR);
2632    }
2633  }
2634
2635  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2636
2637  // Then loop over the entries in the current state.
2638  for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2639    // Is this entry for a super-region of a changed region?
2640    if (SuperRegions.count(MR)) {
2641      Entries = F.remove(Entries, MR);
2642      continue;
2643    }
2644
2645    // Is this entry for a sub-region of a changed region?
2646    const MemRegion *Super = MR;
2647    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2648      Super = SR->getSuperRegion();
2649      if (Invalidated.count(Super)) {
2650        Entries = F.remove(Entries, MR);
2651        break;
2652      }
2653    }
2654  }
2655
2656  return state->set<CStringLength>(Entries);
2657}
2658
2659void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2660    SymbolReaper &SR) const {
2661  // Mark all symbols in our string length map as valid.
2662  CStringLengthTy Entries = state->get<CStringLength>();
2663
2664  for (SVal Len : llvm::make_second_range(Entries)) {
2665    for (SymbolRef Sym : Len.symbols())
2666      SR.markInUse(Sym);
2667  }
2668}
2669
2670void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2671    CheckerContext &C) const {
2672  ProgramStateRef state = C.getState();
2673  CStringLengthTy Entries = state->get<CStringLength>();
2674  if (Entries.isEmpty())
2675    return;
2676
2677  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2678  for (auto [Reg, Len] : Entries) {
2679    if (SymbolRef Sym = Len.getAsSymbol()) {
2680      if (SR.isDead(Sym))
2681        Entries = F.remove(Entries, Reg);
2682    }
2683  }
2684
2685  state = state->set<CStringLength>(Entries);
2686  C.addTransition(state);
2687}
2688
2689void ento::registerCStringModeling(CheckerManager &Mgr) {
2690  Mgr.registerChecker<CStringChecker>();
2691}
2692
2693bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2694  return true;
2695}
2696
// Defines the two registration entry points for one CStringChecker sub-check:
//   ento::register<name>       - fetches the CStringChecker instance from the
//                                manager, turns on the matching filter flag
//                                and stores the current checker name.
//   ento::shouldRegister<name> - always returns true.
#define REGISTER_CHECKER(name)                                                 \
  void ento::register##name(CheckerManager &mgr) {                             \
    CStringChecker *Chk = mgr.getChecker<CStringChecker>();                    \
    Chk->Filter.Check##name = true;                                            \
    Chk->Filter.CheckName##name = mgr.getCurrentCheckerName();                 \
  }                                                                            \
                                                                               \
  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2705
// Instantiate the register/shouldRegister pair for each sub-check; every
// name here selects the Filter.Check<name> / Filter.CheckName<name> members
// that the macro sets.
REGISTER_CHECKER(CStringNullArg)
REGISTER_CHECKER(CStringOutOfBounds)
REGISTER_CHECKER(CStringBufferOverlap)
REGISTER_CHECKER(CStringNotNullTerm)
REGISTER_CHECKER(CStringUninitializedRead)
2711