1//===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a checker that reports uninitialized fields in objects
10// created after a constructor call.
11//
12// To read about command line options and how the checker works, refer to the
13// top of the file and inline comments in UninitializedObject.h.
14//
15// Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16// complexity of this file.
17//
18//===----------------------------------------------------------------------===//
19
20#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21#include "UninitializedObject.h"
22#include "clang/ASTMatchers/ASTMatchFinder.h"
23#include "clang/Driver/DriverDiagnostic.h"
24#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
25#include "clang/StaticAnalyzer/Core/Checker.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
28
29using namespace clang;
30using namespace clang::ento;
31using namespace clang::ast_matchers;
32
33/// We'll mark fields (and pointee of fields) that are confirmed to be
34/// uninitialized as already analyzed.
35REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
36
37namespace {
38
39class UninitializedObjectChecker
40    : public Checker<check::EndFunction, check::DeadSymbols> {
41  std::unique_ptr<BuiltinBug> BT_uninitField;
42
43public:
44  // The fields of this struct will be initialized when registering the checker.
45  UninitObjCheckerOptions Opts;
46
47  UninitializedObjectChecker()
48      : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
49
50  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
51  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
52};
53
54/// A basic field type, that is not a pointer or a reference, it's dynamic and
55/// static type is the same.
56class RegularField final : public FieldNode {
57public:
58  RegularField(const FieldRegion *FR) : FieldNode(FR) {}
59
60  void printNoteMsg(llvm::raw_ostream &Out) const override {
61    Out << "uninitialized field ";
62  }
63
64  void printPrefix(llvm::raw_ostream &Out) const override {}
65
66  void printNode(llvm::raw_ostream &Out) const override {
67    Out << getVariableName(getDecl());
68  }
69
70  void printSeparator(llvm::raw_ostream &Out) const override { Out << '.'; }
71};
72
73/// Represents that the FieldNode that comes after this is declared in a base
74/// of the previous FieldNode. As such, this descendant doesn't wrap a
75/// FieldRegion, and is purely a tool to describe a relation between two other
76/// FieldRegion wrapping descendants.
77class BaseClass final : public FieldNode {
78  const QualType BaseClassT;
79
80public:
81  BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
82    assert(!T.isNull());
83    assert(T->getAsCXXRecordDecl());
84  }
85
86  void printNoteMsg(llvm::raw_ostream &Out) const override {
87    llvm_unreachable("This node can never be the final node in the "
88                     "fieldchain!");
89  }
90
91  void printPrefix(llvm::raw_ostream &Out) const override {}
92
93  void printNode(llvm::raw_ostream &Out) const override {
94    Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
95  }
96
97  void printSeparator(llvm::raw_ostream &Out) const override {}
98
99  bool isBase() const override { return true; }
100};
101
102} // end of anonymous namespace
103
104// Utility function declarations.
105
106/// Returns the region that was constructed by CtorDecl, or nullptr if that
107/// isn't possible.
108static const TypedValueRegion *
109getConstructedRegion(const CXXConstructorDecl *CtorDecl,
110                     CheckerContext &Context);
111
112/// Checks whether the object constructed by \p Ctor will be analyzed later
113/// (e.g. if the object is a field of another object, in which case we'd check
114/// it multiple times).
115static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
116                                      CheckerContext &Context);
117
118/// Checks whether RD contains a field with a name or type name that matches
119/// \p Pattern.
120static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
121
122/// Checks _syntactically_ whether it is possible to access FD from the record
123/// that contains it without a preceding assert (even if that access happens
124/// inside a method). This is mainly used for records that act like unions, like
125/// having multiple bit fields, with only a fraction being properly initialized.
126/// If these fields are properly guarded with asserts, this method returns
127/// false.
128///
129/// Since this check is done syntactically, this method could be inaccurate.
130static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
131
132//===----------------------------------------------------------------------===//
133//                  Methods for UninitializedObjectChecker.
134//===----------------------------------------------------------------------===//
135
136void UninitializedObjectChecker::checkEndFunction(
137    const ReturnStmt *RS, CheckerContext &Context) const {
138
139  const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
140      Context.getLocationContext()->getDecl());
141  if (!CtorDecl)
142    return;
143
144  if (!CtorDecl->isUserProvided())
145    return;
146
147  if (CtorDecl->getParent()->isUnion())
148    return;
149
150  // This avoids essentially the same error being reported multiple times.
151  if (willObjectBeAnalyzedLater(CtorDecl, Context))
152    return;
153
154  const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
155  if (!R)
156    return;
157
158  FindUninitializedFields F(Context.getState(), R, Opts);
159
160  std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
161      F.getResults();
162
163  ProgramStateRef UpdatedState = UninitInfo.first;
164  const UninitFieldMap &UninitFields = UninitInfo.second;
165
166  if (UninitFields.empty()) {
167    Context.addTransition(UpdatedState);
168    return;
169  }
170
171  // There are uninitialized fields in the record.
172
173  ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
174  if (!Node)
175    return;
176
177  PathDiagnosticLocation LocUsedForUniqueing;
178  const Stmt *CallSite = Context.getStackFrame()->getCallSite();
179  if (CallSite)
180    LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
181        CallSite, Context.getSourceManager(), Node->getLocationContext());
182
183  // For Plist consumers that don't support notes just yet, we'll convert notes
184  // to warnings.
185  if (Opts.ShouldConvertNotesToWarnings) {
186    for (const auto &Pair : UninitFields) {
187
188      auto Report = std::make_unique<PathSensitiveBugReport>(
189          *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
190          Node->getLocationContext()->getDecl());
191      Context.emitReport(std::move(Report));
192    }
193    return;
194  }
195
196  SmallString<100> WarningBuf;
197  llvm::raw_svector_ostream WarningOS(WarningBuf);
198  WarningOS << UninitFields.size() << " uninitialized field"
199            << (UninitFields.size() == 1 ? "" : "s")
200            << " at the end of the constructor call";
201
202  auto Report = std::make_unique<PathSensitiveBugReport>(
203      *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
204      Node->getLocationContext()->getDecl());
205
206  for (const auto &Pair : UninitFields) {
207    Report->addNote(Pair.second,
208                    PathDiagnosticLocation::create(Pair.first->getDecl(),
209                                                   Context.getSourceManager()));
210  }
211  Context.emitReport(std::move(Report));
212}
213
214void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
215                                                  CheckerContext &C) const {
216  ProgramStateRef State = C.getState();
217  for (const MemRegion *R : State->get<AnalyzedRegions>()) {
218    if (!SR.isLiveRegion(R))
219      State = State->remove<AnalyzedRegions>(R);
220  }
221}
222
223//===----------------------------------------------------------------------===//
224//                   Methods for FindUninitializedFields.
225//===----------------------------------------------------------------------===//
226
227FindUninitializedFields::FindUninitializedFields(
228    ProgramStateRef State, const TypedValueRegion *const R,
229    const UninitObjCheckerOptions &Opts)
230    : State(State), ObjectR(R), Opts(Opts) {
231
232  isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
233
234  // In non-pedantic mode, if ObjectR doesn't contain a single initialized
235  // field, we'll assume that Object was intentionally left uninitialized.
236  if (!Opts.IsPedantic && !isAnyFieldInitialized())
237    UninitFields.clear();
238}
239
240bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
241                                                const MemRegion *PointeeR) {
242  const FieldRegion *FR = Chain.getUninitRegion();
243
244  assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
245         "One must also pass the pointee region as a parameter for "
246         "dereferenceable fields!");
247
248  if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
249          FR->getDecl()->getLocation()))
250    return false;
251
252  if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
253    return false;
254
255  if (State->contains<AnalyzedRegions>(FR))
256    return false;
257
258  if (PointeeR) {
259    if (State->contains<AnalyzedRegions>(PointeeR)) {
260      return false;
261    }
262    State = State->add<AnalyzedRegions>(PointeeR);
263  }
264
265  State = State->add<AnalyzedRegions>(FR);
266
267  UninitFieldMap::mapped_type NoteMsgBuf;
268  llvm::raw_svector_ostream OS(NoteMsgBuf);
269  Chain.printNoteMsg(OS);
270
271  return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
272}
273
274bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
275                                               FieldChainInfo LocalChain) {
276  assert(R->getValueType()->isRecordType() &&
277         !R->getValueType()->isUnionType() &&
278         "This method only checks non-union record objects!");
279
280  const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
281
282  if (!RD) {
283    IsAnyFieldInitialized = true;
284    return true;
285  }
286
287  if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
288      shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
289    IsAnyFieldInitialized = true;
290    return false;
291  }
292
293  bool ContainsUninitField = false;
294
295  // Are all of this non-union's fields initialized?
296  for (const FieldDecl *I : RD->fields()) {
297
298    const auto FieldVal =
299        State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
300    const auto *FR = FieldVal.getRegionAs<FieldRegion>();
301    QualType T = I->getType();
302
303    // If LocalChain already contains FR, then we encountered a cyclic
304    // reference. In this case, region FR is already under checking at an
305    // earlier node in the directed tree.
306    if (LocalChain.contains(FR))
307      return false;
308
309    if (T->isStructureOrClassType()) {
310      if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
311        ContainsUninitField = true;
312      continue;
313    }
314
315    if (T->isUnionType()) {
316      if (isUnionUninit(FR)) {
317        if (addFieldToUninits(LocalChain.add(RegularField(FR))))
318          ContainsUninitField = true;
319      } else
320        IsAnyFieldInitialized = true;
321      continue;
322    }
323
324    if (T->isArrayType()) {
325      IsAnyFieldInitialized = true;
326      continue;
327    }
328
329    SVal V = State->getSVal(FieldVal);
330
331    if (isDereferencableType(T) || isa<nonloc::LocAsInteger>(V)) {
332      if (isDereferencableUninit(FR, LocalChain))
333        ContainsUninitField = true;
334      continue;
335    }
336
337    if (isPrimitiveType(T)) {
338      if (isPrimitiveUninit(V)) {
339        if (addFieldToUninits(LocalChain.add(RegularField(FR))))
340          ContainsUninitField = true;
341      }
342      continue;
343    }
344
345    llvm_unreachable("All cases are handled!");
346  }
347
348  // Checking bases. The checker will regard inherited data members as direct
349  // fields.
350  const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
351  if (!CXXRD)
352    return ContainsUninitField;
353
354  for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
355    const auto *BaseRegion = State->getLValue(BaseSpec, R)
356                                 .castAs<loc::MemRegionVal>()
357                                 .getRegionAs<TypedValueRegion>();
358
359    // If the head of the list is also a BaseClass, we'll overwrite it to avoid
360    // note messages like 'this->A::B::x'.
361    if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
362      if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
363                                           BaseClass(BaseSpec.getType()))))
364        ContainsUninitField = true;
365    } else {
366      if (isNonUnionUninit(BaseRegion,
367                           LocalChain.add(BaseClass(BaseSpec.getType()))))
368        ContainsUninitField = true;
369    }
370  }
371
372  return ContainsUninitField;
373}
374
375bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
376  assert(R->getValueType()->isUnionType() &&
377         "This method only checks union objects!");
378  // TODO: Implement support for union fields.
379  return false;
380}
381
382bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
383  if (V.isUndef())
384    return true;
385
386  IsAnyFieldInitialized = true;
387  return false;
388}
389
390//===----------------------------------------------------------------------===//
391//                       Methods for FieldChainInfo.
392//===----------------------------------------------------------------------===//
393
394bool FieldChainInfo::contains(const FieldRegion *FR) const {
395  for (const FieldNode &Node : Chain) {
396    if (Node.isSameRegion(FR))
397      return true;
398  }
399  return false;
400}
401
/// Prints every element except the last to `Out`. Since ImmutableLists store
/// elements in reverse order, and have no reverse iterators, we use a
/// recursive function to print the fieldchain correctly. The last element in
/// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
406static void printTail(llvm::raw_ostream &Out,
407                      const FieldChainInfo::FieldChain L);
408
409// FIXME: This function constructs an incorrect string in the following case:
410//
411//   struct Base { int x; };
412//   struct D1 : Base {}; struct D2 : Base {};
413//
414//   struct MostDerived : D1, D2 {
415//     MostDerived() {}
416//   }
417//
418// A call to MostDerived::MostDerived() will cause two notes that say
419// "uninitialized field 'this->x'", but we can't refer to 'x' directly,
420// we need an explicit namespace resolution whether the uninit field was
421// 'D1::x' or 'D2::x'.
422void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
423  if (Chain.isEmpty())
424    return;
425
426  const FieldNode &LastField = getHead();
427
428  LastField.printNoteMsg(Out);
429  Out << '\'';
430
431  for (const FieldNode &Node : Chain)
432    Node.printPrefix(Out);
433
434  Out << "this->";
435  printTail(Out, Chain.getTail());
436  LastField.printNode(Out);
437  Out << '\'';
438}
439
440static void printTail(llvm::raw_ostream &Out,
441                      const FieldChainInfo::FieldChain L) {
442  if (L.isEmpty())
443    return;
444
445  printTail(Out, L.getTail());
446
447  L.getHead().printNode(Out);
448  L.getHead().printSeparator(Out);
449}
450
451//===----------------------------------------------------------------------===//
452//                           Utility functions.
453//===----------------------------------------------------------------------===//
454
455static const TypedValueRegion *
456getConstructedRegion(const CXXConstructorDecl *CtorDecl,
457                     CheckerContext &Context) {
458
459  Loc ThisLoc =
460      Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame());
461
462  SVal ObjectV = Context.getState()->getSVal(ThisLoc);
463
464  auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
465  if (R && !R->getValueType()->getAsCXXRecordDecl())
466    return nullptr;
467
468  return R;
469}
470
471static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
472                                      CheckerContext &Context) {
473
474  const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
475  if (!CurrRegion)
476    return false;
477
478  const LocationContext *LC = Context.getLocationContext();
479  while ((LC = LC->getParent())) {
480
481    // If \p Ctor was called by another constructor.
482    const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
483    if (!OtherCtor)
484      continue;
485
486    const TypedValueRegion *OtherRegion =
487        getConstructedRegion(OtherCtor, Context);
488    if (!OtherRegion)
489      continue;
490
491    // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
492    // during the analysis of OtherRegion.
493    if (CurrRegion->isSubRegionOf(OtherRegion))
494      return true;
495  }
496
497  return false;
498}
499
500static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
501  llvm::Regex R(Pattern);
502
503  for (const FieldDecl *FD : RD->fields()) {
504    if (R.match(FD->getType().getAsString()))
505      return true;
506    if (R.match(FD->getName()))
507      return true;
508  }
509
510  return false;
511}
512
513static const Stmt *getMethodBody(const CXXMethodDecl *M) {
514  if (isa<CXXConstructorDecl>(M))
515    return nullptr;
516
517  if (!M->isDefined())
518    return nullptr;
519
520  return M->getDefinition()->getBody();
521}
522
523static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
524
525  if (FD->getAccess() == AccessSpecifier::AS_public)
526    return true;
527
528  const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent());
529
530  if (!Parent)
531    return true;
532
533  Parent = Parent->getDefinition();
534  assert(Parent && "The record's definition must be avaible if an uninitialized"
535                   " field of it was found!");
536
537  ASTContext &AC = State->getStateManager().getContext();
538
539  auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access");
540
541  auto AssertLikeM = callExpr(callee(functionDecl(
542      hasAnyName("exit", "panic", "error", "Assert", "assert", "ziperr",
543                 "assfail", "db_error", "__assert", "__assert2", "_wassert",
544                 "__assert_rtn", "__assert_fail", "dtrace_assfail",
545                 "yy_fatal_error", "_XCAssertionFailureHandler",
546                 "_DTAssertionFailureHandler", "_TSAssertionFailureHandler"))));
547
548  auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn())));
549
550  auto GuardM =
551      stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
552            NoReturnFuncM))
553          .bind("guard");
554
555  for (const CXXMethodDecl *M : Parent->methods()) {
556    const Stmt *MethodBody = getMethodBody(M);
557    if (!MethodBody)
558      continue;
559
560    auto Accesses = match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC);
561    if (Accesses.empty())
562      continue;
563    const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access");
564    assert(FirstAccess);
565
566    auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC);
567    if (Guards.empty())
568      return true;
569    const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard");
570    assert(FirstGuard);
571
572    if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
573      return true;
574  }
575
576  return false;
577}
578
579std::string clang::ento::getVariableName(const FieldDecl *Field) {
580  // If Field is a captured lambda variable, Field->getName() will return with
581  // an empty string. We can however acquire it's name from the lambda's
582  // captures.
583  const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
584
585  if (CXXParent && CXXParent->isLambda()) {
586    assert(CXXParent->captures_begin());
587    auto It = CXXParent->captures_begin() + Field->getFieldIndex();
588
589    if (It->capturesVariable())
590      return llvm::Twine("/*captured variable*/" +
591                         It->getCapturedVar()->getName())
592          .str();
593
594    if (It->capturesThis())
595      return "/*'this' capture*/";
596
597    llvm_unreachable("No other capture type is expected!");
598  }
599
600  return std::string(Field->getName());
601}
602
603void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
604  auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
605
606  const AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
607  UninitObjCheckerOptions &ChOpts = Chk->Opts;
608
609  ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(Chk, "Pedantic");
610  ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
611      Chk, "NotesAsWarnings");
612  ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
613      Chk, "CheckPointeeInitialization");
614  ChOpts.IgnoredRecordsWithFieldPattern =
615      std::string(AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField"));
616  ChOpts.IgnoreGuardedFields =
617      AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields");
618
619  std::string ErrorMsg;
620  if (!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(ErrorMsg))
621    Mgr.reportInvalidCheckerOptionValue(Chk, "IgnoreRecordsWithField",
622        "a valid regex, building failed with error message "
623        "\"" + ErrorMsg + "\"");
624}
625
626bool ento::shouldRegisterUninitializedObjectChecker(const CheckerManager &mgr) {
627  return true;
628}
629