1//=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines a check that marks a raw pointer to a C++ container's
// inner buffer released when the object is destroyed or is used in a way that
// may invalidate the buffer. This information can be used by MallocChecker to
// detect use-after-free problems.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AllocationState.h"
16#include "InterCheckerAPI.h"
17#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
20#include "clang/StaticAnalyzer/Core/Checker.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24
25using namespace clang;
26using namespace ento;
27
// Associate container objects with a set of raw pointer symbols.
// PtrSet is the set type holding the raw pointer symbols tracked for one
// container; RawPtrMap is the program state map keyed by the container
// object's memory region whose values are those sets.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31
32
33namespace {
34
35class InnerPointerChecker
36    : public Checker<check::DeadSymbols, check::PostCall> {
37
38  CallDescription AppendFn, AssignFn, AddressofFn, AddressofFn_, ClearFn,
39      CStrFn, DataFn, DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn,
40      ReplaceFn, ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn;
41
42public:
43  class InnerPointerBRVisitor : public BugReporterVisitor {
44    SymbolRef PtrToBuf;
45
46  public:
47    InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
48
49    static void *getTag() {
50      static int Tag = 0;
51      return &Tag;
52    }
53
54    void Profile(llvm::FoldingSetNodeID &ID) const override {
55      ID.AddPointer(getTag());
56    }
57
58    PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
59                                     BugReporterContext &BRC,
60                                     PathSensitiveBugReport &BR) override;
61
62    // FIXME: Scan the map once in the visitor's constructor and do a direct
63    // lookup by region.
64    bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
65      RawPtrMapTy Map = State->get<RawPtrMap>();
66      for (const auto &Entry : Map) {
67        if (Entry.second.contains(Sym))
68          return true;
69      }
70      return false;
71    }
72  };
73
74  InnerPointerChecker()
75      : AppendFn({"std", "basic_string", "append"}),
76        AssignFn({"std", "basic_string", "assign"}),
77        AddressofFn({"std", "addressof"}), AddressofFn_({"std", "__addressof"}),
78        ClearFn({"std", "basic_string", "clear"}),
79        CStrFn({"std", "basic_string", "c_str"}), DataFn({"std", "data"}, 1),
80        DataMemberFn({"std", "basic_string", "data"}),
81        EraseFn({"std", "basic_string", "erase"}),
82        InsertFn({"std", "basic_string", "insert"}),
83        PopBackFn({"std", "basic_string", "pop_back"}),
84        PushBackFn({"std", "basic_string", "push_back"}),
85        ReplaceFn({"std", "basic_string", "replace"}),
86        ReserveFn({"std", "basic_string", "reserve"}),
87        ResizeFn({"std", "basic_string", "resize"}),
88        ShrinkToFitFn({"std", "basic_string", "shrink_to_fit"}),
89        SwapFn({"std", "basic_string", "swap"}) {}
90
91  /// Check whether the called member function potentially invalidates
92  /// pointers referring to the container object's inner buffer.
93  bool isInvalidatingMemberFunction(const CallEvent &Call) const;
94
95  /// Check whether the called function returns a raw inner pointer.
96  bool isInnerPointerAccessFunction(const CallEvent &Call) const;
97
98  /// Mark pointer symbols associated with the given memory region released
99  /// in the program state.
100  void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
101                              const MemRegion *ObjRegion,
102                              CheckerContext &C) const;
103
104  /// Standard library functions that take a non-const `basic_string` argument by
105  /// reference may invalidate its inner pointers. Check for these cases and
106  /// mark the pointers released.
107  void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
108                              CheckerContext &C) const;
109
110  /// Record the connection between raw pointers referring to a container
111  /// object's inner buffer and the object's memory region in the program state.
112  /// Mark potentially invalidated pointers released.
113  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
114
115  /// Clean up the program state map.
116  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
117};
118
119} // end anonymous namespace
120
121bool InnerPointerChecker::isInvalidatingMemberFunction(
122        const CallEvent &Call) const {
123  if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) {
124    OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
125    if (Opc == OO_Equal || Opc == OO_PlusEqual)
126      return true;
127    return false;
128  }
129  return isa<CXXDestructorCall>(Call) ||
130         matchesAny(Call, AppendFn, AssignFn, ClearFn, EraseFn, InsertFn,
131                    PopBackFn, PushBackFn, ReplaceFn, ReserveFn, ResizeFn,
132                    ShrinkToFitFn, SwapFn);
133}
134
135bool InnerPointerChecker::isInnerPointerAccessFunction(
136    const CallEvent &Call) const {
137  return matchesAny(Call, CStrFn, DataFn, DataMemberFn);
138}
139
140void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
141                                                 ProgramStateRef State,
142                                                 const MemRegion *MR,
143                                                 CheckerContext &C) const {
144  if (const PtrSet *PS = State->get<RawPtrMap>(MR)) {
145    const Expr *Origin = Call.getOriginExpr();
146    for (const auto Symbol : *PS) {
147      // NOTE: `Origin` may be null, and will be stored so in the symbol's
148      // `RefState` in MallocChecker's `RegionState` program state map.
149      State = allocation_state::markReleased(State, Symbol, Origin);
150    }
151    State = State->remove<RawPtrMap>(MR);
152    C.addTransition(State);
153    return;
154  }
155}
156
157void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
158                                                 ProgramStateRef State,
159                                                 CheckerContext &C) const {
160  if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) {
161    const FunctionDecl *FD = FC->getDecl();
162    if (!FD || !FD->isInStdNamespace())
163      return;
164
165    for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
166      QualType ParamTy = FD->getParamDecl(I)->getType();
167      if (!ParamTy->isReferenceType() ||
168          ParamTy->getPointeeType().isConstQualified())
169        continue;
170
171      // In case of member operator calls, `this` is counted as an
172      // argument but not as a parameter.
173      bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC);
174      unsigned ArgI = isaMemberOpCall ? I+1 : I;
175
176      SVal Arg = FC->getArgSVal(ArgI);
177      const auto *ArgRegion =
178          dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion());
179      if (!ArgRegion)
180        continue;
181
182      // std::addressof functions accepts a non-const reference as an argument,
183      // but doesn't modify it.
184      if (matchesAny(Call, AddressofFn, AddressofFn_))
185        continue;
186
187      markPtrSymbolsReleased(Call, State, ArgRegion, C);
188    }
189  }
190}
191
192// [string.require]
193//
194// "References, pointers, and iterators referring to the elements of a
195// basic_string sequence may be invalidated by the following uses of that
196// basic_string object:
197//
198// -- As an argument to any standard library function taking a reference
199// to non-const basic_string as an argument. For example, as an argument to
200// non-member functions swap(), operator>>(), and getline(), or as an argument
201// to basic_string::swap().
202//
203// -- Calling non-const member functions, except operator[], at, front, back,
204// begin, rbegin, end, and rend."
205
206void InnerPointerChecker::checkPostCall(const CallEvent &Call,
207                                        CheckerContext &C) const {
208  ProgramStateRef State = C.getState();
209
210  // TODO: Do we need these to be typed?
211  const TypedValueRegion *ObjRegion = nullptr;
212
213  if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) {
214    ObjRegion = dyn_cast_or_null<TypedValueRegion>(
215        ICall->getCXXThisVal().getAsRegion());
216
217    // Check [string.require] / second point.
218    if (isInvalidatingMemberFunction(Call)) {
219      markPtrSymbolsReleased(Call, State, ObjRegion, C);
220      return;
221    }
222  }
223
224  if (isInnerPointerAccessFunction(Call)) {
225
226    if (isa<SimpleFunctionCall>(Call)) {
227      // NOTE: As of now, we only have one free access function: std::data.
228      //       If we add more functions like this in the list, hardcoded
229      //       argument index should be changed.
230      ObjRegion =
231          dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion());
232    }
233
234    if (!ObjRegion)
235      return;
236
237    SVal RawPtr = Call.getReturnValue();
238    if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
239      // Start tracking this raw pointer by adding it to the set of symbols
240      // associated with this container object in the program state map.
241
242      PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
243      const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion);
244      PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
245      assert(C.wasInlined || !Set.contains(Sym));
246      Set = F.add(Set, Sym);
247
248      State = State->set<RawPtrMap>(ObjRegion, Set);
249      C.addTransition(State);
250    }
251
252    return;
253  }
254
255  // Check [string.require] / first point.
256  checkFunctionArguments(Call, State, C);
257}
258
259void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
260                                           CheckerContext &C) const {
261  ProgramStateRef State = C.getState();
262  PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
263  RawPtrMapTy RPM = State->get<RawPtrMap>();
264  for (const auto &Entry : RPM) {
265    if (!SymReaper.isLiveRegion(Entry.first)) {
266      // Due to incomplete destructor support, some dead regions might
267      // remain in the program state map. Clean them up.
268      State = State->remove<RawPtrMap>(Entry.first);
269    }
270    if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) {
271      PtrSet CleanedUpSet = *OldSet;
272      for (const auto Symbol : Entry.second) {
273        if (!SymReaper.isLive(Symbol))
274          CleanedUpSet = F.remove(CleanedUpSet, Symbol);
275      }
276      State = CleanedUpSet.isEmpty()
277                  ? State->remove<RawPtrMap>(Entry.first)
278                  : State->set<RawPtrMap>(Entry.first, CleanedUpSet);
279    }
280  }
281  C.addTransition(State);
282}
283
284namespace clang {
285namespace ento {
286namespace allocation_state {
287
288std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
289  return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym);
290}
291
292const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) {
293  RawPtrMapTy Map = State->get<RawPtrMap>();
294  for (const auto &Entry : Map) {
295    if (Entry.second.contains(Sym)) {
296      return Entry.first;
297    }
298  }
299  return nullptr;
300}
301
302} // end namespace allocation_state
303} // end namespace ento
304} // end namespace clang
305
306PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
307    const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) {
308  if (!isSymbolTracked(N->getState(), PtrToBuf) ||
309      isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf))
310    return nullptr;
311
312  const Stmt *S = N->getStmtForDiagnostics();
313  if (!S)
314    return nullptr;
315
316  const MemRegion *ObjRegion =
317      allocation_state::getContainerObjRegion(N->getState(), PtrToBuf);
318  const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
319  QualType ObjTy = TypedRegion->getValueType();
320
321  SmallString<256> Buf;
322  llvm::raw_svector_ostream OS(Buf);
323  OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here";
324  PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
325                             N->getLocationContext());
326  return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true);
327}
328
// Registration entry point for this checker: first calls
// registerInnerPointerCheckerAux, then registers InnerPointerChecker itself
// with the manager.
// NOTE(review): registerInnerPointerCheckerAux is not defined in this file
// (presumably declared in InterCheckerAPI.h) — confirm what it sets up.
void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
  registerInnerPointerCheckerAux(Mgr);
  Mgr.registerChecker<InnerPointerChecker>();
}
333
// Always returns true; the manager argument is not consulted.
bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
  return true;
}
337