1//===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/Analyses/CalledOnceCheck.h"
10#include "clang/AST/ASTContext.h"
11#include "clang/AST/Attr.h"
12#include "clang/AST/Decl.h"
13#include "clang/AST/DeclBase.h"
14#include "clang/AST/Expr.h"
15#include "clang/AST/ExprObjC.h"
16#include "clang/AST/OperationKinds.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/RecursiveASTVisitor.h"
19#include "clang/AST/Stmt.h"
20#include "clang/AST/StmtObjC.h"
21#include "clang/AST/StmtVisitor.h"
22#include "clang/AST/Type.h"
23#include "clang/Analysis/AnalysisDeclContext.h"
24#include "clang/Analysis/CFG.h"
25#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
26#include "clang/Basic/Builtins.h"
27#include "clang/Basic/IdentifierTable.h"
28#include "clang/Basic/LLVM.h"
29#include "llvm/ADT/BitVector.h"
30#include "llvm/ADT/BitmaskEnum.h"
31#include "llvm/ADT/Optional.h"
32#include "llvm/ADT/PointerIntPair.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Sequence.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/Compiler.h"
39#include "llvm/Support/ErrorHandling.h"
40#include <memory>
41
42using namespace clang;
43
44namespace {
45static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2;
46template <class T>
47using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>;
48static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8;
49template <class T>
50using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>;
51constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = {
52    "completionHandler", "completion",      "withCompletionHandler",
53    "withCompletion",    "completionBlock", "withCompletionBlock",
54    "replyTo",           "reply",           "withReplyTo"};
55constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = {
56    "WithCompletionHandler", "WithCompletion", "WithCompletionBlock",
57    "WithReplyTo", "WithReply"};
58constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = {
59    "error", "cancel", "shouldCall", "done", "OK", "success"};
60
61struct KnownCalledOnceParameter {
62  llvm::StringLiteral FunctionName;
63  unsigned ParamIndex;
64};
65constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = {
66    {llvm::StringLiteral{"dispatch_async"}, 1},
67    {llvm::StringLiteral{"dispatch_async_and_wait"}, 1},
68    {llvm::StringLiteral{"dispatch_after"}, 2},
69    {llvm::StringLiteral{"dispatch_sync"}, 1},
70    {llvm::StringLiteral{"dispatch_once"}, 1},
71    {llvm::StringLiteral{"dispatch_barrier_async"}, 1},
72    {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1},
73    {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}};
74
75class ParameterStatus {
76public:
77  // Status kind is basically the main part of parameter's status.
78  // The kind represents our knowledge (so far) about a tracked parameter
79  // in the context of this analysis.
80  //
81  // Since we want to report on missing and extraneous calls, we need to
82  // track the fact whether paramater was called or not.  This automatically
83  // decides two kinds: `NotCalled` and `Called`.
84  //
85  // One of the erroneous situations is the case when parameter is called only
86  // on some of the paths.  We could've considered it `NotCalled`, but we want
87  // to report double call warnings even if these two calls are not guaranteed
88  // to happen in every execution.  We also don't want to have it as `Called`
89  // because not calling tracked parameter on all of the paths is an error
90  // on its own.  For these reasons, we need to have a separate kind,
91  // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid
92  // confusion.
93  //
94  // Two violations of calling parameter more than once and not calling it on
95  // every path are not, however, mutually exclusive.  In situations where both
96  // violations take place, we prefer to report ONLY double call.  It's always
97  // harder to pinpoint a bug that has arisen when a user neglects to take the
98  // right action (and therefore, no action is taken), than when a user takes
99  // the wrong action.  And, in order to remember that we already reported
100  // a double call, we need another kind: `Reported`.
101  //
102  // Our analysis is intra-procedural and, while in the perfect world,
103  // developers only use tracked parameters to call them, in the real world,
104  // the picture might be different.  Parameters can be stored in global
105  // variables or leaked into other functions that we know nothing about.
106  // We try to be lenient and trust users.  Another kind `Escaped` reflects
107  // such situations.  We don't know if it gets called there or not, but we
108  // should always think of `Escaped` as the best possible option.
109  //
110  // Some of the paths in the analyzed functions might end with a call
111  // to noreturn functions.  Such paths are not required to have parameter
112  // calls and we want to track that.  For the purposes of better diagnostics,
113  // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`.
114  //
115  // Additionally, we have `NotVisited` kind that tells us nothing about
116  // a tracked parameter, but is used for tracking analyzed (aka visited)
117  // basic blocks.
118  //
119  // If we consider `|` to be a JOIN operation of two kinds coming from
120  // two different paths, the following properties must hold:
121  //
122  //   1. for any Kind K: K | K == K
123  //      Joining two identical kinds should result in the same kind.
124  //
125  //   2. for any Kind K: Reported | K == Reported
126  //      Doesn't matter on which path it was reported, it still is.
127  //
128  //   3. for any Kind K: NoReturn | K == K
129  //      We can totally ignore noreturn paths during merges.
130  //
131  //   4. DefinitelyCalled | NotCalled == MaybeCalled
132  //      Called on one path, not called on another - that's simply
133  //      a definition for MaybeCalled.
134  //
135  //   5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]:
136  //      Escaped | K == K
137  //      Escaped mirrors other statuses after joins.
138  //      Every situation, when we join any of the listed kinds K,
139  //      is a violation.  For this reason, in order to assume the
140  //      best outcome for this escape, we consider it to be the
141  //      same as the other path.
142  //
143  //   6. for any Kind K in [DefinitelyCalled, NotCalled]:
144  //      MaybeCalled | K == MaybeCalled
145  //      MaybeCalled should basically stay after almost every join.
146  enum Kind {
147    // No-return paths should be absolutely transparent for the analysis.
148    // 0x0 is the identity element for selected join operation (binary or).
149    NoReturn = 0x0, /* 0000 */
150    // Escaped marks situations when marked parameter escaped into
151    // another function (so we can assume that it was possibly called there).
152    Escaped = 0x1, /* 0001 */
153    // Parameter was definitely called once at this point.
154    DefinitelyCalled = 0x3, /* 0011 */
155    // Kinds less or equal to NON_ERROR_STATUS are not considered errors.
156    NON_ERROR_STATUS = DefinitelyCalled,
157    // Parameter was not yet called.
158    NotCalled = 0x5, /* 0101 */
159    // Parameter was not called at least on one path leading to this point,
160    // while there is also at least one path that it gets called.
161    MaybeCalled = 0x7, /* 0111 */
162    // Parameter was not yet analyzed.
163    NotVisited = 0x8, /* 1000 */
164    // We already reported a violation and stopped tracking calls for this
165    // parameter.
166    Reported = 0x15, /* 1111 */
167    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
168  };
169
170  constexpr ParameterStatus() = default;
171  /* implicit */ ParameterStatus(Kind K) : StatusKind(K) {
172    assert(!seenAnyCalls(K) && "Can't initialize status without a call");
173  }
174  ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) {
175    assert(seenAnyCalls(K) && "This kind is not supposed to have a call");
176  }
177
178  const Expr &getCall() const {
179    assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call");
180    return *Call;
181  }
182  static bool seenAnyCalls(Kind K) {
183    return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported;
184  }
185  bool seenAnyCalls() const { return seenAnyCalls(getKind()); }
186
187  static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; }
188  bool isErrorStatus() const { return isErrorStatus(getKind()); }
189
190  Kind getKind() const { return StatusKind; }
191
192  void join(const ParameterStatus &Other) {
193    // If we have a pointer already, let's keep it.
194    // For the purposes of the analysis, it doesn't really matter
195    // which call we report.
196    //
197    // If we don't have a pointer, let's take whatever gets joined.
198    if (!Call) {
199      Call = Other.Call;
200    }
201    // Join kinds.
202    StatusKind |= Other.getKind();
203  }
204
205  bool operator==(const ParameterStatus &Other) const {
206    // We compare only kinds, pointers on their own is only additional
207    // information.
208    return getKind() == Other.getKind();
209  }
210
211private:
212  // It would've been a perfect place to use llvm::PointerIntPair, but
213  // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2.
214  Kind StatusKind = NotVisited;
215  const Expr *Call = nullptr;
216};
217
218/// State aggregates statuses of all tracked parameters.
219class State {
220public:
221  State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited)
222      : ParamData(Size, K) {}
223
224  /// Return status of a parameter with the given index.
225  /// \{
226  ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; }
227  const ParameterStatus &getStatusFor(unsigned Index) const {
228    return ParamData[Index];
229  }
230  /// \}
231
232  /// Return true if parameter with the given index can be called.
233  bool seenAnyCalls(unsigned Index) const {
234    return getStatusFor(Index).seenAnyCalls();
235  }
236  /// Return a reference that we consider a call.
237  ///
238  /// Should only be used for parameters that can be called.
239  const Expr &getCallFor(unsigned Index) const {
240    return getStatusFor(Index).getCall();
241  }
242  /// Return status kind of parameter with the given index.
243  ParameterStatus::Kind getKindFor(unsigned Index) const {
244    return getStatusFor(Index).getKind();
245  }
246
247  bool isVisited() const {
248    return llvm::all_of(ParamData, [](const ParameterStatus &S) {
249      return S.getKind() != ParameterStatus::NotVisited;
250    });
251  }
252
253  // Join other state into the current state.
254  void join(const State &Other) {
255    assert(ParamData.size() == Other.ParamData.size() &&
256           "Couldn't join statuses with different sizes");
257    for (auto Pair : llvm::zip(ParamData, Other.ParamData)) {
258      std::get<0>(Pair).join(std::get<1>(Pair));
259    }
260  }
261
262  using iterator = ParamSizedVector<ParameterStatus>::iterator;
263  using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator;
264
265  iterator begin() { return ParamData.begin(); }
266  iterator end() { return ParamData.end(); }
267
268  const_iterator begin() const { return ParamData.begin(); }
269  const_iterator end() const { return ParamData.end(); }
270
271  bool operator==(const State &Other) const {
272    return ParamData == Other.ParamData;
273  }
274
275private:
276  ParamSizedVector<ParameterStatus> ParamData;
277};
278
279/// A simple class that finds DeclRefExpr in the given expression.
280///
281/// However, we don't want to find ANY nested DeclRefExpr skipping whatever
282/// expressions on our way.  Only certain expressions considered "no-op"
283/// for our task are indeed skipped.
284class DeclRefFinder
285    : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> {
286public:
287  /// Find a DeclRefExpr in the given expression.
288  ///
289  /// In its most basic form (ShouldRetrieveFromComparisons == false),
290  /// this function can be simply reduced to the following question:
291  ///
292  ///   - If expression E is used as a function argument, could we say
293  ///     that DeclRefExpr nested in E is used as an argument?
294  ///
295  /// According to this rule, we can say that parens, casts and dereferencing
296  /// (dereferencing only applied to function pointers, but this is our case)
297  /// can be skipped.
298  ///
299  /// When we should look into comparisons the question changes to:
300  ///
301  ///   - If expression E is used as a condition, could we say that
302  ///     DeclRefExpr is being checked?
303  ///
304  /// And even though, these are two different questions, they have quite a lot
305  /// in common.  Actually, we can say that whatever expression answers
306  /// positively the first question also fits the second question as well.
307  ///
308  /// In addition, we skip binary operators == and !=, and unary opeartor !.
309  static const DeclRefExpr *find(const Expr *E,
310                                 bool ShouldRetrieveFromComparisons = false) {
311    return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E);
312  }
313
314  const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; }
315
316  const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) {
317    switch (UO->getOpcode()) {
318    case UO_LNot:
319      // We care about logical not only if we care about comparisons.
320      if (!ShouldRetrieveFromComparisons)
321        return nullptr;
322      LLVM_FALLTHROUGH;
323    // Function pointer/references can be dereferenced before a call.
324    // That doesn't make it, however, any different from a regular call.
325    // For this reason, dereference operation is a "no-op".
326    case UO_Deref:
327      return Visit(UO->getSubExpr());
328    default:
329      return nullptr;
330    }
331  }
332
333  const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) {
334    if (!ShouldRetrieveFromComparisons)
335      return nullptr;
336
337    switch (BO->getOpcode()) {
338    case BO_EQ:
339    case BO_NE: {
340      const DeclRefExpr *LHS = Visit(BO->getLHS());
341      return LHS ? LHS : Visit(BO->getRHS());
342    }
343    default:
344      return nullptr;
345    }
346  }
347
348  const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) {
349    return Visit(OVE->getSourceExpr());
350  }
351
352  const DeclRefExpr *VisitCallExpr(const CallExpr *CE) {
353    if (!ShouldRetrieveFromComparisons)
354      return nullptr;
355
356    // We want to see through some of the boolean builtin functions
357    // that we are likely to see in conditions.
358    switch (CE->getBuiltinCallee()) {
359    case Builtin::BI__builtin_expect:
360    case Builtin::BI__builtin_expect_with_probability: {
361      assert(CE->getNumArgs() >= 2);
362
363      const DeclRefExpr *Candidate = Visit(CE->getArg(0));
364      return Candidate != nullptr ? Candidate : Visit(CE->getArg(1));
365    }
366
367    case Builtin::BI__builtin_unpredictable:
368      return Visit(CE->getArg(0));
369
370    default:
371      return nullptr;
372    }
373  }
374
375  const DeclRefExpr *VisitExpr(const Expr *E) {
376    // It is a fallback method that gets called whenever the actual type
377    // of the given expression is not covered.
378    //
379    // We first check if we have anything to skip.  And then repeat the whole
380    // procedure for a nested expression instead.
381    const Expr *DeclutteredExpr = E->IgnoreParenCasts();
382    return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr;
383  }
384
385private:
386  DeclRefFinder(bool ShouldRetrieveFromComparisons)
387      : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {}
388
389  bool ShouldRetrieveFromComparisons;
390};
391
392const DeclRefExpr *findDeclRefExpr(const Expr *In,
393                                   bool ShouldRetrieveFromComparisons = false) {
394  return DeclRefFinder::find(In, ShouldRetrieveFromComparisons);
395}
396
397const ParmVarDecl *
398findReferencedParmVarDecl(const Expr *In,
399                          bool ShouldRetrieveFromComparisons = false) {
400  if (const DeclRefExpr *DR =
401          findDeclRefExpr(In, ShouldRetrieveFromComparisons)) {
402    return dyn_cast<ParmVarDecl>(DR->getDecl());
403  }
404
405  return nullptr;
406}
407
408/// Return conditions expression of a statement if it has one.
409const Expr *getCondition(const Stmt *S) {
410  if (!S) {
411    return nullptr;
412  }
413
414  if (const auto *If = dyn_cast<IfStmt>(S)) {
415    return If->getCond();
416  }
417  if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) {
418    return Ternary->getCond();
419  }
420
421  return nullptr;
422}
423
424/// A small helper class that collects all named identifiers in the given
425/// expression.  It traverses it recursively, so names from deeper levels
426/// of the AST will end up in the results.
427/// Results might have duplicate names, if this is a problem, convert to
428/// string sets afterwards.
429class NamesCollector : public RecursiveASTVisitor<NamesCollector> {
430public:
431  static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5;
432  using NameCollection =
433      llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>;
434
435  static NameCollection collect(const Expr *From) {
436    NamesCollector Impl;
437    Impl.TraverseStmt(const_cast<Expr *>(From));
438    return Impl.Result;
439  }
440
441  bool VisitDeclRefExpr(const DeclRefExpr *E) {
442    Result.push_back(E->getDecl()->getName());
443    return true;
444  }
445
446  bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) {
447    llvm::StringRef Name;
448
449    if (E->isImplicitProperty()) {
450      ObjCMethodDecl *PropertyMethodDecl = nullptr;
451      if (E->isMessagingGetter()) {
452        PropertyMethodDecl = E->getImplicitPropertyGetter();
453      } else {
454        PropertyMethodDecl = E->getImplicitPropertySetter();
455      }
456      assert(PropertyMethodDecl &&
457             "Implicit property must have associated declaration");
458      Name = PropertyMethodDecl->getSelector().getNameForSlot(0);
459    } else {
460      assert(E->isExplicitProperty());
461      Name = E->getExplicitProperty()->getName();
462    }
463
464    Result.push_back(Name);
465    return true;
466  }
467
468private:
469  NamesCollector() = default;
470  NameCollection Result;
471};
472
473/// Check whether the given expression mentions any of conventional names.
474bool mentionsAnyOfConventionalNames(const Expr *E) {
475  NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E);
476
477  return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) {
478    return llvm::any_of(
479        CONVENTIONAL_CONDITIONS,
480        [ConditionName](const llvm::StringLiteral &Conventional) {
481          return ConditionName.contains_lower(Conventional);
482        });
483  });
484}
485
486/// Clarification is a simple pair of a reason why parameter is not called
487/// on every path and a statement to blame.
488struct Clarification {
489  NeverCalledReason Reason;
490  const Stmt *Location;
491};
492
493/// A helper class that can produce a clarification based on the given pair
494/// of basic blocks.
495class NotCalledClarifier
496    : public ConstStmtVisitor<NotCalledClarifier,
497                              llvm::Optional<Clarification>> {
498public:
499  /// The main entrypoint for the class, the function that tries to find the
500  /// clarification of how to explain which sub-path starts with a CFG edge
501  /// from Conditional to SuccWithoutCall.
502  ///
503  /// This means that this function has one precondition:
504  ///   SuccWithoutCall should be a successor block for Conditional.
505  ///
506  /// Because clarification is not needed for non-trivial pairs of blocks
507  /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful
508  /// results only for such cases.  For this very reason, the parent basic
509  /// block, Conditional, is named that way, so it is clear what kind of
510  /// block is expected.
511  static llvm::Optional<Clarification>
512  clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) {
513    if (const Stmt *Terminator = Conditional->getTerminatorStmt()) {
514      return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator);
515    }
516    return llvm::None;
517  }
518
519  llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) {
520    return VisitBranchingBlock(If, NeverCalledReason::IfThen);
521  }
522
523  llvm::Optional<Clarification>
524  VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) {
525    return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen);
526  }
527
528  llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) {
529    const Stmt *CaseToBlame = SuccInQuestion->getLabel();
530    if (!CaseToBlame) {
531      // If interesting basic block is not labeled, it means that this
532      // basic block does not represent any of the cases.
533      return Clarification{NeverCalledReason::SwitchSkipped, Switch};
534    }
535
536    for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case;
537         Case = Case->getNextSwitchCase()) {
538      if (Case == CaseToBlame) {
539        return Clarification{NeverCalledReason::Switch, Case};
540      }
541    }
542
543    llvm_unreachable("Found unexpected switch structure");
544  }
545
546  llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) {
547    return VisitBranchingBlock(For, NeverCalledReason::LoopEntered);
548  }
549
550  llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) {
551    return VisitBranchingBlock(While, NeverCalledReason::LoopEntered);
552  }
553
554  llvm::Optional<Clarification>
555  VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) {
556    assert(Parent->succ_size() == 2 &&
557           "Branching block should have exactly two successors");
558    unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion);
559    NeverCalledReason ActualReason =
560        updateForSuccessor(DefaultReason, SuccessorIndex);
561    return Clarification{ActualReason, Terminator};
562  }
563
564  llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) {
565    // We don't want to report on short-curcuit logical operations.
566    return llvm::None;
567  }
568
569  llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) {
570    // If we got here, we didn't have a visit function for more derived
571    // classes of statement that this terminator actually belongs to.
572    //
573    // This is not a good scenario and should not happen in practice, but
574    // at least we'll warn the user.
575    return Clarification{NeverCalledReason::FallbackReason, Terminator};
576  }
577
578  static unsigned getSuccessorIndex(const CFGBlock *Parent,
579                                    const CFGBlock *Child) {
580    CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child);
581    assert(It != Parent->succ_end() &&
582           "Given blocks should be in parent-child relationship");
583    return It - Parent->succ_begin();
584  }
585
586  static NeverCalledReason
587  updateForSuccessor(NeverCalledReason ReasonForTrueBranch,
588                     unsigned SuccessorIndex) {
589    assert(SuccessorIndex <= 1);
590    unsigned RawReason =
591        static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex;
592    assert(RawReason <=
593           static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE));
594    return static_cast<NeverCalledReason>(RawReason);
595  }
596
597private:
598  NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion)
599      : Parent(Parent), SuccInQuestion(SuccInQuestion) {}
600
601  const CFGBlock *Parent, *SuccInQuestion;
602};
603
604class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> {
605public:
606  static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
607                    bool CheckConventionalParameters) {
608    CalledOnceChecker(AC, Handler, CheckConventionalParameters).check();
609  }
610
611private:
612  CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
613                    bool CheckConventionalParameters)
614      : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler),
615        CheckConventionalParameters(CheckConventionalParameters),
616        CurrentState(0) {
617    initDataStructures();
618    assert((size() == 0 || !States.empty()) &&
619           "Data structures are inconsistent");
620  }
621
622  //===----------------------------------------------------------------------===//
623  //                            Initializing functions
624  //===----------------------------------------------------------------------===//
625
626  void initDataStructures() {
627    const Decl *AnalyzedDecl = AC.getDecl();
628
629    if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) {
630      findParamsToTrack(Function);
631    } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) {
632      findParamsToTrack(Method);
633    } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) {
634      findCapturesToTrack(Block);
635      findParamsToTrack(Block);
636    }
637
638    // Have something to track, let's init states for every block from the CFG.
639    if (size() != 0) {
640      States =
641          CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size()));
642    }
643  }
644
645  void findCapturesToTrack(const BlockDecl *Block) {
646    for (const auto &Capture : Block->captures()) {
647      if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
648        // Parameter DeclContext is its owning function or method.
649        const DeclContext *ParamContext = P->getDeclContext();
650        if (shouldBeCalledOnce(ParamContext, P)) {
651          TrackedParams.push_back(P);
652        }
653      }
654    }
655  }
656
657  template <class FunctionLikeDecl>
658  void findParamsToTrack(const FunctionLikeDecl *Function) {
659    for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) {
660      if (shouldBeCalledOnce(Function, Index)) {
661        TrackedParams.push_back(Function->getParamDecl(Index));
662      }
663    }
664  }
665
666  //===----------------------------------------------------------------------===//
667  //                         Main logic 'check' functions
668  //===----------------------------------------------------------------------===//
669
670  void check() {
671    // Nothing to check here: we don't have marked parameters.
672    if (size() == 0 || isPossiblyEmptyImpl())
673      return;
674
675    assert(
676        llvm::none_of(States, [](const State &S) { return S.isVisited(); }) &&
677        "None of the blocks should be 'visited' before the analysis");
678
679    // For our task, both backward and forward approaches suite well.
680    // However, in order to report better diagnostics, we decided to go with
681    // backward analysis.
682    //
683    // Let's consider the following CFG and how forward and backward analyses
684    // will work for it.
685    //
686    //                  FORWARD:           |                 BACKWARD:
687    //                    #1               |                     #1
688    //                +---------+          |               +-----------+
689    //                |   if    |          |               |MaybeCalled|
690    //                +---------+          |               +-----------+
691    //                |NotCalled|          |               |    if     |
692    //                +---------+          |               +-----------+
693    //                 /       \           |                 /       \
694    //         #2     /         \  #3      |         #2     /         \  #3
695    // +----------------+      +---------+ | +----------------+      +---------+
696    // |     foo()      |      |   ...   | | |DefinitelyCalled|      |NotCalled|
697    // +----------------+      +---------+ | +----------------+      +---------+
698    // |DefinitelyCalled|      |NotCalled| | |     foo()      |      |   ...   |
699    // +----------------+      +---------+ | +----------------+      +---------+
700    //                \         /          |                \         /
701    //                 \  #4   /           |                 \  #4   /
702    //               +-----------+         |                +---------+
703    //               |    ...    |         |                |NotCalled|
704    //               +-----------+         |                +---------+
705    //               |MaybeCalled|         |                |   ...   |
706    //               +-----------+         |                +---------+
707    //
708    // The most natural way to report lacking call in the block #3 would be to
709    // message that the false branch of the if statement in the block #1 doesn't
710    // have a call.  And while with the forward approach we'll need to find a
711    // least common ancestor or something like that to find the 'if' to blame,
712    // backward analysis gives it to us out of the box.
713    BackwardDataflowWorklist Worklist(FunctionCFG, AC);
714
715    // Let's visit EXIT.
716    const CFGBlock *Exit = &FunctionCFG.getExit();
717    assignState(Exit, State(size(), ParameterStatus::NotCalled));
718    Worklist.enqueuePredecessors(Exit);
719
720    while (const CFGBlock *BB = Worklist.dequeue()) {
721      assert(BB && "Worklist should filter out null blocks");
722      check(BB);
723      assert(CurrentState.isVisited() &&
724             "After the check, basic block should be visited");
725
726      // Traverse successor basic blocks if the status of this block
727      // has changed.
728      if (assignState(BB, CurrentState)) {
729        Worklist.enqueuePredecessors(BB);
730      }
731    }
732
733    // Check that we have all tracked parameters at the last block.
734    // As we are performing a backward version of the analysis,
735    // it should be the ENTRY block.
736    checkEntry(&FunctionCFG.getEntry());
737  }
738
739  void check(const CFGBlock *BB) {
740    // We start with a state 'inherited' from all the successors.
741    CurrentState = joinSuccessors(BB);
742    assert(CurrentState.isVisited() &&
743           "Shouldn't start with a 'not visited' state");
744
745    // This is the 'exit' situation, broken promises are probably OK
746    // in such scenarios.
747    if (BB->hasNoReturnElement()) {
748      markNoReturn();
749      // This block still can have calls (even multiple calls) and
750      // for this reason there is no early return here.
751    }
752
753    // We use a backward dataflow propagation and for this reason we
754    // should traverse basic blocks bottom-up.
755    for (const CFGElement &Element : llvm::reverse(*BB)) {
756      if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
757        check(S->getStmt());
758      }
759    }
760  }
761  void check(const Stmt *S) { Visit(S); }
762
763  void checkEntry(const CFGBlock *Entry) {
764    // We finalize this algorithm with the ENTRY block because
765    // we use a backward version of the analysis.  This is where
766    // we can judge that some of the tracked parameters are not called on
767    // every path from ENTRY to EXIT.
768
769    const State &EntryStatus = getState(Entry);
770    llvm::BitVector NotCalledOnEveryPath(size(), false);
771    llvm::BitVector NotUsedOnEveryPath(size(), false);
772
773    // Check if there are no calls of the marked parameter at all
774    for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) {
775      const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
776
777      switch (IndexedStatus.value().getKind()) {
778      case ParameterStatus::NotCalled:
779        // If there were places where this parameter escapes (aka being used),
780        // we can provide a more useful diagnostic by pointing at the exact
781        // branches where it is not even mentioned.
782        if (!hasEverEscaped(IndexedStatus.index())) {
783          // This parameter is was not used at all, so we should report the
784          // most generic version of the warning.
785          if (isCaptured(Parameter)) {
786            // We want to specify that it was captured by the block.
787            Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(),
788                                              !isExplicitlyMarked(Parameter));
789          } else {
790            Handler.handleNeverCalled(Parameter,
791                                      !isExplicitlyMarked(Parameter));
792          }
793        } else {
794          // Mark it as 'interesting' to figure out which paths don't even
795          // have escapes.
796          NotUsedOnEveryPath[IndexedStatus.index()] = true;
797        }
798
799        break;
800      case ParameterStatus::MaybeCalled:
801        // If we have 'maybe called' at this point, we have an error
802        // that there is at least one path where this parameter
803        // is not called.
804        //
805        // However, reporting the warning with only that information can be
806        // too vague for the users.  For this reason, we mark such parameters
807        // as "interesting" for further analysis.
808        NotCalledOnEveryPath[IndexedStatus.index()] = true;
809        break;
810      default:
811        break;
812      }
813    }
814
815    // Early exit if we don't have parameters for extra analysis...
816    if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() &&
817        // ... or if we've seen variables with cleanup functions.
818        // We can't reason that we've seen every path in this case,
819        // and thus abandon reporting any warnings that imply that.
820        !FunctionHasCleanupVars)
821      return;
822
823    // We are looking for a pair of blocks A, B so that the following is true:
824    //   * A is a predecessor of B
825    //   * B is marked as NotCalled
826    //   * A has at least one successor marked as either
827    //     Escaped or DefinitelyCalled
828    //
829    // In that situation, it is guaranteed that B is the first block of the path
830    // where the user doesn't call or use parameter in question.
831    //
832    // For this reason, branch A -> B can be used for reporting.
833    //
834    // This part of the algorithm is guarded by a condition that the function
835    // does indeed have a violation of contract.  For this reason, we can
836    // spend more time to find a good spot to place the warning.
837    //
838    // The following algorithm has the worst case complexity of O(V + E),
839    // where V is the number of basic blocks in FunctionCFG,
840    //       E is the number of edges between blocks in FunctionCFG.
841    for (const CFGBlock *BB : FunctionCFG) {
842      if (!BB)
843        continue;
844
845      const State &BlockState = getState(BB);
846
847      for (unsigned Index : llvm::seq(0u, size())) {
848        // We don't want to use 'isLosingCall' here because we want to report
849        // the following situation as well:
850        //
851        //           MaybeCalled
852        //            |  ...  |
853        //    MaybeCalled   NotCalled
854        //
855        // Even though successor is not 'DefinitelyCalled', it is still useful
856        // to report it, it is still a path without a call.
857        if (NotCalledOnEveryPath[Index] &&
858            BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) {
859
860          findAndReportNotCalledBranches(BB, Index);
861        } else if (NotUsedOnEveryPath[Index] &&
862                   isLosingEscape(BlockState, BB, Index)) {
863
864          findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true);
865        }
866      }
867    }
868  }
869
870  /// Check potential call of a tracked parameter.
871  void checkDirectCall(const CallExpr *Call) {
872    if (auto Index = getIndexOfCallee(Call)) {
873      processCallFor(*Index, Call);
874    }
875  }
876
877  /// Check the call expression for being an indirect call of one of the tracked
878  /// parameters.  It is indirect in the sense that this particular call is not
879  /// calling the parameter itself, but rather uses it as the argument.
880  template <class CallLikeExpr>
881  void checkIndirectCall(const CallLikeExpr *CallOrMessage) {
882    // CallExpr::arguments does not interact nicely with llvm::enumerate.
883    llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef(
884        CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
885
886    // Let's check if any of the call arguments is a point of interest.
887    for (const auto &Argument : llvm::enumerate(Arguments)) {
888      if (auto Index = getIndexOfExpression(Argument.value())) {
889        if (shouldBeCalledOnce(CallOrMessage, Argument.index())) {
890          // If the corresponding parameter is marked as 'called_once' we should
891          // consider it as a call.
892          processCallFor(*Index, CallOrMessage);
893        } else {
894          // Otherwise, we mark this parameter as escaped, which can be
895          // interpreted both as called or not called depending on the context.
896          processEscapeFor(*Index);
897        }
898        // Otherwise, let's keep the state as it is.
899      }
900    }
901  }
902
903  /// Process call of the parameter with the given index
904  void processCallFor(unsigned Index, const Expr *Call) {
905    ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index);
906
907    if (CurrentParamStatus.seenAnyCalls()) {
908
909      // At this point, this parameter was called, so this is a second call.
910      const ParmVarDecl *Parameter = getParameter(Index);
911      Handler.handleDoubleCall(
912          Parameter, &CurrentState.getCallFor(Index), Call,
913          !isExplicitlyMarked(Parameter),
914          // We are sure that the second call is definitely
915          // going to happen if the status is 'DefinitelyCalled'.
916          CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled);
917
918      // Mark this parameter as already reported on, so we don't repeat
919      // warnings.
920      CurrentParamStatus = ParameterStatus::Reported;
921
922    } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) {
923      // If we didn't report anything yet, let's mark this parameter
924      // as called.
925      ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call);
926      CurrentParamStatus = Called;
927    }
928  }
929
930  /// Process escape of the parameter with the given index
931  void processEscapeFor(unsigned Index) {
932    ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index);
933
934    // Escape overrides whatever error we think happened.
935    if (CurrentParamStatus.isErrorStatus()) {
936      CurrentParamStatus = ParameterStatus::Escaped;
937    }
938  }
939
940  void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index,
941                                      bool IsEscape = false) {
942    for (const CFGBlock *Succ : Parent->succs()) {
943      if (!Succ)
944        continue;
945
946      if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) {
947        assert(Parent->succ_size() >= 2 &&
948               "Block should have at least two successors at this point");
949        if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) {
950          const ParmVarDecl *Parameter = getParameter(Index);
951          Handler.handleNeverCalled(
952              Parameter, AC.getDecl(), Clarification->Location,
953              Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter));
954        }
955      }
956    }
957  }
958
959  //===----------------------------------------------------------------------===//
960  //                   Predicate functions to check parameters
961  //===----------------------------------------------------------------------===//
962
963  /// Return true if parameter is explicitly marked as 'called_once'.
964  static bool isExplicitlyMarked(const ParmVarDecl *Parameter) {
965    return Parameter->hasAttr<CalledOnceAttr>();
966  }
967
968  /// Return true if the given name matches conventional pattens.
969  static bool isConventional(llvm::StringRef Name) {
970    return llvm::count(CONVENTIONAL_NAMES, Name) != 0;
971  }
972
973  /// Return true if the given name has conventional suffixes.
974  static bool hasConventionalSuffix(llvm::StringRef Name) {
975    return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) {
976      return Name.endswith(Suffix);
977    });
978  }
979
980  /// Return true if the given type can be used for conventional parameters.
981  static bool isConventional(QualType Ty) {
982    if (!Ty->isBlockPointerType()) {
983      return false;
984    }
985
986    QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType();
987    // Completion handlers should have a block type with void return type.
988    return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType();
989  }
990
991  /// Return true if the only parameter of the function is conventional.
992  static bool isOnlyParameterConventional(const FunctionDecl *Function) {
993    IdentifierInfo *II = Function->getIdentifier();
994    return Function->getNumParams() == 1 && II &&
995           hasConventionalSuffix(II->getName());
996  }
997
998  /// Return true/false if 'swift_async' attribute states that the given
999  /// parameter is conventionally called once.
1000  /// Return llvm::None if the given declaration doesn't have 'swift_async'
1001  /// attribute.
1002  static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D,
1003                                                       unsigned ParamIndex) {
1004    if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) {
1005      if (A->getKind() == SwiftAsyncAttr::None) {
1006        return false;
1007      }
1008
1009      return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex;
1010    }
1011    return llvm::None;
1012  }
1013
1014  /// Return true if the specified selector represents init method.
1015  static bool isInitMethod(Selector MethodSelector) {
1016    return MethodSelector.getMethodFamily() == OMF_init;
1017  }
1018
1019  /// Return true if the specified selector piece matches conventions.
1020  static bool isConventionalSelectorPiece(Selector MethodSelector,
1021                                          unsigned PieceIndex,
1022                                          QualType PieceType) {
1023    if (!isConventional(PieceType) || isInitMethod(MethodSelector)) {
1024      return false;
1025    }
1026
1027    if (MethodSelector.getNumArgs() == 1) {
1028      assert(PieceIndex == 0);
1029      return hasConventionalSuffix(MethodSelector.getNameForSlot(0));
1030    }
1031
1032    llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex);
1033    return isConventional(PieceName) || hasConventionalSuffix(PieceName);
1034  }
1035
1036  bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const {
1037    return isExplicitlyMarked(Parameter) ||
1038           (CheckConventionalParameters &&
1039            (isConventional(Parameter->getName()) ||
1040             hasConventionalSuffix(Parameter->getName())) &&
1041            isConventional(Parameter->getType()));
1042  }
1043
1044  bool shouldBeCalledOnce(const DeclContext *ParamContext,
1045                          const ParmVarDecl *Param) {
1046    unsigned ParamIndex = Param->getFunctionScopeIndex();
1047    if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) {
1048      return shouldBeCalledOnce(Function, ParamIndex);
1049    }
1050    if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) {
1051      return shouldBeCalledOnce(Method, ParamIndex);
1052    }
1053    return shouldBeCalledOnce(Param);
1054  }
1055
1056  bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const {
1057    return shouldBeCalledOnce(Block->getParamDecl(ParamIndex));
1058  }
1059
1060  bool shouldBeCalledOnce(const FunctionDecl *Function,
1061                          unsigned ParamIndex) const {
1062    if (ParamIndex >= Function->getNumParams()) {
1063      return false;
1064    }
1065    // 'swift_async' goes first and overrides anything else.
1066    if (auto ConventionalAsync =
1067            isConventionalSwiftAsync(Function, ParamIndex)) {
1068      return ConventionalAsync.getValue();
1069    }
1070
1071    return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) ||
1072           (CheckConventionalParameters &&
1073            isOnlyParameterConventional(Function));
1074  }
1075
1076  bool shouldBeCalledOnce(const ObjCMethodDecl *Method,
1077                          unsigned ParamIndex) const {
1078    Selector MethodSelector = Method->getSelector();
1079    if (ParamIndex >= MethodSelector.getNumArgs()) {
1080      return false;
1081    }
1082
1083    // 'swift_async' goes first and overrides anything else.
1084    if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) {
1085      return ConventionalAsync.getValue();
1086    }
1087
1088    const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex);
1089    return shouldBeCalledOnce(Parameter) ||
1090           (CheckConventionalParameters &&
1091            isConventionalSelectorPiece(MethodSelector, ParamIndex,
1092                                        Parameter->getType()));
1093  }
1094
1095  bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const {
1096    const FunctionDecl *Function = Call->getDirectCallee();
1097    return Function && shouldBeCalledOnce(Function, ParamIndex);
1098  }
1099
1100  bool shouldBeCalledOnce(const ObjCMessageExpr *Message,
1101                          unsigned ParamIndex) const {
1102    const ObjCMethodDecl *Method = Message->getMethodDecl();
1103    return Method && ParamIndex < Method->param_size() &&
1104           shouldBeCalledOnce(Method, ParamIndex);
1105  }
1106
1107  //===----------------------------------------------------------------------===//
1108  //                               Utility methods
1109  //===----------------------------------------------------------------------===//
1110
1111  bool isCaptured(const ParmVarDecl *Parameter) const {
1112    if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) {
1113      return Block->capturesVariable(Parameter);
1114    }
1115    return false;
1116  }
1117
1118  // Return a call site where the block is called exactly once or null otherwise
1119  const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const {
1120    ParentMap &PM = AC.getParentMap();
1121
1122    // We don't want to track the block through assignments and so on, instead
1123    // we simply see how the block used and if it's used directly in a call,
1124    // we decide based on call to what it is.
1125    //
1126    // In order to do this, we go up the parents of the block looking for
1127    // a call or a message expressions.  These might not be immediate parents
1128    // of the actual block expression due to casts and parens, so we skip them.
1129    for (const Stmt *Prev = Block, *Current = PM.getParent(Block);
1130         Current != nullptr; Prev = Current, Current = PM.getParent(Current)) {
1131      // Skip no-op (for our case) operations.
1132      if (isa<CastExpr>(Current) || isa<ParenExpr>(Current))
1133        continue;
1134
1135      // At this point, Prev represents our block as an immediate child of the
1136      // call.
1137      if (const auto *Call = dyn_cast<CallExpr>(Current)) {
1138        // It might be the call of the Block itself...
1139        if (Call->getCallee() == Prev)
1140          return Call;
1141
1142        // ...or it can be an indirect call of the block.
1143        return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr;
1144      }
1145      if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) {
1146        return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message
1147                                                              : nullptr;
1148      }
1149
1150      break;
1151    }
1152
1153    return nullptr;
1154  }
1155
1156  template <class CallLikeExpr>
1157  bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage,
1158                                       const Stmt *BlockArgument) const {
1159    // CallExpr::arguments does not interact nicely with llvm::enumerate.
1160    llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef(
1161        CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
1162
1163    for (const auto &Argument : llvm::enumerate(Arguments)) {
1164      if (Argument.value() == BlockArgument) {
1165        return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index());
1166      }
1167    }
1168
1169    return false;
1170  }
1171
1172  bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call,
1173                                       unsigned ParamIndex) const {
1174    const FunctionDecl *Function = Call->getDirectCallee();
1175    return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) ||
1176           shouldBeCalledOnce(Call, ParamIndex);
1177  }
1178
1179  bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message,
1180                                       unsigned ParamIndex) const {
1181    // At the moment, we don't have any Obj-C methods we want to specifically
1182    // check in here.
1183    return shouldBeCalledOnce(Message, ParamIndex);
1184  }
1185
1186  static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function,
1187                                              unsigned ParamIndex) {
1188    // There is a list of important API functions that while not following
1189    // conventions nor being directly annotated, still guarantee that the
1190    // callback parameter will be called exactly once.
1191    //
1192    // Here we check if this is the case.
1193    return Function &&
1194           llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS,
1195                        [Function, ParamIndex](
1196                            const KnownCalledOnceParameter &Reference) {
1197                          return Reference.FunctionName ==
1198                                     Function->getName() &&
1199                                 Reference.ParamIndex == ParamIndex;
1200                        });
1201  }
1202
1203  /// Return true if the analyzed function is actually a default implementation
1204  /// of the method that has to be overriden.
1205  ///
1206  /// These functions can have tracked parameters, but wouldn't call them
1207  /// because they are not designed to perform any meaningful actions.
1208  ///
1209  /// There are a couple of flavors of such default implementations:
1210  ///   1. Empty methods or methods with a single return statement
1211  ///   2. Methods that have one block with a call to no return function
1212  ///   3. Methods with only assertion-like operations
1213  bool isPossiblyEmptyImpl() const {
1214    if (!isa<ObjCMethodDecl>(AC.getDecl())) {
1215      // We care only about functions that are not supposed to be called.
1216      // Only methods can be overriden.
1217      return false;
1218    }
1219
1220    // Case #1 (without return statements)
1221    if (FunctionCFG.size() == 2) {
1222      // Method has only two blocks: ENTRY and EXIT.
1223      // This is equivalent to empty function.
1224      return true;
1225    }
1226
1227    // Case #2
1228    if (FunctionCFG.size() == 3) {
1229      const CFGBlock &Entry = FunctionCFG.getEntry();
1230      if (Entry.succ_empty()) {
1231        return false;
1232      }
1233
1234      const CFGBlock *OnlyBlock = *Entry.succ_begin();
1235      // Method has only one block, let's see if it has a no-return
1236      // element.
1237      if (OnlyBlock && OnlyBlock->hasNoReturnElement()) {
1238        return true;
1239      }
1240      // Fallthrough, CFGs with only one block can fall into #1 and #3 as well.
1241    }
1242
1243    // Cases #1 (return statements) and #3.
1244    //
1245    // It is hard to detect that something is an assertion or came
1246    // from assertion.  Here we use a simple heuristic:
1247    //
1248    //   - If it came from a macro, it can be an assertion.
1249    //
1250    // Additionally, we can't assume a number of basic blocks or the CFG's
1251    // structure because assertions might include loops and conditions.
1252    return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) {
1253      if (!BB) {
1254        // Unreachable blocks are totally fine.
1255        return true;
1256      }
1257
1258      // Return statements can have sub-expressions that are represented as
1259      // separate statements of a basic block.  We should allow this.
1260      // This parent map will be initialized with a parent tree for all
1261      // subexpressions of the block's return statement (if it has one).
1262      std::unique_ptr<ParentMap> ReturnChildren;
1263
1264      return llvm::all_of(
1265          llvm::reverse(*BB), // we should start with return statements, if we
1266                              // have any, i.e. from the bottom of the block
1267          [&ReturnChildren](const CFGElement &Element) {
1268            if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
1269              const Stmt *SuspiciousStmt = S->getStmt();
1270
1271              if (isa<ReturnStmt>(SuspiciousStmt)) {
1272                // Let's initialize this structure to test whether
1273                // some further statement is a part of this return.
1274                ReturnChildren = std::make_unique<ParentMap>(
1275                    const_cast<Stmt *>(SuspiciousStmt));
1276                // Return statements are allowed as part of #1.
1277                return true;
1278              }
1279
1280              return SuspiciousStmt->getBeginLoc().isMacroID() ||
1281                     (ReturnChildren &&
1282                      ReturnChildren->hasParent(SuspiciousStmt));
1283            }
1284            return true;
1285          });
1286    });
1287  }
1288
1289  /// Check if parameter with the given index has ever escaped.
1290  bool hasEverEscaped(unsigned Index) const {
1291    return llvm::any_of(States, [Index](const State &StateForOneBB) {
1292      return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped;
1293    });
1294  }
1295
1296  /// Return status stored for the given basic block.
1297  /// \{
1298  State &getState(const CFGBlock *BB) {
1299    assert(BB);
1300    return States[BB->getBlockID()];
1301  }
1302  const State &getState(const CFGBlock *BB) const {
1303    assert(BB);
1304    return States[BB->getBlockID()];
1305  }
1306  /// \}
1307
1308  /// Assign status to the given basic block.
1309  ///
1310  /// Returns true when the stored status changed.
1311  bool assignState(const CFGBlock *BB, const State &ToAssign) {
1312    State &Current = getState(BB);
1313    if (Current == ToAssign) {
1314      return false;
1315    }
1316
1317    Current = ToAssign;
1318    return true;
1319  }
1320
1321  /// Join all incoming statuses for the given basic block.
1322  State joinSuccessors(const CFGBlock *BB) const {
1323    auto Succs =
1324        llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) {
1325          return Succ && this->getState(Succ).isVisited();
1326        });
1327    // We came to this block from somewhere after all.
1328    assert(!Succs.empty() &&
1329           "Basic block should have at least one visited successor");
1330
1331    State Result = getState(*Succs.begin());
1332
1333    for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) {
1334      Result.join(getState(Succ));
1335    }
1336
1337    if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) {
1338      handleConditional(BB, Condition, Result);
1339    }
1340
1341    return Result;
1342  }
1343
1344  void handleConditional(const CFGBlock *BB, const Expr *Condition,
1345                         State &ToAlter) const {
1346    handleParameterCheck(BB, Condition, ToAlter);
1347    if (SuppressOnConventionalErrorPaths) {
1348      handleConventionalCheck(BB, Condition, ToAlter);
1349    }
1350  }
1351
1352  void handleParameterCheck(const CFGBlock *BB, const Expr *Condition,
1353                            State &ToAlter) const {
1354    // In this function, we try to deal with the following pattern:
1355    //
1356    //   if (parameter)
1357    //     parameter(...);
1358    //
1359    // It's not good to show a warning here because clearly 'parameter'
1360    // couldn't and shouldn't be called on the 'else' path.
1361    //
1362    // Let's check if this if statement has a check involving one of
1363    // the tracked parameters.
1364    if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(
1365            Condition,
1366            /* ShouldRetrieveFromComparisons = */ true)) {
1367      if (const auto Index = getIndex(*Parameter)) {
1368        ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index);
1369
1370        // We don't want to deep dive into semantics of the check and
1371        // figure out if that check was for null or something else.
1372        // We simply trust the user that they know what they are doing.
1373        //
1374        // For this reason, in the following loop we look for the
1375        // best-looking option.
1376        for (const CFGBlock *Succ : BB->succs()) {
1377          if (!Succ)
1378            continue;
1379
1380          const ParameterStatus &StatusInSucc =
1381              getState(Succ).getStatusFor(*Index);
1382
1383          if (StatusInSucc.isErrorStatus()) {
1384            continue;
1385          }
1386
1387          // Let's use this status instead.
1388          CurrentStatus = StatusInSucc;
1389
1390          if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) {
1391            // This is the best option to have and we already found it.
1392            break;
1393          }
1394
1395          // If we found 'Escaped' first, we still might find 'DefinitelyCalled'
1396          // on the other branch.  And we prefer the latter.
1397        }
1398      }
1399    }
1400  }
1401
1402  void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition,
1403                               State &ToAlter) const {
1404    // Even when the analysis is technically correct, it is a widespread pattern
1405    // not to call completion handlers in some scenarios.  These usually have
1406    // typical conditional names, such as 'error' or 'cancel'.
1407    if (!mentionsAnyOfConventionalNames(Condition)) {
1408      return;
1409    }
1410
1411    for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) {
1412      const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
1413      // Conventions do not apply to explicitly marked parameters.
1414      if (isExplicitlyMarked(Parameter)) {
1415        continue;
1416      }
1417
1418      ParameterStatus &CurrentStatus = IndexedStatus.value();
1419      // If we did find that on one of the branches the user uses the callback
1420      // and doesn't on the other path, we believe that they know what they are
1421      // doing and trust them.
1422      //
1423      // There are two possible scenarios for that:
1424      //   1. Current status is 'MaybeCalled' and one of the branches is
1425      //      'DefinitelyCalled'
1426      //   2. Current status is 'NotCalled' and one of the branches is 'Escaped'
1427      if (isLosingCall(ToAlter, BB, IndexedStatus.index()) ||
1428          isLosingEscape(ToAlter, BB, IndexedStatus.index())) {
1429        CurrentStatus = ParameterStatus::Escaped;
1430      }
1431    }
1432  }
1433
1434  bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1435                    unsigned ParameterIndex) const {
1436    // Let's check if the block represents DefinitelyCalled -> MaybeCalled
1437    // transition.
1438    return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1439                        ParameterStatus::MaybeCalled,
1440                        ParameterStatus::DefinitelyCalled);
1441  }
1442
1443  bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1444                      unsigned ParameterIndex) const {
1445    // Let's check if the block represents Escaped -> NotCalled transition.
1446    return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1447                        ParameterStatus::NotCalled, ParameterStatus::Escaped);
1448  }
1449
1450  bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1451                    unsigned ParameterIndex, ParameterStatus::Kind AfterJoin,
1452                    ParameterStatus::Kind BeforeJoin) const {
1453    assert(!ParameterStatus::isErrorStatus(BeforeJoin) &&
1454           ParameterStatus::isErrorStatus(AfterJoin) &&
1455           "It's not a losing join if statuses do not represent "
1456           "correct-to-error transition");
1457
1458    const ParameterStatus &CurrentStatus =
1459        StateAfterJoin.getStatusFor(ParameterIndex);
1460
1461    return CurrentStatus.getKind() == AfterJoin &&
1462           anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin);
1463  }
1464
1465  /// Return true if any of the successors of the given basic block has
1466  /// a specified status for the given parameter.
1467  bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex,
1468                             ParameterStatus::Kind ToFind) const {
1469    return llvm::any_of(
1470        Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) {
1471          return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind;
1472        });
1473  }
1474
1475  /// Check given expression that was discovered to escape.
1476  void checkEscapee(const Expr *E) {
1477    if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1478      checkEscapee(*Parameter);
1479    }
1480  }
1481
1482  /// Check given parameter that was discovered to escape.
1483  void checkEscapee(const ParmVarDecl &Parameter) {
1484    if (auto Index = getIndex(Parameter)) {
1485      processEscapeFor(*Index);
1486    }
1487  }
1488
1489  /// Mark all parameters in the current state as 'no-return'.
1490  void markNoReturn() {
1491    for (ParameterStatus &PS : CurrentState) {
1492      PS = ParameterStatus::NoReturn;
1493    }
1494  }
1495
1496  /// Check if the given assignment represents suppression and act on it.
1497  void checkSuppression(const BinaryOperator *Assignment) {
1498    // Suppression has the following form:
1499    //    parameter = 0;
1500    // 0 can be of any form (NULL, nil, etc.)
1501    if (auto Index = getIndexOfExpression(Assignment->getLHS())) {
1502
1503      // We don't care what is written in the RHS, it could be whatever
1504      // we can interpret as 0.
1505      if (auto Constant =
1506              Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr(
1507                  AC.getASTContext())) {
1508
1509        ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
1510
1511        if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) {
1512          // Even though this suppression mechanism is introduced to tackle
1513          // false positives for multiple calls, the fact that the user has
1514          // to use suppression can also tell us that we couldn't figure out
1515          // how different paths cancel each other out.  And if that is true,
1516          // we will most certainly have false positives about parameters not
1517          // being called on certain paths.
1518          //
1519          // For this reason, we abandon tracking this parameter altogether.
1520          CurrentParamStatus = ParameterStatus::Reported;
1521        }
1522      }
1523    }
1524  }
1525
1526public:
1527  //===----------------------------------------------------------------------===//
1528  //                            Tree traversal methods
1529  //===----------------------------------------------------------------------===//
1530
1531  void VisitCallExpr(const CallExpr *Call) {
1532    // This call might be a direct call, i.e. a parameter call...
1533    checkDirectCall(Call);
1534    // ... or an indirect call, i.e. when parameter is an argument.
1535    checkIndirectCall(Call);
1536  }
1537
1538  void VisitObjCMessageExpr(const ObjCMessageExpr *Message) {
1539    // The most common situation that we are defending against here is
1540    // copying a tracked parameter.
1541    if (const Expr *Receiver = Message->getInstanceReceiver()) {
1542      checkEscapee(Receiver);
1543    }
1544    // Message expressions unlike calls, could not be direct.
1545    checkIndirectCall(Message);
1546  }
1547
1548  void VisitBlockExpr(const BlockExpr *Block) {
1549    // Block expressions are tricky.  It is a very common practice to capture
1550    // completion handlers by blocks and use them there.
1551    // For this reason, it is important to analyze blocks and report warnings
1552    // for completion handler misuse in blocks.
1553    //
1554    // However, it can be quite difficult to track how the block itself is being
1555    // used.  The full precise anlysis of that will be similar to alias analysis
1556    // for completion handlers and can be too heavyweight for a compile-time
1557    // diagnostic.  Instead, we judge about the immediate use of the block.
1558    //
1559    // Here, we try to find a call expression where we know due to conventions,
1560    // annotations, or other reasons that the block is called once and only
1561    // once.
1562    const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block);
1563
1564    // We need to report this information to the handler because in the
1565    // situation when we know that the block is called exactly once, we can be
1566    // stricter in terms of reported diagnostics.
1567    if (CalledOnceCallSite) {
1568      Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl());
1569    } else {
1570      Handler.handleBlockWithNoGuarantees(Block->getBlockDecl());
1571    }
1572
1573    for (const auto &Capture : Block->getBlockDecl()->captures()) {
1574      if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
1575        if (auto Index = getIndex(*Param)) {
1576          if (CalledOnceCallSite) {
1577            // The call site of a block can be considered a call site of the
1578            // captured parameter we track.
1579            processCallFor(*Index, CalledOnceCallSite);
1580          } else {
1581            // We still should consider this block as an escape for parameter,
1582            // if we don't know about its call site or the number of time it
1583            // can be invoked.
1584            processEscapeFor(*Index);
1585          }
1586        }
1587      }
1588    }
1589  }
1590
1591  void VisitBinaryOperator(const BinaryOperator *Op) {
1592    if (Op->getOpcode() == clang::BO_Assign) {
1593      // Let's check if one of the tracked parameters is assigned into
1594      // something, and if it is we don't want to track extra variables, so we
1595      // consider it as an escapee.
1596      checkEscapee(Op->getRHS());
1597
1598      // Let's check whether this assignment is a suppression.
1599      checkSuppression(Op);
1600    }
1601  }
1602
1603  void VisitDeclStmt(const DeclStmt *DS) {
1604    // Variable initialization is not assignment and should be handled
1605    // separately.
1606    //
1607    // Multiple declarations can be a part of declaration statement.
1608    for (const auto *Declaration : DS->getDeclGroup()) {
1609      if (const auto *Var = dyn_cast<VarDecl>(Declaration)) {
1610        if (Var->getInit()) {
1611          checkEscapee(Var->getInit());
1612        }
1613
1614        if (Var->hasAttr<CleanupAttr>()) {
1615          FunctionHasCleanupVars = true;
1616        }
1617      }
1618    }
1619  }
1620
1621  void VisitCStyleCastExpr(const CStyleCastExpr *Cast) {
1622    // We consider '(void)parameter' as a manual no-op escape.
1623    // It should be used to explicitly tell the analysis that this parameter
1624    // is intentionally not called on this path.
1625    if (Cast->getType().getCanonicalType()->isVoidType()) {
1626      checkEscapee(Cast->getSubExpr());
1627    }
1628  }
1629
1630  void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) {
1631    // It is OK not to call marked parameters on exceptional paths.
1632    markNoReturn();
1633  }
1634
1635private:
1636  unsigned size() const { return TrackedParams.size(); }
1637
1638  llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const {
1639    return getIndexOfExpression(Call->getCallee());
1640  }
1641
1642  llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const {
1643    if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1644      return getIndex(*Parameter);
1645    }
1646
1647    return llvm::None;
1648  }
1649
1650  llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const {
1651    // Expected number of parameters that we actually track is 1.
1652    //
1653    // Also, the maximum number of declared parameters could not be on a scale
1654    // of hundreds of thousands.
1655    //
1656    // In this setting, linear search seems reasonable and even performs better
1657    // than bisection.
1658    ParamSizedVector<const ParmVarDecl *>::const_iterator It =
1659        llvm::find(TrackedParams, &Parameter);
1660
1661    if (It != TrackedParams.end()) {
1662      return It - TrackedParams.begin();
1663    }
1664
1665    return llvm::None;
1666  }
1667
1668  const ParmVarDecl *getParameter(unsigned Index) const {
1669    assert(Index < TrackedParams.size());
1670    return TrackedParams[Index];
1671  }
1672
1673  const CFG &FunctionCFG;
1674  AnalysisDeclContext &AC;
1675  CalledOnceCheckHandler &Handler;
1676  bool CheckConventionalParameters;
1677  // As of now, we turn this behavior off.  So, we still are going to report
1678  // missing calls on paths that look like it was intentional.
1679  // Technically such reports are true positives, but they can make some users
1680  // grumpy because of the sheer number of warnings.
1681  // It can be turned back on if we decide that we want to have the other way
1682  // around.
1683  bool SuppressOnConventionalErrorPaths = false;
1684
1685  // The user can annotate variable declarations with cleanup functions, which
1686  // essentially imposes a custom destructor logic on that variable.
1687  // It is possible to use it, however, to call tracked parameters on all exits
1688  // from the function.  For this reason, we track the fact that the function
1689  // actually has these.
1690  bool FunctionHasCleanupVars = false;
1691
1692  State CurrentState;
1693  ParamSizedVector<const ParmVarDecl *> TrackedParams;
1694  CFGSizedVector<State> States;
1695};
1696
1697} // end anonymous namespace
1698
1699namespace clang {
1700void checkCalledOnceParameters(AnalysisDeclContext &AC,
1701                               CalledOnceCheckHandler &Handler,
1702                               bool CheckConventionalParameters) {
1703  CalledOnceChecker::check(AC, Handler, CheckConventionalParameters);
1704}
1705} // end namespace clang
1706