1//===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Contains a collection of routines for determining if a given instruction is
10/// guaranteed to execute if a given point in control flow is reached. The most
11/// common example is an instruction within a loop being provably executed if we
12/// branch to the header of it's containing loop.
13///
14/// There are two interfaces available to determine if an instruction is
15/// executed once a given point in the control flow is reached:
16/// 1) A loop-centric one derived from LoopSafetyInfo.
17/// 2) A "must be executed context"-based one implemented in the
18///    MustBeExecutedContextExplorer.
19/// Please refer to the class comments for more information.
20///
21//===----------------------------------------------------------------------===//
22
23#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
24#define LLVM_ANALYSIS_MUSTEXECUTE_H
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/Analysis/EHPersonalities.h"
29#include "llvm/Analysis/InstructionPrecedenceTracking.h"
30#include "llvm/IR/PassManager.h"
31#include "llvm/Support/raw_ostream.h"
32
33namespace llvm {
34
35namespace {
36template <typename T> using GetterTy = std::function<T *(const Function &F)>;
37}
38
39class BasicBlock;
40class DominatorTree;
41class Instruction;
42class Loop;
43class LoopInfo;
44class PostDominatorTree;
45
46/// Captures loop safety information.
47/// It keep information for loop blocks may throw exception or otherwise
48/// exit abnormally on any iteration of the loop which might actually execute
49/// at runtime.  The primary way to consume this information is via
50/// isGuaranteedToExecute below, but some callers bailout or fallback to
51/// alternate reasoning if a loop contains any implicit control flow.
52/// NOTE: LoopSafetyInfo contains cached information regarding loops and their
53/// particular blocks. This information is only dropped on invocation of
54/// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if
55/// any thrower instructions have been added or removed from them, or if the
56/// control flow has changed, or in case of other meaningful modifications, the
57/// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the
58/// loop were made and the info wasn't recomputed properly, the behavior of all
59/// methods except for computeLoopSafetyInfo is undefined.
60class LoopSafetyInfo {
61  // Used to update funclet bundle operands.
62  DenseMap<BasicBlock *, ColorVector> BlockColors;
63
64protected:
65  /// Computes block colors.
66  void computeBlockColors(const Loop *CurLoop);
67
68public:
69  /// Returns block colors map that is used to update funclet operand bundles.
70  const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
71
72  /// Copy colors of block \p Old into the block \p New.
73  void copyColors(BasicBlock *New, BasicBlock *Old);
74
75  /// Returns true iff the block \p BB potentially may throw exception. It can
76  /// be false-positive in cases when we want to avoid complex analysis.
77  virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
78
79  /// Returns true iff any block of the loop for which this info is contains an
80  /// instruction that may throw or otherwise exit abnormally.
81  virtual bool anyBlockMayThrow() const = 0;
82
83  /// Return true if we must reach the block \p BB under assumption that the
84  /// loop \p CurLoop is entered.
85  bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
86                               const DominatorTree *DT) const;
87
88  /// Computes safety information for a loop checks loop body & header for
89  /// the possibility of may throw exception, it takes LoopSafetyInfo and loop
90  /// as argument. Updates safety information in LoopSafetyInfo argument.
91  /// Note: This is defined to clear and reinitialize an already initialized
92  /// LoopSafetyInfo.  Some callers rely on this fact.
93  virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
94
95  /// Returns true if the instruction in a loop is guaranteed to execute at
96  /// least once (under the assumption that the loop is entered).
97  virtual bool isGuaranteedToExecute(const Instruction &Inst,
98                                     const DominatorTree *DT,
99                                     const Loop *CurLoop) const = 0;
100
101  LoopSafetyInfo() = default;
102
103  virtual ~LoopSafetyInfo() = default;
104};
105
106
107/// Simple and conservative implementation of LoopSafetyInfo that can give
108/// false-positive answers to its queries in order to avoid complicated
109/// analysis.
110class SimpleLoopSafetyInfo: public LoopSafetyInfo {
111  bool MayThrow = false;       // The current loop contains an instruction which
112                               // may throw.
113  bool HeaderMayThrow = false; // Same as previous, but specific to loop header
114
115public:
116  bool blockMayThrow(const BasicBlock *BB) const override;
117
118  bool anyBlockMayThrow() const override;
119
120  void computeLoopSafetyInfo(const Loop *CurLoop) override;
121
122  bool isGuaranteedToExecute(const Instruction &Inst,
123                             const DominatorTree *DT,
124                             const Loop *CurLoop) const override;
125};
126
127/// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to
128/// give precise answers on "may throw" queries. This implementation uses cache
129/// that should be invalidated by calling the methods insertInstructionTo and
130/// removeInstruction whenever we modify a basic block's contents by adding or
131/// removing instructions.
132class ICFLoopSafetyInfo: public LoopSafetyInfo {
133  bool MayThrow = false;       // The current loop contains an instruction which
134                               // may throw.
135  // Contains information about implicit control flow in this loop's blocks.
136  mutable ImplicitControlFlowTracking ICF;
137  // Contains information about instruction that may possibly write memory.
138  mutable MemoryWriteTracking MW;
139
140public:
141  bool blockMayThrow(const BasicBlock *BB) const override;
142
143  bool anyBlockMayThrow() const override;
144
145  void computeLoopSafetyInfo(const Loop *CurLoop) override;
146
147  bool isGuaranteedToExecute(const Instruction &Inst,
148                             const DominatorTree *DT,
149                             const Loop *CurLoop) const override;
150
151  /// Returns true if we could not execute a memory-modifying instruction before
152  /// we enter \p BB under assumption that \p CurLoop is entered.
153  bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
154      const;
155
156  /// Returns true if we could not execute a memory-modifying instruction before
157  /// we execute \p I under assumption that \p CurLoop is entered.
158  bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
159      const;
160
161  /// Inform the safety info that we are planning to insert a new instruction
162  /// \p Inst into the basic block \p BB. It will make all cache updates to keep
163  /// it correct after this insertion.
164  void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
165
166  /// Inform safety info that we are planning to remove the instruction \p Inst
167  /// from its block. It will make all cache updates to keep it correct after
168  /// this removal.
169  void removeInstruction(const Instruction *Inst);
170};
171
172bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI);
173
174struct MustBeExecutedContextExplorer;
175
176/// Enum that allows us to spell out the direction.
177enum class ExplorationDirection {
178  BACKWARD = 0,
179  FORWARD = 1,
180};
181
182/// Must be executed iterators visit stretches of instructions that are
183/// guaranteed to be executed together, potentially with other instruction
184/// executed in-between.
185///
186/// Given the following code, and assuming all statements are single
187/// instructions which transfer execution to the successor (see
188/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
189/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
190/// and E. If we start at C or D, we will visit all instructions A-E.
191///
192/// \code
193///   A;
194///   B;
195///   if (...) {
196///     C;
197///     D;
198///   }
199///   E;
200/// \endcode
201///
202///
203/// Below is the example extneded with instructions F and G. Now we assume F
204/// might not transfer execution to it's successor G. As a result we get the
205/// following visit sets:
206///
207/// Start Instruction   | Visit Set
208/// A                   | A, B,       E, F
209///    B                | A, B,       E, F
210///       C             | A, B, C, D, E, F
211///          D          | A, B, C, D, E, F
212///             E       | A, B,       E, F
213///                F    | A, B,       E, F
214///                   G | A, B,       E, F, G
215///
216///
217/// \code
218///   A;
219///   B;
220///   if (...) {
221///     C;
222///     D;
223///   }
224///   E;
225///   F;  // Might not transfer execution to its successor G.
226///   G;
227/// \endcode
228///
229///
230/// A more complex example involving conditionals, loops, break, and continue
231/// is shown below. We again assume all instructions will transmit control to
232/// the successor and we assume we can prove the inner loop to be finite. We
233/// omit non-trivial branch conditions as the exploration is oblivious to them.
234/// Constant branches are assumed to be unconditional in the CFG. The resulting
235/// visist sets are shown in the table below.
236///
237/// \code
238///   A;
239///   while (true) {
240///     B;
241///     if (...)
242///       C;
243///     if (...)
244///       continue;
245///     D;
246///     if (...)
247///       break;
248///     do {
249///       if (...)
250///         continue;
251///       E;
252///     } while (...);
253///     F;
254///   }
255///   G;
256/// \endcode
257///
258/// Start Instruction    | Visit Set
259/// A                    | A, B
260///    B                 | A, B
261///       C              | A, B, C
262///          D           | A, B,    D
263///             E        | A, B,    D, E, F
264///                F     | A, B,    D,    F
265///                   G  | A, B,    D,       G
266///
267///
268/// Note that the examples show optimal visist sets but not necessarily the ones
269/// derived by the explorer depending on the available CFG analyses (see
270/// MustBeExecutedContextExplorer). Also note that we, depending on the options,
271/// the visit set can contain instructions from other functions.
272struct MustBeExecutedIterator {
273  /// Type declarations that make his class an input iterator.
274  ///{
275  typedef const Instruction *value_type;
276  typedef std::ptrdiff_t difference_type;
277  typedef const Instruction **pointer;
278  typedef const Instruction *&reference;
279  typedef std::input_iterator_tag iterator_category;
280  ///}
281
282  using ExplorerTy = MustBeExecutedContextExplorer;
283
284  MustBeExecutedIterator(const MustBeExecutedIterator &Other)
285      : Visited(Other.Visited), Explorer(Other.Explorer),
286        CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
287
288  MustBeExecutedIterator(MustBeExecutedIterator &&Other)
289      : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
290        CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
291
292  MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
293    if (this != &Other) {
294      std::swap(Visited, Other.Visited);
295      std::swap(CurInst, Other.CurInst);
296      std::swap(Head, Other.Head);
297      std::swap(Tail, Other.Tail);
298    }
299    return *this;
300  }
301
302  ~MustBeExecutedIterator() {}
303
304  /// Pre- and post-increment operators.
305  ///{
306  MustBeExecutedIterator &operator++() {
307    CurInst = advance();
308    return *this;
309  }
310
311  MustBeExecutedIterator operator++(int) {
312    MustBeExecutedIterator tmp(*this);
313    operator++();
314    return tmp;
315  }
316  ///}
317
318  /// Equality and inequality operators. Note that we ignore the history here.
319  ///{
320  bool operator==(const MustBeExecutedIterator &Other) const {
321    return CurInst == Other.CurInst && Head == Other.Head && Tail == Other.Tail;
322  }
323
324  bool operator!=(const MustBeExecutedIterator &Other) const {
325    return !(*this == Other);
326  }
327  ///}
328
329  /// Return the underlying instruction.
330  const Instruction *&operator*() { return CurInst; }
331  const Instruction *getCurrentInst() const { return CurInst; }
332
333  /// Return true if \p I was encountered by this iterator already.
334  bool count(const Instruction *I) const {
335    return Visited.count({I, ExplorationDirection::FORWARD}) ||
336           Visited.count({I, ExplorationDirection::BACKWARD});
337  }
338
339private:
340  using VisitedSetTy =
341      DenseSet<PointerIntPair<const Instruction *, 1, ExplorationDirection>>;
342
343  /// Private constructors.
344  MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
345
346  /// Reset the iterator to its initial state pointing at \p I.
347  void reset(const Instruction *I);
348
349  /// Reset the iterator to point at \p I, keep cached state.
350  void resetInstruction(const Instruction *I);
351
352  /// Try to advance one of the underlying positions (Head or Tail).
353  ///
354  /// \return The next instruction in the must be executed context, or nullptr
355  ///         if none was found.
356  const Instruction *advance();
357
358  /// A set to track the visited instructions in order to deal with endless
359  /// loops and recursion.
360  VisitedSetTy Visited;
361
362  /// A reference to the explorer that created this iterator.
363  ExplorerTy &Explorer;
364
365  /// The instruction we are currently exposing to the user. There is always an
366  /// instruction that we know is executed with the given program point,
367  /// initially the program point itself.
368  const Instruction *CurInst;
369
370  /// Two positions that mark the program points where this iterator will look
371  /// for the next instruction. Note that the current instruction is either the
372  /// one pointed to by Head, Tail, or both.
373  const Instruction *Head, *Tail;
374
375  friend struct MustBeExecutedContextExplorer;
376};
377
378/// A "must be executed context" for a given program point PP is the set of
379/// instructions, potentially before and after PP, that are executed always when
380/// PP is reached. The MustBeExecutedContextExplorer an interface to explore
381/// "must be executed contexts" in a module through the use of
382/// MustBeExecutedIterator.
383///
384/// The explorer exposes "must be executed iterators" that traverse the must be
385/// executed context. There is little information sharing between iterators as
386/// the expected use case involves few iterators for "far apart" instructions.
387/// If that changes, we should consider caching more intermediate results.
388struct MustBeExecutedContextExplorer {
389
390  /// In the description of the parameters we use PP to denote a program point
391  /// for which the must be executed context is explored, or put differently,
392  /// for which the MustBeExecutedIterator is created.
393  ///
394  /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
395  ///                             other than the parent of PP should be
396  ///                             explored.
397  /// \param ExploreCFGForward    Flag to indicate if instructions located after
398  ///                             PP in the CFG, e.g., post-dominating PP,
399  ///                             should be explored.
400  /// \param ExploreCFGBackward   Flag to indicate if instructions located
401  ///                             before PP in the CFG, e.g., dominating PP,
402  ///                             should be explored.
403  MustBeExecutedContextExplorer(
404      bool ExploreInterBlock, bool ExploreCFGForward, bool ExploreCFGBackward,
405      GetterTy<const LoopInfo> LIGetter =
406          [](const Function &) { return nullptr; },
407      GetterTy<const DominatorTree> DTGetter =
408          [](const Function &) { return nullptr; },
409      GetterTy<const PostDominatorTree> PDTGetter =
410          [](const Function &) { return nullptr; })
411      : ExploreInterBlock(ExploreInterBlock),
412        ExploreCFGForward(ExploreCFGForward),
413        ExploreCFGBackward(ExploreCFGBackward), LIGetter(LIGetter),
414        DTGetter(DTGetter), PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
415
416  /// Iterator-based interface. \see MustBeExecutedIterator.
417  ///{
418  using iterator = MustBeExecutedIterator;
419  using const_iterator = const MustBeExecutedIterator;
420
421  /// Return an iterator to explore the context around \p PP.
422  iterator &begin(const Instruction *PP) {
423    auto &It = InstructionIteratorMap[PP];
424    if (!It)
425      It.reset(new iterator(*this, PP));
426    return *It;
427  }
428
429  /// Return an iterator to explore the cached context around \p PP.
430  const_iterator &begin(const Instruction *PP) const {
431    return *InstructionIteratorMap.find(PP)->second;
432  }
433
434  /// Return an universal end iterator.
435  ///{
436  iterator &end() { return EndIterator; }
437  iterator &end(const Instruction *) { return EndIterator; }
438
439  const_iterator &end() const { return EndIterator; }
440  const_iterator &end(const Instruction *) const { return EndIterator; }
441  ///}
442
443  /// Return an iterator range to explore the context around \p PP.
444  llvm::iterator_range<iterator> range(const Instruction *PP) {
445    return llvm::make_range(begin(PP), end(PP));
446  }
447
448  /// Return an iterator range to explore the cached context around \p PP.
449  llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
450    return llvm::make_range(begin(PP), end(PP));
451  }
452  ///}
453
454  /// Check \p Pred on all instructions in the context.
455  ///
456  /// This method will evaluate \p Pred and return
457  /// true if \p Pred holds in every instruction.
458  bool checkForAllContext(const Instruction *PP,
459                          function_ref<bool(const Instruction *)> Pred) {
460    for (auto EIt = begin(PP), EEnd = end(PP); EIt != EEnd; ++EIt)
461      if (!Pred(*EIt))
462        return false;
463    return true;
464  }
465
466  /// Helper to look for \p I in the context of \p PP.
467  ///
468  /// The context is expanded until \p I was found or no more expansion is
469  /// possible.
470  ///
471  /// \returns True, iff \p I was found.
472  bool findInContextOf(const Instruction *I, const Instruction *PP) {
473    auto EIt = begin(PP), EEnd = end(PP);
474    return findInContextOf(I, EIt, EEnd);
475  }
476
477  /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
478  ///
479  /// The context is expanded until \p I was found or no more expansion is
480  /// possible.
481  ///
482  /// \returns True, iff \p I was found.
483  bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
484    bool Found = EIt.count(I);
485    while (!Found && EIt != EEnd)
486      Found = (++EIt).getCurrentInst() == I;
487    return Found;
488  }
489
490  /// Return the next instruction that is guaranteed to be executed after \p PP.
491  ///
492  /// \param It              The iterator that is used to traverse the must be
493  ///                        executed context.
494  /// \param PP              The program point for which the next instruction
495  ///                        that is guaranteed to execute is determined.
496  const Instruction *
497  getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
498                                   const Instruction *PP);
499  /// Return the previous instr. that is guaranteed to be executed before \p PP.
500  ///
501  /// \param It              The iterator that is used to traverse the must be
502  ///                        executed context.
503  /// \param PP              The program point for which the previous instr.
504  ///                        that is guaranteed to execute is determined.
505  const Instruction *
506  getMustBeExecutedPrevInstruction(MustBeExecutedIterator &It,
507                                   const Instruction *PP);
508
509  /// Find the next join point from \p InitBB in forward direction.
510  const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
511
512  /// Find the next join point from \p InitBB in backward direction.
513  const BasicBlock *findBackwardJoinPoint(const BasicBlock *InitBB);
514
515  /// Parameter that limit the performed exploration. See the constructor for
516  /// their meaning.
517  ///{
518  const bool ExploreInterBlock;
519  const bool ExploreCFGForward;
520  const bool ExploreCFGBackward;
521  ///}
522
523private:
524  /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
525  /// PostDominatorTree.
526  ///{
527  GetterTy<const LoopInfo> LIGetter;
528  GetterTy<const DominatorTree> DTGetter;
529  GetterTy<const PostDominatorTree> PDTGetter;
530  ///}
531
532  /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
533  DenseMap<const BasicBlock *, Optional<bool>> BlockTransferMap;
534
535  /// Map to cache containsIrreducibleCFG results.
536  DenseMap<const Function*, Optional<bool>> IrreducibleControlMap;
537
538  /// Map from instructions to associated must be executed iterators.
539  DenseMap<const Instruction *, std::unique_ptr<MustBeExecutedIterator>>
540      InstructionIteratorMap;
541
542  /// A unique end iterator.
543  MustBeExecutedIterator EndIterator;
544};
545
546class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> {
547  raw_ostream &OS;
548
549public:
550  MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {}
551  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
552};
553
554class MustBeExecutedContextPrinterPass
555    : public PassInfoMixin<MustBeExecutedContextPrinterPass> {
556  raw_ostream &OS;
557
558public:
559  MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {}
560  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
561};
562
563} // namespace llvm
564
565#endif
566