//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass looks for safe points where the prologue and epilogue can be
// inserted.
// The safe point for the prologue (resp. epilogue) is called Save
// (resp. Restore).
// A point is safe for the prologue (resp. epilogue) if and only if
// 1) it dominates (resp. post-dominates) all the frame-related operations and
// 2) between two executions of the Save (resp. Restore) point there is an
// execution of the Restore (resp. Save) point.
//
// For instance, the following points are safe:
// for (int i = 0; i < 10; ++i) {
//   Save
//   ...
//   Restore
// }
// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
// And the following points are not:
// for (int i = 0; i < 10; ++i) {
//   Save
//   ...
// }
// for (int i = 0; i < 10; ++i) {
//   ...
//   Restore
// }
// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
//
// This pass also ensures that the safe points are 3) cheaper than the regular
// entry and exit blocks.
//
// Property #1 is ensured via the use of MachineDominatorTree and
// MachinePostDominatorTree.
// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
// points must be in the same loop.
// Property #3 is ensured via the MachineBlockFrequencyInfo.
//
// If this pass finds points matching all these properties, then
// MachineFrameInfo is updated with this information.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <memory>

using namespace llvm;

#define DEBUG_TYPE "shrink-wrap"

STATISTIC(NumFunc, "Number of functions");
STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
STATISTIC(NumCandidatesDropped,
          "Number of shrink-wrapping candidates dropped because of frequency");

static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
                    cl::desc("enable the shrink-wrapping pass"));
static cl::opt<bool> EnablePostShrinkWrapOpt(
    "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
    cl::desc("enable splitting of the restore block if possible"));

namespace {

/// Class to determine where the safe points to insert the
/// prologue and epilogue are.
/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
/// shrink-wrapping term for prologue/epilogue placement, this pass
/// does not rely on expensive data-flow analysis. Instead we use the
/// dominance properties and loop information to decide which points
/// are safe for such insertion.
class ShrinkWrap : public MachineFunctionPass {
  /// Hold callee-saved information.
  RegisterClassInfo RCI;
  MachineDominatorTree *MDT = nullptr;
  MachinePostDominatorTree *MPDT = nullptr;

  /// Current safe point found for the prologue.
  /// The prologue will be inserted before the first instruction
  /// in this basic block.
  MachineBasicBlock *Save = nullptr;

  /// Current safe point found for the epilogue.
  /// The epilogue will be inserted before the first terminator instruction
  /// in this basic block.
  MachineBasicBlock *Restore = nullptr;

  /// Hold the basic block frequency information.
  /// Used to check the profitability of the new points.
  MachineBlockFrequencyInfo *MBFI = nullptr;

  /// Hold the loop information. Used to determine if Save and Restore
  /// are in the same loop.
  MachineLoopInfo *MLI = nullptr;

  // Emit remarks.
  MachineOptimizationRemarkEmitter *ORE = nullptr;

  /// Frequency of the Entry block.
  BlockFrequency EntryFreq;

  /// Current opcode for frame setup.
  unsigned FrameSetupOpcode = ~0u;

  /// Current opcode for frame destroy.
  unsigned FrameDestroyOpcode = ~0u;

  /// Stack pointer register, used by llvm.{stacksave,stackrestore}.
  Register SP;

  /// Entry block.
  const MachineBasicBlock *Entry = nullptr;

  using SetOfRegs = SmallSetVector<unsigned, 16>;

  /// Registers that need to be saved for the current function.
  mutable SetOfRegs CurrentCSRs;

  /// Current MachineFunction.
  MachineFunction *MachineFunc = nullptr;

  /// Is `true` for the block numbers where we assume possible stack accesses
  /// or computation of stack-relative addresses on any CFG path including
  /// the block itself.
  BitVector StackAddressUsedBlockInfo;

  /// Check if \p MI uses or defines a callee-saved register or
  /// a frame index. If this is the case, this means \p MI must happen
  /// after Save and before Restore.
  bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
                       bool StackAddressUsed) const;

  const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
    if (CurrentCSRs.empty()) {
      BitVector SavedRegs;
      const TargetFrameLowering *TFI =
          MachineFunc->getSubtarget().getFrameLowering();

      TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS);

      for (int Reg = SavedRegs.find_first(); Reg != -1;
           Reg = SavedRegs.find_next(Reg))
        CurrentCSRs.insert((unsigned)Reg);
    }
    return CurrentCSRs;
  }

  /// Update the Save and Restore points such that \p MBB is in
  /// the region that is dominated by Save and post-dominated by Restore
  /// and Save and Restore still match the safe point definition.
  /// Such a point may not exist, and Save and/or Restore may be null after
  /// this call.
  void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);

  // Try to find a safe point based on dominance and block frequency without
  // any change in IR.
  bool performShrinkWrapping(
      const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
      RegScavenger *RS);

  /// This function tries to split the restore point if doing so can shrink the
  /// save point further. \return True if the restore point is split.
  bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
                          RegScavenger *RS);

  /// This function analyzes if the restore point can be split to create a new
  /// restore point. This function collects
  /// 1. Any preds of the current restore that are reachable by callee-save/FI
  ///    blocks - indicated by DirtyPreds
  /// 2. Any preds of the current restore that are not DirtyPreds - indicated
  ///    by CleanPreds
  /// Both sets should be non-empty for considering a restore point split.
  bool checkIfRestoreSplittable(
      const MachineBasicBlock *CurRestore,
      const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
      SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
      SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
      const TargetInstrInfo *TII, RegScavenger *RS);

  /// Initialize the pass for \p MF.
  void init(MachineFunction &MF) {
    RCI.runOnMachineFunction(MF);
    MDT = &getAnalysis<MachineDominatorTree>();
    MPDT = &getAnalysis<MachinePostDominatorTree>();
    Save = nullptr;
    Restore = nullptr;
    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
    MLI = &getAnalysis<MachineLoopInfo>();
    ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
    EntryFreq = MBFI->getEntryFreq();
    const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
    FrameSetupOpcode = TII.getCallFrameSetupOpcode();
    FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
    SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();
    Entry = &MF.front();
    CurrentCSRs.clear();
    MachineFunc = &MF;

    ++NumFunc;
  }

  /// Check whether or not Save and Restore points are still interesting for
  /// shrink-wrapping.
  bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }

  /// Check if shrink wrapping is enabled for this target and function.
  static bool isShrinkWrapEnabled(const MachineFunction &MF);

public:
  static char ID;

  ShrinkWrap() : MachineFunctionPass(ID) {
    initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    AU.addRequired<MachineOptimizationRemarkEmitterPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
      MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return "Shrink Wrapping analysis"; }

  /// Perform the shrink-wrapping analysis and update
  /// the MachineFrameInfo attached to \p MF with the results.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

char ShrinkWrap::ID = 0;

char &llvm::ShrinkWrapID = ShrinkWrap::ID;

INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)

bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
                                 bool StackAddressUsed) const {
  /// Check if \p Op is known to access an address not on the function's stack.
  /// At the moment, accesses where the underlying object is a global, function
  /// argument, or jump table are considered non-stack accesses. Note that the
  /// caller's stack may get accessed when passing an argument via the stack,
  /// but not the stack of the current function.
  ///
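  /// For instance (an illustrative sketch; the value names are made up):
  ///   load @global          ; underlying object is a global: non-stack
  ///   load %byval_arg       ; byval-like argument: copied onto our stack,
  ///                         ; so it does NOT count as non-stack
  ///   load %unknown_ptr     ; unknown underlying object: conservatively
  ///                         ; treated as a possible stack access
  ///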
  auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
    if (Op->getValue()) {
      const Value *UO = getUnderlyingObject(Op->getValue());
      if (!UO)
        return false;
      if (auto *Arg = dyn_cast<Argument>(UO))
        return !Arg->hasPassPointeeByValueCopyAttr();
      return isa<GlobalValue>(UO);
    }
    if (const PseudoSourceValue *PSV = Op->getPseudoValue())
      return PSV->isJumpTable();
    return false;
  };
  // Load/store operations may access the stack indirectly when we previously
  // computed an address to a stack location.
  if (StackAddressUsed && MI.mayLoadOrStore() &&
      (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
       !all_of(MI.memoperands(), IsKnownNonStackPtr)))
    return true;

  if (MI.getOpcode() == FrameSetupOpcode ||
      MI.getOpcode() == FrameDestroyOpcode) {
    LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
    return true;
  }
  const MachineFunction *MF = MI.getParent()->getParent();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  for (const MachineOperand &MO : MI.operands()) {
    bool UseOrDefCSR = false;
    if (MO.isReg()) {
      // Ignore instructions like DBG_VALUE which don't read/def the register.
      if (!MO.isDef() && !MO.readsReg())
        continue;
      Register PhysReg = MO.getReg();
      if (!PhysReg)
        continue;
      assert(PhysReg.isPhysical() && "Unallocated register?!");
      // The stack pointer is not normally described as a callee-saved register
      // in calling convention definitions, so we need to watch for it
      // separately. An SP mentioned by a call instruction we can ignore,
      // though, as it's harmless and we do not want to effectively disable
      // tail calls by forcing the restore point to post-dominate them.
      // PPC's LR is also not normally described as a callee-saved register in
      // calling convention definitions, so we need to watch for it, too. An LR
      // mentioned implicitly by a return (or "branch to link register")
      // instruction we can ignore, otherwise we may pessimize shrink-wrapping.
      UseOrDefCSR =
          (!MI.isCall() && PhysReg == SP) ||
          RCI.getLastCalleeSavedAlias(PhysReg) ||
          (!MI.isReturn() && TRI->isNonallocatableRegisterCalleeSave(PhysReg));
    } else if (MO.isRegMask()) {
      // Check if this regmask clobbers any of the CSRs.
      for (unsigned Reg : getCurrentCSRs(RS)) {
        if (MO.clobbersPhysReg(Reg)) {
          UseOrDefCSR = true;
          break;
        }
      }
    }
    // Skip FrameIndex operands in DBG_VALUE instructions.
    if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) {
      LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
                        << MO.isFI() << "): " << MI << '\n');
      return true;
    }
  }
  return false;
}

/// Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
                                   DominanceAnalysis &Dom, bool Strict = true) {
  MachineBasicBlock *IDom = &Block;
  for (MachineBasicBlock *BB : BBs) {
    IDom = Dom.findNearestCommonDominator(IDom, BB);
    if (!IDom)
      break;
  }
  if (Strict && IDom == &Block)
    return nullptr;
  return IDom;
}
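
// For instance, this helper is what the pass uses to nudge a point one step
// towards the entry or the exits (both calls appear verbatim further down):
//   // Hoist Save: immediate dominator of Save's predecessors.
//   Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
//   // Sink Restore: immediate post-dominator of Restore's successors.
//   Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);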

static bool isAnalyzableBB(const TargetInstrInfo &TII,
                           MachineBasicBlock &Entry) {
  // Check if the block is analyzable.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
}

/// Determines if any predecessor of MBB is on a path from a block that has a
/// use or def of CSRs/FI to MBB.
/// ReachableByDirty: All blocks reachable from a block that has a use or def
/// of CSRs/FI.
static bool
hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
             const MachineBasicBlock &MBB) {
  for (const MachineBasicBlock *PredBB : MBB.predecessors())
    if (ReachableByDirty.count(PredBB))
      return true;
  return false;
}

/// Derives the list of all the basic blocks reachable from MBB.
static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
                             const MachineBasicBlock &MBB) {
  SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
                                               MBB.succ_end());
  Visited.insert(&MBB);
  while (!Worklist.empty()) {
    MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
    if (!Visited.insert(SuccMBB).second)
      continue;
    Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
  }
}

/// Collect blocks reachable by use or def of CSRs/FI.
static void collectBlocksReachableByDirty(
    const DenseSet<const MachineBasicBlock *> &DirtyBBs,
    DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
  for (const MachineBasicBlock *MBB : DirtyBBs) {
    if (ReachableByDirty.count(MBB))
      continue;
    // Mark all descendants as reachable.
    markAllReachable(ReachableByDirty, *MBB);
  }
}

/// \return true if there is a clean path from SavePoint to the original
/// Restore.
static bool
isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
                            ArrayRef<MachineBasicBlock *> CleanPreds) {
  DenseSet<const MachineBasicBlock *> Visited;
  SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
                                               CleanPreds.end());
  while (!Worklist.empty()) {
    MachineBasicBlock *CleanBB = Worklist.pop_back_val();
    if (CleanBB == SavePoint)
      return true;
    if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
      continue;
    Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
  }
  return false;
}

/// This function updates the branches after the restore point split.
///
/// The restore point has been split.
/// Old restore point: MBB
/// New restore point: NMBB
/// Any basic block (say BBToUpdate) which had a fallthrough to MBB
/// previously should
/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the
///    block layout OR
/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
static void updateTerminator(MachineBasicBlock *BBToUpdate,
                             MachineBasicBlock *NMBB,
                             const TargetInstrInfo *TII) {
  DebugLoc DL = BBToUpdate->findBranchDebugLoc();
  // If NMBB isn't the new layout successor for BBToUpdate, insert an
  // unconditional branch to it.
  if (!BBToUpdate->isLayoutSuccessor(NMBB))
    TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
}

/// This function splits the restore point and returns the new restore block.
///
/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
///
/// The decision has been made to split the restore point.
/// Old restore point: \p MBB
/// New restore point: \p NMBB
/// This function makes the necessary block layout changes so that
/// 1. \p NMBB points to \p MBB unconditionally
/// 2. All DirtyPreds that previously pointed to \p MBB point to \p NMBB
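///
/// For instance (an illustrative sketch; D1, D2 are DirtyPreds and C1 is a
/// CleanPred):
///
///   Before:  D1  D2  C1        After:  D1  D2   C1
///             \  |  /                   \  /    |
///              \ | /                    NMBB    |
///               MBB                       \     |
///                                          \    |
///                                           MBB
///
/// The restore code can then be placed in NMBB, keeping the clean path
/// through C1 free of epilogue code.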
static MachineBasicBlock *
tryToSplitRestore(MachineBasicBlock *MBB,
                  ArrayRef<MachineBasicBlock *> DirtyPreds,
                  const TargetInstrInfo *TII) {
  MachineFunction *MF = MBB->getParent();

  // Get the list of DirtyPreds which have a fallthrough to MBB before the
  // block layout change. This is just to ensure that if NMBB is inserted
  // after MBB, then we create an unconditional branch from
  // DirtyPred/CleanPred to NMBB.
  SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
  for (MachineBasicBlock *BB : DirtyPreds)
    if (BB->getFallThrough(false) == MBB)
      MBBFallthrough.insert(BB);

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  // Insert this block at the end of the function. Inserting in between may
  // interfere with control flow optimizer decisions.
  MF->insert(MF->end(), NMBB);

  for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
    NMBB->addLiveIn(LI.PhysReg);

  TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());

  // After splitting, all predecessors of the restore point should be dirty
  // blocks.
  for (MachineBasicBlock *SuccBB : DirtyPreds)
    SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);

  NMBB->addSuccessor(MBB);

  for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
    updateTerminator(BBToUpdate, NMBB, TII);

  return NMBB;
}

/// This function undoes the restore point split done earlier.
///
/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
///
/// The restore point was split and the change needs to be rolled back. Make
/// the necessary changes to reset the restore point from \p NMBB to \p MBB.
static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
                                 MachineBasicBlock *MBB,
                                 ArrayRef<MachineBasicBlock *> DirtyPreds,
                                 const TargetInstrInfo *TII) {
  // For a BB, if NMBB is the fallthrough in the current layout, then in the
  // new layout a. BB should fallthrough to MBB OR b. BB should
  // unconditionally branch to MBB.
  SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
  for (MachineBasicBlock *BB : DirtyPreds)
    if (BB->getFallThrough(false) == NMBB)
      NMBBFallthrough.insert(BB);

  NMBB->removeSuccessor(MBB);
  for (MachineBasicBlock *SuccBB : DirtyPreds)
    SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);

  NMBB->erase(NMBB->begin(), NMBB->end());
  NMBB->eraseFromParent();

  for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
    updateTerminator(BBToUpdate, MBB, TII);
}

// A block is deemed fit for a restore point split iff there exist
// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSR/FI
// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
bool ShrinkWrap::checkIfRestoreSplittable(
    const MachineBasicBlock *CurRestore,
    const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
    SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
    SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
    const TargetInstrInfo *TII, RegScavenger *RS) {
  for (const MachineInstr &MI : *CurRestore)
    if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
      return false;

  for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
    if (!isAnalyzableBB(*TII, *PredBB))
      return false;

    if (ReachableByDirty.count(PredBB))
      DirtyPreds.push_back(PredBB);
    else
      CleanPreds.push_back(PredBB);
  }

  return !(CleanPreds.empty() || DirtyPreds.empty());
}

bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
                                    RegScavenger *RS) {
  if (!EnablePostShrinkWrapOpt)
    return false;

  MachineBasicBlock *InitSave = nullptr;
  MachineBasicBlock *InitRestore = nullptr;

  if (HasCandidate) {
    InitSave = Save;
    InitRestore = Restore;
  } else {
    InitRestore = nullptr;
    InitSave = &MF.front();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isEHFuncletEntry())
        return false;
      if (MBB.isReturnBlock()) {
        // Do not support multiple restore points.
        if (InitRestore)
          return false;
        InitRestore = &MBB;
      }
    }
  }

  if (!InitSave || !InitRestore || InitRestore == InitSave ||
      !MDT->dominates(InitSave, InitRestore) ||
      !MPDT->dominates(InitRestore, InitSave))
    return false;

  // Bail out of the optimization if any of the basic blocks is a target of an
  // INLINEASM_BR instruction.
  for (MachineBasicBlock &MBB : MF)
    if (MBB.isInlineAsmBrIndirectTarget())
      return false;

  DenseSet<const MachineBasicBlock *> DirtyBBs;
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHPad()) {
      DirtyBBs.insert(&MBB);
      continue;
    }
    for (const MachineInstr &MI : MBB)
      if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
        DirtyBBs.insert(&MBB);
        break;
      }
  }

  // Find blocks reachable from the use or def of CSRs/FI.
  DenseSet<const MachineBasicBlock *> ReachableByDirty;
  collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<MachineBasicBlock *, 2> DirtyPreds;
  SmallVector<MachineBasicBlock *, 2> CleanPreds;
  if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
                                CleanPreds, TII, RS))
    return false;

  // Find a new save point which dominates all dirty blocks.
  MachineBasicBlock *NewSave =
      FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);

  while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
                     EntryFreq < MBFI->getBlockFreq(NewSave) ||
                     /*A loop block's frequency can exceed the entry block's
                       frequency in some cases.*/
                     MLI->getLoopFor(NewSave)))
    NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
                         false);

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!NewSave || NewSave == InitSave ||
      isSaveReachableThroughClean(NewSave, CleanPreds) ||
      !TFI->canUseAsPrologue(*NewSave))
    return false;

  // Now we know that splitting a restore point can isolate the restore point
  // from clean blocks and doing so can shrink the save point.
  MachineBasicBlock *NewRestore =
      tryToSplitRestore(InitRestore, DirtyPreds, TII);

  // Make sure the new restore point is valid as an epilogue, depending on the
  // target.
  if (!TFI->canUseAsEpilogue(*NewRestore)) {
    rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
    return false;
  }

  Save = NewSave;
  Restore = NewRestore;

  MDT->runOnMachineFunction(MF);
  MPDT->runOnMachineFunction(MF);

  assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
         "Incorrect save or restore point due to dominance relations");
  assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
         "Unexpected save or restore point in a loop");
  assert((EntryFreq >= MBFI->getBlockFreq(Save) &&
          EntryFreq >= MBFI->getBlockFreq(Restore)) &&
         "Incorrect save or restore point based on block frequency");
  return true;
}

void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
                                         RegScavenger *RS) {
  // Get rid of the easy cases first.
  if (!Save)
    Save = &MBB;
  else
    Save = MDT->findNearestCommonDominator(Save, &MBB);
  assert(Save);

  if (!Restore)
    Restore = &MBB;
  else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
                                // means the block never returns. If that's the
                                // case, we don't want to call
                                // `findNearestCommonDominator`, which will
                                // return `Restore`.
    Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
  else
    Restore = nullptr; // Abort, we can't find a restore point in this case.

  // Make sure we would be able to insert the restore code before the
  // terminator.
  if (Restore == &MBB) {
    for (const MachineInstr &Terminator : MBB.terminators()) {
      if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
        continue;
      // One of the terminators needs to happen before the restore point.
      if (MBB.succ_empty()) {
        Restore = nullptr; // Abort, we can't find a restore point in this case.
        break;
      }
      // Look for a restore point that post-dominates all the successors.
      // The immediate post-dominator is what we are looking for.
      Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
      break;
    }
  }

  if (!Restore) {
    LLVM_DEBUG(
        dbgs() << "Restore point needs to be spread across several blocks\n");
    return;
  }

  // Make sure Save and Restore are suitable for shrink-wrapping:
  // 1. all paths from Save need to lead to Restore before exiting.
  // 2. all paths to Restore need to go through Save from Entry.
  // We achieve that by making sure that:
  // A. Save dominates Restore.
  // B. Restore post-dominates Save.
  // C. Save and Restore are in the same loop.
  bool SaveDominatesRestore = false;
  bool RestorePostDominatesSave = false;
  while (Restore &&
         (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
          !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
          // Post-dominance is not enough in loops to ensure that all uses/defs
          // are after the prologue and before the epilogue at runtime.
          // E.g.,
          // while(1) {
          //  Save
          //  Restore
          //   if (...)
          //     break;
          //  use/def CSRs
          // }
          // All the uses/defs of CSRs are dominated by Save and post-dominated
          // by Restore. However, the CSR uses are still reachable after
          // Restore and before Save are executed.
          //
          // For now, just push the restore/save points outside of loops.
          // FIXME: Refine the criteria to still find interesting cases
          // for loops.
          MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
    // Fix (A).
    if (!SaveDominatesRestore) {
      Save = MDT->findNearestCommonDominator(Save, Restore);
      continue;
    }
    // Fix (B).
    if (!RestorePostDominatesSave)
      Restore = MPDT->findNearestCommonDominator(Restore, Save);

    // Fix (C).
    if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
      if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
        // Push Save outside of this loop if immediate dominator is different
        // from save block. If immediate dominator is not different, bail out.
        Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
        if (!Save)
          break;
      } else {
        // If the loop does not exit, there is no point in looking
        // for a post-dominator outside the loop.
        SmallVector<MachineBasicBlock*, 4> ExitBlocks;
        MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks);
        // Push Restore outside of this loop.
        // Look for the immediate post-dominator of the loop exits.
        MachineBasicBlock *IPdom = Restore;
        for (MachineBasicBlock *LoopExitBB: ExitBlocks) {
          IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT);
          if (!IPdom)
            break;
        }
        // If the immediate post-dominator is not in a less nested loop,
        // then we are stuck in a program with an infinite loop.
        // In that case, we will not find a safe point, hence, bail out.
        if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore))
          Restore = IPdom;
        else {
          Restore = nullptr;
          break;
        }
      }
    }
  }
}

static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
                              StringRef RemarkName, StringRef RemarkMessage,
                              const DiagnosticLocation &Loc,
                              const MachineBasicBlock *MBB) {
  ORE->emit([&]() {
    return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB)
           << RemarkMessage;
  });

  LLVM_DEBUG(dbgs() << RemarkMessage << '\n');
  return false;
}

bool ShrinkWrap::performShrinkWrapping(
    const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
    RegScavenger *RS) {
  for (MachineBasicBlock *MBB : RPOT) {
    LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');

    if (MBB->isEHFuncletEntry())
      return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
                               "EH Funclets are not supported yet.",
                               MBB->front().getDebugLoc(), MBB);

    if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
      // Push the prologue and epilogue outside of the region that may throw (or
      // jump out via inlineasm_br), by making sure that all the landing pads
      // are at least at the boundary of the save and restore points.  The
      // problem is that a basic block can jump out from the middle in these
      // cases, which we do not handle.
      updateSaveRestorePoints(*MBB, RS);
      if (!ArePointsInteresting()) {
        LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
        return false;
      }
      continue;
    }

    bool StackAddressUsed = false;
    // Check if we found any stack accesses in the predecessors. We are not
    // doing a full dataflow analysis here to keep things simple but just
    // rely on a reverse post-order traversal (RPOT) to guarantee predecessors
    // are already processed except for loops (and accept the conservative
    // result for loops).
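    // E.g., for a loop header, a back-edge predecessor comes later in RPOT,
    // so its entry in StackAddressUsedBlockInfo still holds the initial
    // conservative 'true' value when the header is visited.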
    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
      if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
        StackAddressUsed = true;
        break;
      }
    }

    for (const MachineInstr &MI : *MBB) {
      if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
        // The Save (resp. Restore) point must dominate (resp. post-dominate)
        // MI. Look for the proper basic block for those.
        updateSaveRestorePoints(*MBB, RS);
        // If we are at a point where we cannot improve the placement of
        // save/restore instructions, just give up.
        if (!ArePointsInteresting()) {
          LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
          return false;
        }
        // No need to look for other instructions, this basic block
        // will already be part of the handled region.
        StackAddressUsed = true;
        break;
      }
    }
    StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
  }
  if (!ArePointsInteresting()) {
    // If the points are not interesting at this point, then they must be null
    // because it means we did not encounter any frame/CSR related code.
    // Otherwise, we would have returned from the previous loop.
    assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
    LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: "
                    << EntryFreq.getFrequency() << '\n');

  const TargetFrameLowering *TFI =
      MachineFunc->getSubtarget().getFrameLowering();
  do {
    LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
                      << printMBBReference(*Save) << ' '
                      << printBlockFreq(*MBFI, *Save)
                      << "\nRestore: " << printMBBReference(*Restore) << ' '
                      << printBlockFreq(*MBFI, *Restore) << '\n');

    bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
    if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) &&
         EntryFreq >= MBFI->getBlockFreq(Restore)) &&
        ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
         TFI->canUseAsEpilogue(*Restore)))
      break;
    LLVM_DEBUG(
        dbgs() << "New points are too expensive or invalid for the target\n");
    MachineBasicBlock *NewBB;
    if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
      Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
      if (!Save)
        break;
      NewBB = Save;
    } else {
      // Restore is expensive.
      Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
      if (!Restore)
        break;
      NewBB = Restore;
    }
    updateSaveRestorePoints(*NewBB, RS);
  } while (Save && Restore);

  if (!ArePointsInteresting()) {
    ++NumCandidatesDropped;
    return false;
  }
  return true;
}

bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
    return false;

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  init(MF);

  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
    // If MF is irreducible, a block may be in a loop without
    // MachineLoopInfo reporting it. I.e., we may use the
    // post-dominance property in loops, which leads to incorrect
    // results. Moreover, we may miss that the prologue and
    // epilogue are not in the same loop, leading to unbalanced
    // construction/deconstruction of the stack frame.
    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
                             "Irreducible CFGs are not supported yet.",
                             MF.getFunction().getSubprogram(), &MF.front());
  }

  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  std::unique_ptr<RegScavenger> RS(
      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);

  bool Changed = false;

  StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
  bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
  StackAddressUsedBlockInfo.clear();
  Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
  if (!HasCandidate && !Changed)
    return false;
  if (!ArePointsInteresting())
    return Changed;

  LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                    << printMBBReference(*Save) << ' '
                    << "\nRestore: " << printMBBReference(*Restore) << '\n');

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setSavePoint(Save);
  MFI.setRestorePoint(Restore);
  ++NumCandidates;
  return Changed;
}

bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  switch (EnableShrinkWrapOpt) {
  case cl::BOU_UNSET:
    return TFI->enableShrinkWrapping(MF) &&
           // Windows with CFI has some limitations that make it impossible
           // to use shrink-wrapping.
           !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
           // Sanitizers look at the value of the stack at the location
           // of the crash. Since a crash can happen anywhere, the
           // frame must be lowered before anything else happens for the
           // sanitizers to be able to get a correct stack frame.
           !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) ||
             MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) ||
             MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) ||
             MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress));
  // If EnableShrinkWrap is set, it takes precedence over whatever the
  // target sets. The rationale is that we assume we want to test
  // something related to shrink-wrapping.
  case cl::BOU_TRUE:
    return true;
  case cl::BOU_FALSE:
    return false;
  }
  llvm_unreachable("Invalid shrink-wrapping state");
}
