1//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion on machine instructions. We
10// attempt to remove as much code from the body of a loop as possible.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/BitVector.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/AliasAnalysis.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
27#include "llvm/CodeGen/MachineDominators.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineLoopInfo.h"
33#include "llvm/CodeGen/MachineMemOperand.h"
34#include "llvm/CodeGen/MachineOperand.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/PseudoSourceValue.h"
37#include "llvm/CodeGen/TargetInstrInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetRegisterInfo.h"
40#include "llvm/CodeGen/TargetSchedule.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/MC/MCInstrDesc.h"
45#include "llvm/MC/MCRegister.h"
46#include "llvm/MC/MCRegisterInfo.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Debug.h"
51#include "llvm/Support/raw_ostream.h"
52#include <algorithm>
53#include <cassert>
54#include <limits>
55#include <vector>
56
using namespace llvm;

#define DEBUG_TYPE "machinelicm"

// Command-line knobs controlling which instructions MachineLICM will hoist.

static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
                 cl::desc("MachineLICM should avoid speculation"),
                 cl::init(true), cl::Hidden);

static cl::opt<bool>
HoistCheapInsts("hoist-cheap-insts",
                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);

static cl::opt<bool>
HoistConstStores("hoist-const-stores",
                 cl::desc("Hoist invariant stores"),
                 cl::init(true), cl::Hidden);

static cl::opt<bool> HoistConstLoads("hoist-const-loads",
                                     cl::desc("Hoist invariant loads"),
                                     cl::init(true), cl::Hidden);

// The default threshold of 100 (i.e. if target block is 100 times hotter)
// is based on empirical data on a single target and is subject to tuning.
static cl::opt<unsigned>
BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
                             cl::desc("Do not hoist instructions if target"
                             "block is N times hotter than the source."),
                             cl::init(100), cl::Hidden);

// Selects when block-frequency info gates hoisting into hotter blocks:
// never, only with profile data (PGO), or always.
enum class UseBFI { None, PGO, All };

static cl::opt<UseBFI>
DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
                              cl::desc("Disable hoisting instructions to"
                              " hotter blocks"),
                              cl::init(UseBFI::PGO), cl::Hidden,
                              cl::values(clEnumValN(UseBFI::None, "none",
                              "disable the feature"),
                              clEnumValN(UseBFI::PGO, "pgo",
                              "enable the feature when using profile data"),
                              clEnumValN(UseBFI::All, "all",
                              "enable the feature with/wo profile data")));

// Pass statistics, reported with -stats.
STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
          "Number of stores of const phys reg hoisted out of loops");
STATISTIC(NumNotHoistedDueToHotness,
          "Number of instructions not hoisted due to block frequency");
117namespace {
  // Bitmask describing the outcome of a Hoist() attempt; values may be OR'd
  // together (e.g. an instruction can be both hoisted and erased by CSE).
  enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119
120  class MachineLICMBase : public MachineFunctionPass {
121    const TargetInstrInfo *TII = nullptr;
122    const TargetLoweringBase *TLI = nullptr;
123    const TargetRegisterInfo *TRI = nullptr;
124    const MachineFrameInfo *MFI = nullptr;
125    MachineRegisterInfo *MRI = nullptr;
126    TargetSchedModel SchedModel;
127    bool PreRegAlloc = false;
128    bool HasProfileData = false;
129
130    // Various analyses that we use...
131    AliasAnalysis *AA = nullptr;               // Alias analysis info.
132    MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequncy info
133    MachineLoopInfo *MLI = nullptr;            // Current MachineLoopInfo
134    MachineDominatorTree *DT = nullptr; // Machine dominator tree for the cur loop
135
136    // State that is updated as we process loops
137    bool Changed = false;           // True if a loop is changed.
138    bool FirstInLoop = false;       // True if it's the first LICM in the loop.
139
140    // Holds information about whether it is allowed to move load instructions
141    // out of the loop
142    SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
143
144    // Exit blocks of each Loop.
145    DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
146
147    bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
148      if (ExitBlockMap.contains(CurLoop))
149        return is_contained(ExitBlockMap[CurLoop], MBB);
150
151      SmallVector<MachineBasicBlock *, 8> ExitBlocks;
152      CurLoop->getExitBlocks(ExitBlocks);
153      ExitBlockMap[CurLoop] = ExitBlocks;
154      return is_contained(ExitBlocks, MBB);
155    }
156
157    // Track 'estimated' register pressure.
158    SmallSet<Register, 32> RegSeen;
159    SmallVector<unsigned, 8> RegPressure;
160
161    // Register pressure "limit" per register pressure set. If the pressure
162    // is higher than the limit, then it's considered high.
163    SmallVector<unsigned, 8> RegLimit;
164
165    // Register pressure on path leading from loop preheader to current BB.
166    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
167
168    // For each opcode per preheader, keep a list of potential CSE instructions.
169    DenseMap<MachineBasicBlock *,
170             DenseMap<unsigned, std::vector<MachineInstr *>>>
171        CSEMap;
172
173    enum {
174      SpeculateFalse   = 0,
175      SpeculateTrue    = 1,
176      SpeculateUnknown = 2
177    };
178
179    // If a MBB does not dominate loop exiting blocks then it may not safe
180    // to hoist loads from this block.
181    // Tri-state: 0 - false, 1 - true, 2 - unknown
182    unsigned SpeculationState = SpeculateUnknown;
183
184  public:
185    MachineLICMBase(char &PassID, bool PreRegAlloc)
186        : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
187
188    bool runOnMachineFunction(MachineFunction &MF) override;
189
190    void getAnalysisUsage(AnalysisUsage &AU) const override {
191      AU.addRequired<MachineLoopInfo>();
192      if (DisableHoistingToHotterBlocks != UseBFI::None)
193        AU.addRequired<MachineBlockFrequencyInfo>();
194      AU.addRequired<MachineDominatorTree>();
195      AU.addRequired<AAResultsWrapperPass>();
196      AU.addPreserved<MachineLoopInfo>();
197      MachineFunctionPass::getAnalysisUsage(AU);
198    }
199
200    void releaseMemory() override {
201      RegSeen.clear();
202      RegPressure.clear();
203      RegLimit.clear();
204      BackTrace.clear();
205      CSEMap.clear();
206      ExitBlockMap.clear();
207    }
208
209  private:
210    /// Keep track of information about hoisting candidates.
211    struct CandidateInfo {
212      MachineInstr *MI;
213      unsigned      Def;
214      int           FI;
215
216      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
217        : MI(mi), Def(def), FI(fi) {}
218    };
219
220    void HoistRegionPostRA(MachineLoop *CurLoop,
221                           MachineBasicBlock *CurPreheader);
222
223    void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
224                     MachineBasicBlock *CurPreheader);
225
226    void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
227                   BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
228                   SmallVectorImpl<CandidateInfo> &Candidates,
229                   MachineLoop *CurLoop);
230
231    void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
232
233    bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
234
235    bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
236
237    bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
238
239    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
240                               MachineLoop *CurLoop) const;
241
242    bool IsCheapInstruction(MachineInstr &MI) const;
243
244    bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
245                                 bool Cheap);
246
247    void UpdateBackTraceRegPressure(const MachineInstr *MI);
248
249    bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);
250
251    bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
252
253    bool isTriviallyReMaterializable(const MachineInstr &MI) const;
254
255    void EnterScope(MachineBasicBlock *MBB);
256
257    void ExitScope(MachineBasicBlock *MBB);
258
259    void ExitScopeIfDone(
260        MachineDomTreeNode *Node,
261        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
262        const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
263
264    void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop,
265                        MachineBasicBlock *CurPreheader);
266
267    void InitRegPressure(MachineBasicBlock *BB);
268
269    DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
270                                             bool ConsiderSeen,
271                                             bool ConsiderUnseenAsDef);
272
273    void UpdateRegPressure(const MachineInstr *MI,
274                           bool ConsiderUnseenAsDef = false);
275
276    MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);
277
278    MachineInstr *LookForDuplicate(const MachineInstr *MI,
279                                   std::vector<MachineInstr *> &PrevMIs);
280
281    bool
282    EliminateCSE(MachineInstr *MI,
283                 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
284
285    bool MayCSE(MachineInstr *MI);
286
287    unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
288                   MachineLoop *CurLoop);
289
290    void InitCSEMap(MachineBasicBlock *BB);
291
292    void InitializeLoadsHoistableLoops();
293
294    bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
295                            MachineBasicBlock *TgtBlock);
296    MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop,
297                                       MachineBasicBlock *CurPreheader);
298  };
299
  // Post-register-allocation flavor of the pass (PreRegAlloc = false).
  class MachineLICM : public MachineLICMBase {
  public:
    static char ID;
    MachineLICM() : MachineLICMBase(ID, false) {
      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
    }
  };
307
  // Pre-register-allocation flavor of the pass (PreRegAlloc = true),
  // run early while virtual registers are still in SSA form.
  class EarlyMachineLICM : public MachineLICMBase {
  public:
    static char ID;
    EarlyMachineLICM() : MachineLICMBase(ID, true) {
      initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
    }
  };
315
316} // end anonymous namespace
317
// Pass ID storage; the *address* of each char serves as the unique pass ID.
char MachineLICM::ID;
char EarlyMachineLICM::ID;

// Exported handles so other code can schedule these passes by ID.
char &llvm::MachineLICMID = MachineLICM::ID;
char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;

INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
                    "Machine Loop Invariant Code Motion", false, false)

INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
                      "Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
                    "Early Machine Loop Invariant Code Motion", false, false)
341
342bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
343  if (skipFunction(MF.getFunction()))
344    return false;
345
346  Changed = FirstInLoop = false;
347  const TargetSubtargetInfo &ST = MF.getSubtarget();
348  TII = ST.getInstrInfo();
349  TLI = ST.getTargetLowering();
350  TRI = ST.getRegisterInfo();
351  MFI = &MF.getFrameInfo();
352  MRI = &MF.getRegInfo();
353  SchedModel.init(&ST);
354
355  PreRegAlloc = MRI->isSSA();
356  HasProfileData = MF.getFunction().hasProfileData();
357
358  if (PreRegAlloc)
359    LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
360  else
361    LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
362  LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
363
364  if (PreRegAlloc) {
365    // Estimate register pressure during pre-regalloc pass.
366    unsigned NumRPS = TRI->getNumRegPressureSets();
367    RegPressure.resize(NumRPS);
368    std::fill(RegPressure.begin(), RegPressure.end(), 0);
369    RegLimit.resize(NumRPS);
370    for (unsigned i = 0, e = NumRPS; i != e; ++i)
371      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
372  }
373
374  // Get our Loop information...
375  if (DisableHoistingToHotterBlocks != UseBFI::None)
376    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
377  MLI = &getAnalysis<MachineLoopInfo>();
378  DT  = &getAnalysis<MachineDominatorTree>();
379  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
380
381  if (HoistConstLoads)
382    InitializeLoadsHoistableLoops();
383
384  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
385  while (!Worklist.empty()) {
386    MachineLoop *CurLoop = Worklist.pop_back_val();
387    MachineBasicBlock *CurPreheader = nullptr;
388
389    if (!PreRegAlloc)
390      HoistRegionPostRA(CurLoop, CurPreheader);
391    else {
392      // CSEMap is initialized for loop header when the first instruction is
393      // being hoisted.
394      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
395      FirstInLoop = true;
396      HoistOutOfLoop(N, CurLoop, CurPreheader);
397      CSEMap.clear();
398    }
399  }
400
401  return Changed;
402}
403
404/// Return true if instruction stores to the specified frame.
405static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
406  // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
407  // true since they have no memory operands.
408  if (!MI->mayStore())
409     return false;
410  // If we lost memory operands, conservatively assume that the instruction
411  // writes to all slots.
412  if (MI->memoperands_empty())
413    return true;
414  for (const MachineMemOperand *MemOp : MI->memoperands()) {
415    if (!MemOp->isStore() || !MemOp->getPseudoValue())
416      continue;
417    if (const FixedStackPseudoSourceValue *Value =
418        dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
419      if (Value->getFrameIndex() == FI)
420        return true;
421    }
422  }
423  return false;
424}
425
426/// Examine the instruction for potentai LICM candidate. Also
427/// gather register def and frame object update information.
428void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
429                                BitVector &PhysRegClobbers,
430                                SmallSet<int, 32> &StoredFIs,
431                                SmallVectorImpl<CandidateInfo> &Candidates,
432                                MachineLoop *CurLoop) {
433  bool RuledOut = false;
434  bool HasNonInvariantUse = false;
435  unsigned Def = 0;
436  for (const MachineOperand &MO : MI->operands()) {
437    if (MO.isFI()) {
438      // Remember if the instruction stores to the frame index.
439      int FI = MO.getIndex();
440      if (!StoredFIs.count(FI) &&
441          MFI->isSpillSlotObjectIndex(FI) &&
442          InstructionStoresToFI(MI, FI))
443        StoredFIs.insert(FI);
444      HasNonInvariantUse = true;
445      continue;
446    }
447
448    // We can't hoist an instruction defining a physreg that is clobbered in
449    // the loop.
450    if (MO.isRegMask()) {
451      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
452      continue;
453    }
454
455    if (!MO.isReg())
456      continue;
457    Register Reg = MO.getReg();
458    if (!Reg)
459      continue;
460    assert(Reg.isPhysical() && "Not expecting virtual register!");
461
462    if (!MO.isDef()) {
463      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
464        // If it's using a non-loop-invariant register, then it's obviously not
465        // safe to hoist.
466        HasNonInvariantUse = true;
467      continue;
468    }
469
470    if (MO.isImplicit()) {
471      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
472        PhysRegClobbers.set(*AI);
473      if (!MO.isDead())
474        // Non-dead implicit def? This cannot be hoisted.
475        RuledOut = true;
476      // No need to check if a dead implicit def is also defined by
477      // another instruction.
478      continue;
479    }
480
481    // FIXME: For now, avoid instructions with multiple defs, unless
482    // it's a dead implicit def.
483    if (Def)
484      RuledOut = true;
485    else
486      Def = Reg;
487
488    // If we have already seen another instruction that defines the same
489    // register, then this is not safe.  Two defs is indicated by setting a
490    // PhysRegClobbers bit.
491    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
492      if (PhysRegDefs.test(*AS))
493        PhysRegClobbers.set(*AS);
494    }
495    // Need a second loop because MCRegAliasIterator can visit the same
496    // register twice.
497    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
498      PhysRegDefs.set(*AS);
499
500    if (PhysRegClobbers.test(Reg))
501      // MI defined register is seen defined by another instruction in
502      // the loop, it cannot be a LICM candidate.
503      RuledOut = true;
504  }
505
506  // Only consider reloads for now and remats which do not have register
507  // operands. FIXME: Consider unfold load folding instructions.
508  if (Def && !RuledOut) {
509    int FI = std::numeric_limits<int>::min();
510    if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
511        (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
512      Candidates.push_back(CandidateInfo(MI, Def, FI));
513  }
514}
515
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
                                        MachineBasicBlock *CurPreheader) {
  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that is reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    // Funclet entry blocks will clobber all registers
    if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
      PhysRegClobbers.setBitsNotInMask(Mask);

    // Reset the guaranteed-execution cache for each block scanned.
    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : *BB)
      ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
                CurLoop);
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (const MachineOperand &MO : TI->operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (CandidateInfo &Candidate : Candidates) {
    // FI == INT_MIN means "not a stack-slot reload"; otherwise reject
    // candidates whose slot is stored to somewhere in the loop.
    if (Candidate.FI != std::numeric_limits<int>::min() &&
        StoredFIs.count(Candidate.FI))
      continue;

    unsigned Def = Candidate.Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidate.MI;
      for (const MachineOperand &MO : MI->all_uses()) {
        if (!MO.getReg())
          continue;
        Register Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
    }
  }
}
606
607/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
608/// sure it is not killed by any instructions in the loop.
609void MachineLICMBase::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
610  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
611    if (!BB->isLiveIn(Reg))
612      BB->addLiveIn(Reg);
613    for (MachineInstr &MI : *BB) {
614      for (MachineOperand &MO : MI.all_uses()) {
615        if (!MO.getReg())
616          continue;
617        if (TRI->regsOverlap(Reg, MO.getReg()))
618          MO.setIsKill(false);
619      }
620    }
621  }
622}
623
624/// When an instruction is found to only use loop invariant operands that is
625/// safe to hoist, this instruction is called to do the dirty work.
626void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def,
627                                  MachineLoop *CurLoop,
628                                  MachineBasicBlock *CurPreheader) {
629  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
630
631  // Now move the instructions to the predecessor, inserting it before any
632  // terminator instructions.
633  LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
634                    << " from " << printMBBReference(*MI->getParent()) << ": "
635                    << *MI);
636
637  // Splice the instruction to the preheader.
638  MachineBasicBlock *MBB = MI->getParent();
639  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
640
641  // Since we are moving the instruction out of its basic block, we do not
642  // retain its debug location. Doing so would degrade the debugging
643  // experience and adversely affect the accuracy of profiling information.
644  assert(!MI->isDebugInstr() && "Should not hoist debug inst");
645  MI->setDebugLoc(DebugLoc());
646
647  // Add register to livein list to all the BBs in the current loop since a
648  // loop invariant must be kept live throughout the whole loop. This is
649  // important to ensure later passes do not scavenge the def register.
650  AddToLiveIns(Def, CurLoop);
651
652  ++NumPostRAHoisted;
653  Changed = true;
654}
655
656/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
657/// may not be safe to hoist.
658bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB,
659                                            MachineLoop *CurLoop) {
660  if (SpeculationState != SpeculateUnknown)
661    return SpeculationState == SpeculateFalse;
662
663  if (BB != CurLoop->getHeader()) {
664    // Check loop exiting blocks.
665    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
666    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
667    for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
668      if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
669        SpeculationState = SpeculateTrue;
670        return false;
671      }
672  }
673
674  SpeculationState = SpeculateFalse;
675  return true;
676}
677
678/// Check if \p MI is trivially remateralizable and if it does not have any
679/// virtual register uses. Even though rematerializable RA might not actually
680/// rematerialize it in this scenario. In that case we do not want to hoist such
681/// instruction out of the loop in a belief RA will sink it back if needed.
682bool MachineLICMBase::isTriviallyReMaterializable(
683    const MachineInstr &MI) const {
684  if (!TII->isTriviallyReMaterializable(MI))
685    return false;
686
687  for (const MachineOperand &MO : MI.all_uses()) {
688    if (MO.getReg().isVirtual())
689      return false;
690  }
691
692  return true;
693}
694
695void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
696  LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
697
698  // Remember livein register pressure.
699  BackTrace.push_back(RegPressure);
700}
701
702void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
703  LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
704  BackTrace.pop_back();
705}
706
707/// Destroy scope for the MBB that corresponds to the given dominator tree node
708/// if its a leaf or all of its children are done. Walk up the dominator tree to
709/// destroy ancestors which are now done.
710void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
711    DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
712    const DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
713  if (OpenChildren[Node])
714    return;
715
716  for(;;) {
717    ExitScope(Node->getBlock());
718    // Now traverse upwards to pop ancestors whose offsprings are all done.
719    MachineDomTreeNode *Parent = ParentMap.lookup(Node);
720    if (!Parent || --OpenChildren[Parent] != 0)
721      break;
722    Node = Parent;
723  }
724}
725
726/// Walk the specified loop in the CFG (defined by all blocks dominated by the
727/// specified header block, and that are in the current loop) in depth first
728/// order w.r.t the DominatorTree. This allows us to visit definitions before
729/// uses, allowing us to hoist a loop body in one pass without iteration.
730void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
731                                     MachineLoop *CurLoop,
732                                     MachineBasicBlock *CurPreheader) {
733  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
734  if (!Preheader)
735    return;
736
737  SmallVector<MachineDomTreeNode*, 32> Scopes;
738  SmallVector<MachineDomTreeNode*, 8> WorkList;
739  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
740  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
741
742  // Perform a DFS walk to determine the order of visit.
743  WorkList.push_back(HeaderN);
744  while (!WorkList.empty()) {
745    MachineDomTreeNode *Node = WorkList.pop_back_val();
746    assert(Node && "Null dominator tree node?");
747    MachineBasicBlock *BB = Node->getBlock();
748
749    // If the header of the loop containing this basic block is a landing pad,
750    // then don't try to hoist instructions out of this loop.
751    const MachineLoop *ML = MLI->getLoopFor(BB);
752    if (ML && ML->getHeader()->isEHPad())
753      continue;
754
755    // If this subregion is not in the top level loop at all, exit.
756    if (!CurLoop->contains(BB))
757      continue;
758
759    Scopes.push_back(Node);
760    unsigned NumChildren = Node->getNumChildren();
761
762    // Don't hoist things out of a large switch statement.  This often causes
763    // code to be hoisted that wasn't going to be executed, and increases
764    // register pressure in a situation where it's likely to matter.
765    if (BB->succ_size() >= 25)
766      NumChildren = 0;
767
768    OpenChildren[Node] = NumChildren;
769    if (NumChildren) {
770      // Add children in reverse order as then the next popped worklist node is
771      // the first child of this node.  This means we ultimately traverse the
772      // DOM tree in exactly the same order as if we'd recursed.
773      for (MachineDomTreeNode *Child : reverse(Node->children())) {
774        ParentMap[Child] = Node;
775        WorkList.push_back(Child);
776      }
777    }
778  }
779
780  if (Scopes.size() == 0)
781    return;
782
783  // Compute registers which are livein into the loop headers.
784  RegSeen.clear();
785  BackTrace.clear();
786  InitRegPressure(Preheader);
787
788  // Now perform LICM.
789  for (MachineDomTreeNode *Node : Scopes) {
790    MachineBasicBlock *MBB = Node->getBlock();
791
792    EnterScope(MBB);
793
794    // Process the block
795    SpeculationState = SpeculateUnknown;
796    for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
797      unsigned HoistRes = HoistResult::NotHoisted;
798      HoistRes = Hoist(&MI, Preheader, CurLoop);
799      if (HoistRes & HoistResult::NotHoisted) {
800        // We have failed to hoist MI to outermost loop's preheader. If MI is in
801        // a subloop, try to hoist it to subloop's preheader.
802        SmallVector<MachineLoop *> InnerLoopWorkList;
803        for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
804             L = L->getParentLoop())
805          InnerLoopWorkList.push_back(L);
806
807        while (!InnerLoopWorkList.empty()) {
808          MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
809          MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
810          if (InnerLoopPreheader) {
811            HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
812            if (HoistRes & HoistResult::Hoisted)
813              break;
814          }
815        }
816      }
817
818      if (HoistRes & HoistResult::ErasedMI)
819        continue;
820
821      UpdateRegPressure(&MI);
822    }
823
824    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
825    ExitScopeIfDone(Node, OpenChildren, ParentMap);
826  }
827}
828
829static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
830  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
831}
832
833/// Find all virtual register references that are liveout of the preheader to
834/// initialize the starting "register pressure". Note this does not count live
835/// through (livein but not used) registers.
836void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
837  std::fill(RegPressure.begin(), RegPressure.end(), 0);
838
839  // If the preheader has only a single predecessor and it ends with a
840  // fallthrough or an unconditional branch, then scan its predecessor for live
841  // defs as well. This happens whenever the preheader is created by splitting
842  // the critical edge from the loop predecessor to the loop header.
843  if (BB->pred_size() == 1) {
844    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
845    SmallVector<MachineOperand, 4> Cond;
846    if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
847      InitRegPressure(*BB->pred_begin());
848  }
849
850  for (const MachineInstr &MI : *BB)
851    UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
852}
853
854/// Update estimate of register pressure after the specified instruction.
855void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
856                                        bool ConsiderUnseenAsDef) {
857  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
858  for (const auto &RPIdAndCost : Cost) {
859    unsigned Class = RPIdAndCost.first;
860    if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
861      RegPressure[Class] = 0;
862    else
863      RegPressure[Class] += RPIdAndCost.second;
864  }
865}
866
867/// Calculate the additional register pressure that the registers used in MI
868/// cause.
869///
870/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
871/// figure out which usages are live-ins.
872/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
873DenseMap<unsigned, int>
874MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
875                                  bool ConsiderUnseenAsDef) {
876  DenseMap<unsigned, int> Cost;
877  if (MI->isImplicitDef())
878    return Cost;
879  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
880    const MachineOperand &MO = MI->getOperand(i);
881    if (!MO.isReg() || MO.isImplicit())
882      continue;
883    Register Reg = MO.getReg();
884    if (!Reg.isVirtual())
885      continue;
886
887    // FIXME: It seems bad to use RegSeen only for some of these calculations.
888    bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
889    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
890
891    RegClassWeight W = TRI->getRegClassWeight(RC);
892    int RCCost = 0;
893    if (MO.isDef())
894      RCCost = W.RegWeight;
895    else {
896      bool isKill = isOperandKill(MO, MRI);
897      if (isNew && !isKill && ConsiderUnseenAsDef)
898        // Haven't seen this, it must be a livein.
899        RCCost = W.RegWeight;
900      else if (!isNew && isKill)
901        RCCost = -W.RegWeight;
902    }
903    if (RCCost == 0)
904      continue;
905    const int *PS = TRI->getRegClassPressureSets(RC);
906    for (; *PS != -1; ++PS) {
907      if (!Cost.contains(*PS))
908        Cost[*PS] = RCCost;
909      else
910        Cost[*PS] += RCCost;
911    }
912  }
913  return Cost;
914}
915
916/// Return true if this machine instruction loads from global offset table or
917/// constant pool.
918static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
919  assert(MI.mayLoad() && "Expected MI that loads!");
920
921  // If we lost memory operands, conservatively assume that the instruction
922  // reads from everything..
923  if (MI.memoperands_empty())
924    return true;
925
926  for (MachineMemOperand *MemOp : MI.memoperands())
927    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
928      if (PSV->isGOT() || PSV->isConstantPool())
929        return true;
930
931  return false;
932}
933
934// This function iterates through all the operands of the input store MI and
935// checks that each register operand statisfies isCallerPreservedPhysReg.
936// This means, the value being stored and the address where it is being stored
937// is constant throughout the body of the function (not including prologue and
938// epilogue). When called with an MI that isn't a store, it returns false.
939// A future improvement can be to check if the store registers are constant
// throughout the loop rather than throughout the function.
941static bool isInvariantStore(const MachineInstr &MI,
942                             const TargetRegisterInfo *TRI,
943                             const MachineRegisterInfo *MRI) {
944
945  bool FoundCallerPresReg = false;
946  if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
947      (MI.getNumOperands() == 0))
948    return false;
949
950  // Check that all register operands are caller-preserved physical registers.
951  for (const MachineOperand &MO : MI.operands()) {
952    if (MO.isReg()) {
953      Register Reg = MO.getReg();
954      // If operand is a virtual register, check if it comes from a copy of a
955      // physical register.
956      if (Reg.isVirtual())
957        Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
958      if (Reg.isVirtual())
959        return false;
960      if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
961        return false;
962      else
963        FoundCallerPresReg = true;
964    } else if (!MO.isImm()) {
965        return false;
966    }
967  }
968  return FoundCallerPresReg;
969}
970
971// Return true if the input MI is a copy instruction that feeds an invariant
972// store instruction. This means that the src of the copy has to satisfy
// isCallerPreservedPhysReg and at least one of its users should satisfy
974// isInvariantStore.
975static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
976                                        const MachineRegisterInfo *MRI,
977                                        const TargetRegisterInfo *TRI) {
978
979  // FIXME: If targets would like to look through instructions that aren't
980  // pure copies, this can be updated to a query.
981  if (!MI.isCopy())
982    return false;
983
984  const MachineFunction *MF = MI.getMF();
985  // Check that we are copying a constant physical register.
986  Register CopySrcReg = MI.getOperand(1).getReg();
987  if (CopySrcReg.isVirtual())
988    return false;
989
990  if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
991    return false;
992
993  Register CopyDstReg = MI.getOperand(0).getReg();
994  // Check if any of the uses of the copy are invariant stores.
995  assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
996
997  for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
998    if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
999      return true;
1000  }
1001  return false;
1002}
1003
1004/// Returns true if the instruction may be a suitable candidate for LICM.
1005/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
1006bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
1007  // Check if it's safe to move the instruction.
1008  bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1009  if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
1010      !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
1011    LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1012    return false;
1013  }
1014
1015  // If it is a load then check if it is guaranteed to execute by making sure
1016  // that it dominates all exiting blocks. If it doesn't, then there is a path
1017  // out of the loop which does not execute this load, so we can't hoist it.
1018  // Loads from constant memory are safe to speculate, for example indexed load
1019  // from a jump table.
1020  // Stores and side effects are already checked by isSafeToMove.
1021  if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
1022      !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
1023    LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1024    return false;
1025  }
1026
1027  // Convergent attribute has been used on operations that involve inter-thread
1028  // communication which results are implicitly affected by the enclosing
1029  // control flows. It is not safe to hoist or sink such operations across
1030  // control flow.
1031  if (I.isConvergent())
1032    return false;
1033
1034  if (!TII->shouldHoist(I, CurLoop))
1035    return false;
1036
1037  return true;
1038}
1039
1040/// Returns true if the instruction is loop invariant.
1041bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I,
1042                                          MachineLoop *CurLoop) {
1043  if (!IsLICMCandidate(I, CurLoop)) {
1044    LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1045    return false;
1046  }
1047  return CurLoop->isLoopInvariant(I);
1048}
1049
1050/// Return true if the specified instruction is used by a phi node and hoisting
1051/// it could cause a copy to be inserted.
1052bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI,
1053                                    MachineLoop *CurLoop) {
1054  SmallVector<const MachineInstr *, 8> Work(1, MI);
1055  do {
1056    MI = Work.pop_back_val();
1057    for (const MachineOperand &MO : MI->all_defs()) {
1058      Register Reg = MO.getReg();
1059      if (!Reg.isVirtual())
1060        continue;
1061      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1062        // A PHI may cause a copy to be inserted.
1063        if (UseMI.isPHI()) {
1064          // A PHI inside the loop causes a copy because the live range of Reg is
1065          // extended across the PHI.
1066          if (CurLoop->contains(&UseMI))
1067            return true;
1068          // A PHI in an exit block can cause a copy to be inserted if the PHI
1069          // has multiple predecessors in the loop with different values.
1070          // For now, approximate by rejecting all exit blocks.
1071          if (isExitBlock(CurLoop, UseMI.getParent()))
1072            return true;
1073          continue;
1074        }
1075        // Look past copies as well.
1076        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1077          Work.push_back(&UseMI);
1078      }
1079    }
1080  } while (!Work.empty());
1081  return false;
1082}
1083
1084/// Compute operand latency between a def of 'Reg' and an use in the current
1085/// loop, return true if the target considered it high.
1086bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1087                                            Register Reg,
1088                                            MachineLoop *CurLoop) const {
1089  if (MRI->use_nodbg_empty(Reg))
1090    return false;
1091
1092  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1093    if (UseMI.isCopyLike())
1094      continue;
1095    if (!CurLoop->contains(UseMI.getParent()))
1096      continue;
1097    for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1098      const MachineOperand &MO = UseMI.getOperand(i);
1099      if (!MO.isReg() || !MO.isUse())
1100        continue;
1101      Register MOReg = MO.getReg();
1102      if (MOReg != Reg)
1103        continue;
1104
1105      if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1106        return true;
1107    }
1108
1109    // Only look at the first in loop use.
1110    break;
1111  }
1112
1113  return false;
1114}
1115
1116/// Return true if the instruction is marked "cheap" or the operand latency
1117/// between its def and a use is one or less.
1118bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
1119  if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1120    return true;
1121
1122  bool isCheap = false;
1123  unsigned NumDefs = MI.getDesc().getNumDefs();
1124  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1125    MachineOperand &DefMO = MI.getOperand(i);
1126    if (!DefMO.isReg() || !DefMO.isDef())
1127      continue;
1128    --NumDefs;
1129    Register Reg = DefMO.getReg();
1130    if (Reg.isPhysical())
1131      continue;
1132
1133    if (!TII->hasLowDefLatency(SchedModel, MI, i))
1134      return false;
1135    isCheap = true;
1136  }
1137
1138  return isCheap;
1139}
1140
1141/// Visit BBs from header to current BB, check if hoisting an instruction of the
1142/// given cost matrix can cause high register pressure.
1143bool
1144MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1145                                         bool CheapInstr) {
1146  for (const auto &RPIdAndCost : Cost) {
1147    if (RPIdAndCost.second <= 0)
1148      continue;
1149
1150    unsigned Class = RPIdAndCost.first;
1151    int Limit = RegLimit[Class];
1152
1153    // Don't hoist cheap instructions if they would increase register pressure,
1154    // even if we're under the limit.
1155    if (CheapInstr && !HoistCheapInsts)
1156      return true;
1157
1158    for (const auto &RP : BackTrace)
1159      if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1160        return true;
1161  }
1162
1163  return false;
1164}
1165
1166/// Traverse the back trace from header to the current block and update their
1167/// register pressures to reflect the effect of hoisting MI from the current
1168/// block to the preheader.
1169void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1170  // First compute the 'cost' of the instruction, i.e. its contribution
1171  // to register pressure.
1172  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1173                               /*ConsiderUnseenAsDef=*/false);
1174
1175  // Update register pressure of blocks from loop header to current block.
1176  for (auto &RP : BackTrace)
1177    for (const auto &RPIdAndCost : Cost)
1178      RP[RPIdAndCost.first] += RPIdAndCost.second;
1179}
1180
1181/// Return true if it is potentially profitable to hoist the given loop
1182/// invariant.
bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
                                          MachineLoop *CurLoop) {
  // IMPLICIT_DEFs cost nothing, so hoisting is always profitable.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  // A copy feeding an invariant store is hoisted so that the store it feeds
  // can be hoisted as well.
  if (HoistConstStores &&  isCopyFeedingInvariantStore(MI, MRI, TRI))
    return true;

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted providing the
  // register allocator can just pull them down again when needed.
  if (isTriviallyReMaterializable(MI))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    // Hoisting pays off if the target reports high latency between this def
    // and any use of it inside the loop.
    if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
      LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // From here on we are in a high-register-pressure situation.

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
    LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // If we have a COPY with other uses in the loop, hoist to allow the users to
  // also be hoisted.
  if (MI.isCopy() && MI.getOperand(0).isReg() &&
      MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() &&
      MI.getOperand(1).getReg().isVirtual() &&
      IsLoopInvariantInst(MI, CurLoop) &&
      any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
             [&CurLoop](MachineInstr &UseMI) {
               return CurLoop->contains(&UseMI);
             }))
    return true;

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!isTriviallyReMaterializable(MI) &&
      !MI.isDereferenceableInvariantLoad()) {
    LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}
1287
1288/// Unfold a load from the given machineinstr if the load itself could be
1289/// hoisted. Return the unfolded and hoistable load, or null if the load
1290/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI,
                                                    MachineLoop *CurLoop) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isDereferenceableInvariantLoad())
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  // A zero opcode means the target cannot unfold a load from this opcode.
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  MachineFunction &MF = *MI->getMF();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  Register Reg = MRI->createVirtualRegister(RC);

  // On success NewMIs[0] is the extracted load defining Reg and NewMIs[1]
  // is the remainder of the original instruction, now reading Reg (asserted
  // below by the size == 2 check).
  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
                                          /*UnfoldLoad=*/true,
                                          /*UnfoldStore=*/false, NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
      !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.

  // Update the call site info.
  if (MI->shouldUpdateCallSiteInfo())
    MF.eraseCallSiteInfo(MI);

  // The original folded instruction is gone; the caller hoists NewMIs[0].
  MI->eraseFromParent();
  return NewMIs[0];
}
1352
1353/// Initialize the CSE map with instructions that are in the current loop
1354/// preheader that may become duplicates of instructions that are hoisted
1355/// out of the loop.
1356void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
1357  for (MachineInstr &MI : *BB)
1358    CSEMap[BB][MI.getOpcode()].push_back(&MI);
1359}
1360
1361/// Initialize AllowedToHoistLoads with information about whether invariant
1362/// loads can be moved outside a given loop
1363void MachineLICMBase::InitializeLoadsHoistableLoops() {
1364  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
1365  SmallVector<MachineLoop *, 8> LoopsInPreOrder;
1366
1367  // Mark all loops as hoistable initially and prepare a list of loops in
1368  // pre-order DFS.
1369  while (!Worklist.empty()) {
1370    auto *L = Worklist.pop_back_val();
1371    AllowedToHoistLoads[L] = true;
1372    LoopsInPreOrder.push_back(L);
1373    Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
1374                    L->getSubLoops().end());
1375  }
1376
1377  // Going from the innermost to outermost loops, check if a loop has
1378  // instructions preventing invariant load hoisting. If such instruction is
1379  // found, mark this loop and its parent as non-hoistable and continue
1380  // investigating the next loop.
1381  // Visiting in a reversed pre-ordered DFS manner
1382  // allows us to not process all the instructions of the outer loop if the
1383  // inner loop is proved to be non-load-hoistable.
1384  for (auto *Loop : reverse(LoopsInPreOrder)) {
1385    for (auto *MBB : Loop->blocks()) {
1386      // If this loop has already been marked as non-hoistable, skip it.
1387      if (!AllowedToHoistLoads[Loop])
1388        continue;
1389      for (auto &MI : *MBB) {
1390        if (!MI.mayStore() && !MI.isCall() &&
1391            !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
1392          continue;
1393        for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
1394          AllowedToHoistLoads[L] = false;
1395        break;
1396      }
1397    }
1398  }
1399}
1400
/// Find an instruction among PrevMIs that is a duplicate of MI.
1402/// Return this instruction if it's found.
1403MachineInstr *
1404MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
1405                                  std::vector<MachineInstr *> &PrevMIs) {
1406  for (MachineInstr *PrevMI : PrevMIs)
1407    if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1408      return PrevMI;
1409
1410  return nullptr;
1411}
1412
1413/// Given a LICM'ed instruction, look for an instruction on the preheader that
/// computes the same value. If it's found, replace all uses with the definition
1415/// the existing instruction rather than hoisting the instruction to the
1416/// preheader.
bool MachineLICMBase::EliminateCSE(
    MachineInstr *MI,
    DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (MI->isImplicitDef())
    return false;

  // Do not CSE normal loads because between them could be store instructions
  // that change the loaded value
  if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
    return false;

  if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      // Collect the operand indices of MI's virtual-register defs.
      if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
        Defs.push_back(i);
    }

    // Constrain each of Dup's def register classes to be compatible with the
    // class of MI's corresponding def; if any constraint fails, roll back all
    // classes changed so far and give up on this CSE.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      Register Reg = MI->getOperand(Idx).getReg();
      Register DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    // All constraints succeeded: redirect every use of MI's defs to Dup's
    // defs and drop flags invalidated by the extended live ranges.
    for (unsigned Idx : Defs) {
      Register Reg = MI->getOperand(Idx).getReg();
      Register DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      MRI->clearKillFlags(DupReg);
      // Clear Dup dead flag if any, we reuse it for Reg.
      if (!MRI->use_nodbg_empty(DupReg))
        Dup->getOperand(Idx).setIsDead(false);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}
1479
1480/// Return true if the given instruction will be CSE'd if it's hoisted out of
1481/// the loop.
1482bool MachineLICMBase::MayCSE(MachineInstr *MI) {
1483  if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1484    return false;
1485
1486  unsigned Opcode = MI->getOpcode();
1487  for (auto &Map : CSEMap) {
1488    // Check this CSEMap's preheader dominates MI's basic block.
1489    if (DT->dominates(Map.first, MI->getParent())) {
1490      DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1491          Map.second.find(Opcode);
1492      // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1493      // the undef property onto uses.
1494      if (CI == Map.second.end() || MI->isImplicitDef())
1495        continue;
1496      if (LookForDuplicate(MI, CI->second) != nullptr)
1497        return true;
1498    }
1499  }
1500
1501  return false;
1502}
1503
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// Returns a bitmask of HoistResult flags indicating whether the instruction
/// was hoisted and whether the original instruction was erased.
unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
                                MachineLoop *CurLoop) {
  MachineBasicBlock *SrcBlock = MI->getParent();

  // Disable the instruction hoisting due to block hotness
  if ((DisableHoistingToHotterBlocks == UseBFI::All ||
      (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
      isTgtHotterThanSrc(SrcBlock, Preheader)) {
    ++NumNotHoistedDueToHotness;
    return HoistResult::NotHoisted;
  }
  // First check whether we should hoist this instruction.
  bool HasExtractHoistableLoad = false;
  if (!IsLoopInvariantInst(*MI, CurLoop) ||
      !IsProfitableToHoist(*MI, CurLoop)) {
    // If not, try unfolding a hoistable load. On success the original MI has
    // been erased and MI now points at the extracted load.
    MI = ExtractHoistableLoad(MI, CurLoop);
    if (!MI)
      return HoistResult::NotHoisted;
    HasExtractHoistableLoad = true;
  }

  // If we have hoisted an instruction that may store, it can only be a constant
  // store.
  if (MI->mayStore())
    NumStoreConst++;

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  LLVM_DEBUG({
    dbgs() << "Hoisting " << *MI;
    if (MI->getParent()->getBasicBlock())
      dbgs() << " from " << printMBBReference(*MI->getParent());
    if (Preheader->getBasicBlock())
      dbgs() << " to " << printMBBReference(*Preheader);
    dbgs() << "\n";
  });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  bool HasCSEDone = false;
  for (auto &Map : CSEMap) {
    // Check this CSEMap's preheader dominates MI's basic block.
    if (DT->dominates(Map.first, MI->getParent())) {
      DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
          Map.second.find(Opcode);
      if (CI != Map.second.end()) {
        // EliminateCSE erases MI on success.
        if (EliminateCSE(MI, CI)) {
          HasCSEDone = true;
          break;
        }
      }
    }
  }

  if (!HasCSEDone) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Since we are moving the instruction out of its basic block, we do not
    // retain its debug location. Doing so would degrade the debugging
    // experience and adversely affect the accuracy of profiling information.
    assert(!MI->isDebugInstr() && "Should not hoist debug inst");
    MI->setDebugLoc(DebugLoc());

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (MachineOperand &MO : MI->all_defs())
      if (!MO.isDead())
        MRI->clearKillFlags(MO.getReg());

    // Record the hoisted instruction as a CSE candidate for later hoists.
    CSEMap[Preheader][Opcode].push_back(MI);
  }

  ++NumHoisted;
  Changed = true;

  // When MI was CSE'd, or when the original instruction was replaced by an
  // unfolded load, the caller's pointer no longer refers to a live
  // instruction, so report ErasedMI alongside Hoisted.
  if (HasCSEDone || HasExtractHoistableLoad)
    return HoistResult::Hoisted | HoistResult::ErasedMI;
  return HoistResult::Hoisted;
}
1599
1600/// Get the preheader for the current loop, splitting a critical edge if needed.
1601MachineBasicBlock *
1602MachineLICMBase::getCurPreheader(MachineLoop *CurLoop,
1603                                 MachineBasicBlock *CurPreheader) {
1604  // Determine the block to which to hoist instructions. If we can't find a
1605  // suitable loop predecessor, we can't do any hoisting.
1606
1607  // If we've tried to get a preheader and failed, don't try again.
1608  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1609    return nullptr;
1610
1611  if (!CurPreheader) {
1612    CurPreheader = CurLoop->getLoopPreheader();
1613    if (!CurPreheader) {
1614      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1615      if (!Pred) {
1616        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1617        return nullptr;
1618      }
1619
1620      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1621      if (!CurPreheader) {
1622        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1623        return nullptr;
1624      }
1625    }
1626  }
1627  return CurPreheader;
1628}
1629
1630/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1631/// times hotter than the source basic block.
1632bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1633                                         MachineBasicBlock *TgtBlock) {
1634  // Parse source and target basic block frequency from MBFI
1635  uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1636  uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1637
1638  // Disable the hoisting if source block frequency is zero
1639  if (!SrcBF)
1640    return true;
1641
1642  double Ratio = (double)DstBF / SrcBF;
1643
1644  // Compare the block frequency ratio with the threshold
1645  return Ratio > BlockFrequencyRatioThreshold;
1646}
1647