1193323Sed//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This pass performs loop invariant code motion on machine instructions. We 11193323Sed// attempt to remove as much code from the body of a loop as possible. 12193323Sed// 13193323Sed// This pass does not attempt to throttle itself to limit register pressure. 14193323Sed// The register allocation phases are expected to perform rematerialization 15193323Sed// to recover when register pressure is high. 16193323Sed// 17193323Sed// This pass is not intended to be a replacement or a complete alternative 18193323Sed// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19193323Sed// constructs that are not exposed before lowering and instruction selection. 20193323Sed// 21193323Sed//===----------------------------------------------------------------------===// 22193323Sed 23193323Sed#define DEBUG_TYPE "machine-licm" 24193323Sed#include "llvm/CodeGen/Passes.h" 25252723Sdim#include "llvm/ADT/DenseMap.h" 26252723Sdim#include "llvm/ADT/SmallSet.h" 27252723Sdim#include "llvm/ADT/Statistic.h" 28252723Sdim#include "llvm/Analysis/AliasAnalysis.h" 29193323Sed#include "llvm/CodeGen/MachineDominators.h" 30207618Srdivacky#include "llvm/CodeGen/MachineFrameInfo.h" 31193323Sed#include "llvm/CodeGen/MachineLoopInfo.h" 32198892Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 33193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 34198892Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 35224145Sdim#include "llvm/MC/MCInstrItineraries.h" 36226890Sdim#include "llvm/Support/CommandLine.h" 37193323Sed#include "llvm/Support/Debug.h" 38198090Srdivacky#include "llvm/Support/raw_ostream.h" 39252723Sdim#include "llvm/Target/TargetInstrInfo.h" 40252723Sdim#include "llvm/Target/TargetLowering.h" 41252723Sdim#include "llvm/Target/TargetMachine.h" 42252723Sdim#include "llvm/Target/TargetRegisterInfo.h" 43193323Sedusing namespace llvm; 44193323Sed 45226890Sdimstatic cl::opt<bool> 46226890SdimAvoidSpeculation("avoid-speculation", 47226890Sdim cl::desc("MachineLICM should avoid speculation"), 48235633Sdim cl::init(true), cl::Hidden); 49226890Sdim 50218893SdimSTATISTIC(NumHoisted, 51218893Sdim "Number of machine instructions hoisted out of loops"); 52218893SdimSTATISTIC(NumLowRP, 53218893Sdim "Number of instructions hoisted in low reg pressure situation"); 54218893SdimSTATISTIC(NumHighLatency, 55218893Sdim "Number of high latency instructions hoisted"); 56218893SdimSTATISTIC(NumCSEed, 57218893Sdim "Number of hoisted machine instructions CSEed"); 58207618SrdivackySTATISTIC(NumPostRAHoisted, 59207618Srdivacky "Number of machine instructions hoisted out of loops post regalloc"); 60193323Sed 61193323Sednamespace { 62198892Srdivacky class MachineLICM : public MachineFunctionPass { 63193323Sed const TargetMachine *TM; 64193323Sed const TargetInstrInfo *TII; 65252723Sdim const TargetLoweringBase *TLI; 66198090Srdivacky const TargetRegisterInfo *TRI; 67207618Srdivacky const MachineFrameInfo *MFI; 68218893Sdim MachineRegisterInfo *MRI; 69218893Sdim const InstrItineraryData *InstrItins; 70235633Sdim bool PreRegAlloc; 71193323Sed 72193323Sed // Various analyses that we use... 73198090Srdivacky AliasAnalysis *AA; // Alias analysis info. 74207618Srdivacky MachineLoopInfo *MLI; // Current MachineLoopInfo 75193323Sed MachineDominatorTree *DT; // Machine dominator tree for the cur loop 76193323Sed 77193323Sed // State that is updated as we process loops 78193323Sed bool Changed; // True if a loop is changed. 79210299Sed bool FirstInLoop; // True if it's the first LICM in the loop. 80193323Sed MachineLoop *CurLoop; // The current loop we are working on. 81193323Sed MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 82193323Sed 83235633Sdim // Exit blocks for CurLoop. 84235633Sdim SmallVector<MachineBasicBlock*, 8> ExitBlocks; 85207618Srdivacky 86235633Sdim bool isExitBlock(const MachineBasicBlock *MBB) const { 87235633Sdim return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != 88235633Sdim ExitBlocks.end(); 89235633Sdim } 90235633Sdim 91218893Sdim // Track 'estimated' register pressure. 92218893Sdim SmallSet<unsigned, 32> RegSeen; 93218893Sdim SmallVector<unsigned, 8> RegPressure; 94218893Sdim 95218893Sdim // Register pressure "limit" per register class. If the pressure 96218893Sdim // is higher than the limit, then it's considered high. 97218893Sdim SmallVector<unsigned, 8> RegLimit; 98218893Sdim 99218893Sdim // Register pressure on path leading from loop preheader to current BB. 100218893Sdim SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 101218893Sdim 102212904Sdim // For each opcode, keep a list of potential CSE instructions. 103198892Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 104207618Srdivacky 105226890Sdim enum { 106226890Sdim SpeculateFalse = 0, 107226890Sdim SpeculateTrue = 1, 108226890Sdim SpeculateUnknown = 2 109226890Sdim }; 110226890Sdim 111226890Sdim // If a MBB does not dominate loop exiting blocks then it may not safe 112226890Sdim // to hoist loads from this block. 113226890Sdim // Tri-state: 0 - false, 1 - true, 2 - unknown 114226890Sdim unsigned SpeculationState; 115226890Sdim 116193323Sed public: 117193323Sed static char ID; // Pass identification, replacement for typeid 118207618Srdivacky MachineLICM() : 119218893Sdim MachineFunctionPass(ID), PreRegAlloc(true) { 120218893Sdim initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 121218893Sdim } 122193323Sed 123207618Srdivacky explicit MachineLICM(bool PreRA) : 124218893Sdim MachineFunctionPass(ID), PreRegAlloc(PreRA) { 125218893Sdim initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 126218893Sdim } 127207618Srdivacky 128193323Sed virtual bool runOnMachineFunction(MachineFunction &MF); 129193323Sed 130193323Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 131193323Sed AU.addRequired<MachineLoopInfo>(); 132193323Sed AU.addRequired<MachineDominatorTree>(); 133198090Srdivacky AU.addRequired<AliasAnalysis>(); 134193323Sed AU.addPreserved<MachineLoopInfo>(); 135193323Sed AU.addPreserved<MachineDominatorTree>(); 136193323Sed MachineFunctionPass::getAnalysisUsage(AU); 137193323Sed } 138193323Sed 139193323Sed virtual void releaseMemory() { 140218893Sdim RegSeen.clear(); 141218893Sdim RegPressure.clear(); 142218893Sdim RegLimit.clear(); 143218893Sdim BackTrace.clear(); 144218893Sdim for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 145218893Sdim CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 146218893Sdim CI->second.clear(); 147193323Sed CSEMap.clear(); 148193323Sed } 149193323Sed 150193323Sed private: 151207618Srdivacky /// CandidateInfo - Keep track of information about hoisting candidates. 152207618Srdivacky struct CandidateInfo { 153207618Srdivacky MachineInstr *MI; 154207618Srdivacky unsigned Def; 155207618Srdivacky int FI; 156207618Srdivacky CandidateInfo(MachineInstr *mi, unsigned def, int fi) 157207618Srdivacky : MI(mi), Def(def), FI(fi) {} 158207618Srdivacky }; 159207618Srdivacky 160207618Srdivacky /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 161207618Srdivacky /// invariants out to the preheader. 162207618Srdivacky void HoistRegionPostRA(); 163207618Srdivacky 164207618Srdivacky /// HoistPostRA - When an instruction is found to only use loop invariant 165207618Srdivacky /// operands that is safe to hoist, this instruction is called to do the 166207618Srdivacky /// dirty work. 167207618Srdivacky void HoistPostRA(MachineInstr *MI, unsigned Def); 168207618Srdivacky 169207618Srdivacky /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 170207618Srdivacky /// gather register def and frame object update information. 171235633Sdim void ProcessMI(MachineInstr *MI, 172235633Sdim BitVector &PhysRegDefs, 173235633Sdim BitVector &PhysRegClobbers, 174207618Srdivacky SmallSet<int, 32> &StoredFIs, 175263509Sdim SmallVectorImpl<CandidateInfo> &Candidates); 176207618Srdivacky 177207618Srdivacky /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 178207618Srdivacky /// current loop. 179207618Srdivacky void AddToLiveIns(unsigned Reg); 180207618Srdivacky 181207618Srdivacky /// IsLICMCandidate - Returns true if the instruction may be a suitable 182210299Sed /// candidate for LICM. e.g. If the instruction is a call, then it's 183210299Sed /// obviously not safe to hoist it. 184207618Srdivacky bool IsLICMCandidate(MachineInstr &I); 185207618Srdivacky 186193323Sed /// IsLoopInvariantInst - Returns true if the instruction is loop 187193323Sed /// invariant. I.e., all virtual register operands are defined outside of 188193323Sed /// the loop, physical registers aren't accessed (explicitly or implicitly), 189193323Sed /// and the instruction is hoistable. 190235633Sdim /// 191193323Sed bool IsLoopInvariantInst(MachineInstr &I); 192193323Sed 193235633Sdim /// HasLoopPHIUse - Return true if the specified instruction is used by any 194235633Sdim /// phi node in the current loop. 195235633Sdim bool HasLoopPHIUse(const MachineInstr *MI) const; 196221345Sdim 197218893Sdim /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 198218893Sdim /// and an use in the current loop, return true if the target considered 199218893Sdim /// it 'high'. 200218893Sdim bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 201218893Sdim unsigned Reg) const; 202218893Sdim 203218893Sdim bool IsCheapInstruction(MachineInstr &MI) const; 204218893Sdim 205218893Sdim /// CanCauseHighRegPressure - Visit BBs from header to current BB, 206218893Sdim /// check if hoisting an instruction of the given cost matrix can cause high 207218893Sdim /// register pressure. 208235633Sdim bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); 209218893Sdim 210218893Sdim /// UpdateBackTraceRegPressure - Traverse the back trace from header to 211218893Sdim /// the current block and update their register pressures to reflect the 212218893Sdim /// effect of hoisting MI from the current block to the preheader. 213218893Sdim void UpdateBackTraceRegPressure(const MachineInstr *MI); 214218893Sdim 215193323Sed /// IsProfitableToHoist - Return true if it is potentially profitable to 216193323Sed /// hoist the given loop invariant. 217193323Sed bool IsProfitableToHoist(MachineInstr &MI); 218193323Sed 219226890Sdim /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 220226890Sdim /// If not then a load from this mbb may not be safe to hoist. 221226890Sdim bool IsGuaranteedToExecute(MachineBasicBlock *BB); 222226890Sdim 223235633Sdim void EnterScope(MachineBasicBlock *MBB); 224235633Sdim 225235633Sdim void ExitScope(MachineBasicBlock *MBB); 226235633Sdim 227235633Sdim /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 228235633Sdim /// dominator tree node if its a leaf or all of its children are done. Walk 229235633Sdim /// up the dominator tree to destroy ancestors which are now done. 230235633Sdim void ExitScopeIfDone(MachineDomTreeNode *Node, 231235633Sdim DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 232235633Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 233235633Sdim 234235633Sdim /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 235235633Sdim /// blocks dominated by the specified header block, and that are in the 236235633Sdim /// current loop) in depth first order w.r.t the DominatorTree. This allows 237235633Sdim /// us to visit definitions before uses, allowing us to hoist a loop body in 238235633Sdim /// one pass without iteration. 239193323Sed /// 240235633Sdim void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 241235633Sdim void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 242193323Sed 243226890Sdim /// getRegisterClassIDAndCost - For a given MI, register, and the operand 244226890Sdim /// index, return the ID and cost of its representative register class by 245226890Sdim /// reference. 246226890Sdim void getRegisterClassIDAndCost(const MachineInstr *MI, 247226890Sdim unsigned Reg, unsigned OpIdx, 248226890Sdim unsigned &RCId, unsigned &RCCost) const; 249226890Sdim 250218893Sdim /// InitRegPressure - Find all virtual register references that are liveout 251218893Sdim /// of the preheader to initialize the starting "register pressure". Note 252218893Sdim /// this does not count live through (livein but not used) registers. 253218893Sdim void InitRegPressure(MachineBasicBlock *BB); 254199989Srdivacky 255218893Sdim /// UpdateRegPressure - Update estimate of register pressure after the 256218893Sdim /// specified instruction. 257218893Sdim void UpdateRegPressure(const MachineInstr *MI); 258218893Sdim 259198892Srdivacky /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 260198892Srdivacky /// the load itself could be hoisted. Return the unfolded and hoistable 261198892Srdivacky /// load, or null if the load couldn't be unfolded or if it wouldn't 262198892Srdivacky /// be hoistable. 263198892Srdivacky MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 264198892Srdivacky 265199481Srdivacky /// LookForDuplicate - Find an instruction amount PrevMIs that is a 266199481Srdivacky /// duplicate of MI. Return this instruction if it's found. 267199481Srdivacky const MachineInstr *LookForDuplicate(const MachineInstr *MI, 268199481Srdivacky std::vector<const MachineInstr*> &PrevMIs); 269199481Srdivacky 270198953Srdivacky /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 271198953Srdivacky /// the preheader that compute the same value. If it's found, do a RAU on 272198953Srdivacky /// with the definition of the existing instruction rather than hoisting 273198953Srdivacky /// the instruction to the preheader. 274198953Srdivacky bool EliminateCSE(MachineInstr *MI, 275198953Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 276198953Srdivacky 277226890Sdim /// MayCSE - Return true if the given instruction will be CSE'd if it's 278226890Sdim /// hoisted out of the loop. 279226890Sdim bool MayCSE(MachineInstr *MI); 280226890Sdim 281193323Sed /// Hoist - When an instruction is found to only use loop invariant operands 282193323Sed /// that is safe to hoist, this instruction is called to do the dirty work. 283218893Sdim /// It returns true if the instruction is hoisted. 284218893Sdim bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 285198892Srdivacky 286198892Srdivacky /// InitCSEMap - Initialize the CSE map with instructions that are in the 287198892Srdivacky /// current loop preheader that may become duplicates of instructions that 288198892Srdivacky /// are hoisted out of the loop. 289198892Srdivacky void InitCSEMap(MachineBasicBlock *BB); 290210299Sed 291210299Sed /// getCurPreheader - Get the preheader for the current loop, splitting 292210299Sed /// a critical edge if needed. 293210299Sed MachineBasicBlock *getCurPreheader(); 294193323Sed }; 295193323Sed} // end anonymous namespace 296193323Sed 297193323Sedchar MachineLICM::ID = 0; 298235633Sdimchar &llvm::MachineLICMID = MachineLICM::ID; 299218893SdimINITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 300218893Sdim "Machine Loop Invariant Code Motion", false, false) 301218893SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 302218893SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 303218893SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 304218893SdimINITIALIZE_PASS_END(MachineLICM, "machinelicm", 305218893Sdim "Machine Loop Invariant Code Motion", false, false) 306193323Sed 307210299Sed/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 308210299Sed/// loop that has a unique predecessor. 309210299Sedstatic bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 310210299Sed // Check whether this loop even has a unique predecessor. 311210299Sed if (!CurLoop->getLoopPredecessor()) 312210299Sed return false; 313210299Sed // Ok, now check to see if any of its outer loops do. 314193323Sed for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 315210299Sed if (L->getLoopPredecessor()) 316193323Sed return false; 317210299Sed // None of them did, so this is the outermost with a unique predecessor. 318193323Sed return true; 319193323Sed} 320193323Sed 321193323Sedbool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 322210299Sed Changed = FirstInLoop = false; 323193323Sed TM = &MF.getTarget(); 324193323Sed TII = TM->getInstrInfo(); 325218893Sdim TLI = TM->getTargetLowering(); 326198090Srdivacky TRI = TM->getRegisterInfo(); 327207618Srdivacky MFI = MF.getFrameInfo(); 328218893Sdim MRI = &MF.getRegInfo(); 329218893Sdim InstrItins = TM->getInstrItineraryData(); 330193323Sed 331235633Sdim PreRegAlloc = MRI->isSSA(); 332235633Sdim 333235633Sdim if (PreRegAlloc) 334235633Sdim DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 335235633Sdim else 336235633Sdim DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 337245431Sdim DEBUG(dbgs() << MF.getName() << " ********\n"); 338235633Sdim 339218893Sdim if (PreRegAlloc) { 340218893Sdim // Estimate register pressure during pre-regalloc pass. 341218893Sdim unsigned NumRC = TRI->getNumRegClasses(); 342218893Sdim RegPressure.resize(NumRC); 343218893Sdim std::fill(RegPressure.begin(), RegPressure.end(), 0); 344218893Sdim RegLimit.resize(NumRC); 345218893Sdim for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 346218893Sdim E = TRI->regclass_end(); I != E; ++I) 347221345Sdim RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 348218893Sdim } 349218893Sdim 350193323Sed // Get our Loop information... 351207618Srdivacky MLI = &getAnalysis<MachineLoopInfo>(); 352207618Srdivacky DT = &getAnalysis<MachineDominatorTree>(); 353207618Srdivacky AA = &getAnalysis<AliasAnalysis>(); 354193323Sed 355210299Sed SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 356210299Sed while (!Worklist.empty()) { 357210299Sed CurLoop = Worklist.pop_back_val(); 358210299Sed CurPreheader = 0; 359235633Sdim ExitBlocks.clear(); 360193323Sed 361207618Srdivacky // If this is done before regalloc, only visit outer-most preheader-sporting 362207618Srdivacky // loops. 363210299Sed if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 364210299Sed Worklist.append(CurLoop->begin(), CurLoop->end()); 365193323Sed continue; 366210299Sed } 367193323Sed 368235633Sdim CurLoop->getExitBlocks(ExitBlocks); 369235633Sdim 370207618Srdivacky if (!PreRegAlloc) 371207618Srdivacky HoistRegionPostRA(); 372207618Srdivacky else { 373207618Srdivacky // CSEMap is initialized for loop header when the first instruction is 374207618Srdivacky // being hoisted. 375207618Srdivacky MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 376210299Sed FirstInLoop = true; 377235633Sdim HoistOutOfLoop(N); 378207618Srdivacky CSEMap.clear(); 379207618Srdivacky } 380193323Sed } 381193323Sed 382193323Sed return Changed; 383193323Sed} 384193323Sed 385207618Srdivacky/// InstructionStoresToFI - Return true if instruction stores to the 386207618Srdivacky/// specified frame. 387207618Srdivackystatic bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 388207618Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 389207618Srdivacky oe = MI->memoperands_end(); o != oe; ++o) { 390207618Srdivacky if (!(*o)->isStore() || !(*o)->getValue()) 391207618Srdivacky continue; 392207618Srdivacky if (const FixedStackPseudoSourceValue *Value = 393207618Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 394207618Srdivacky if (Value->getFrameIndex() == FI) 395207618Srdivacky return true; 396207618Srdivacky } 397207618Srdivacky } 398207618Srdivacky return false; 399207618Srdivacky} 400207618Srdivacky 401207618Srdivacky/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 402207618Srdivacky/// gather register def and frame object update information. 403207618Srdivackyvoid MachineLICM::ProcessMI(MachineInstr *MI, 404235633Sdim BitVector &PhysRegDefs, 405235633Sdim BitVector &PhysRegClobbers, 406207618Srdivacky SmallSet<int, 32> &StoredFIs, 407263509Sdim SmallVectorImpl<CandidateInfo> &Candidates) { 408207618Srdivacky bool RuledOut = false; 409207618Srdivacky bool HasNonInvariantUse = false; 410207618Srdivacky unsigned Def = 0; 411207618Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 412207618Srdivacky const MachineOperand &MO = MI->getOperand(i); 413207618Srdivacky if (MO.isFI()) { 414207618Srdivacky // Remember if the instruction stores to the frame index. 415207618Srdivacky int FI = MO.getIndex(); 416207618Srdivacky if (!StoredFIs.count(FI) && 417207618Srdivacky MFI->isSpillSlotObjectIndex(FI) && 418207618Srdivacky InstructionStoresToFI(MI, FI)) 419207618Srdivacky StoredFIs.insert(FI); 420207618Srdivacky HasNonInvariantUse = true; 421207618Srdivacky continue; 422207618Srdivacky } 423207618Srdivacky 424235633Sdim // We can't hoist an instruction defining a physreg that is clobbered in 425235633Sdim // the loop. 426235633Sdim if (MO.isRegMask()) { 427235633Sdim PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 428235633Sdim continue; 429235633Sdim } 430235633Sdim 431207618Srdivacky if (!MO.isReg()) 432207618Srdivacky continue; 433207618Srdivacky unsigned Reg = MO.getReg(); 434207618Srdivacky if (!Reg) 435207618Srdivacky continue; 436207618Srdivacky assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 437207618Srdivacky "Not expecting virtual register!"); 438207618Srdivacky 439207618Srdivacky if (!MO.isDef()) { 440235633Sdim if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 441207618Srdivacky // If it's using a non-loop-invariant register, then it's obviously not 442207618Srdivacky // safe to hoist. 443207618Srdivacky HasNonInvariantUse = true; 444207618Srdivacky continue; 445207618Srdivacky } 446207618Srdivacky 447207618Srdivacky if (MO.isImplicit()) { 448245431Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 449245431Sdim PhysRegClobbers.set(*AI); 450207618Srdivacky if (!MO.isDead()) 451207618Srdivacky // Non-dead implicit def? This cannot be hoisted. 452207618Srdivacky RuledOut = true; 453207618Srdivacky // No need to check if a dead implicit def is also defined by 454207618Srdivacky // another instruction. 455207618Srdivacky continue; 456207618Srdivacky } 457207618Srdivacky 458207618Srdivacky // FIXME: For now, avoid instructions with multiple defs, unless 459207618Srdivacky // it's a dead implicit def. 460207618Srdivacky if (Def) 461207618Srdivacky RuledOut = true; 462207618Srdivacky else 463207618Srdivacky Def = Reg; 464207618Srdivacky 465207618Srdivacky // If we have already seen another instruction that defines the same 466235633Sdim // register, then this is not safe. Two defs is indicated by setting a 467235633Sdim // PhysRegClobbers bit. 468245431Sdim for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 469235633Sdim if (PhysRegDefs.test(*AS)) 470235633Sdim PhysRegClobbers.set(*AS); 471235633Sdim PhysRegDefs.set(*AS); 472235633Sdim } 473263509Sdim if (PhysRegClobbers.test(Reg)) 474263509Sdim // MI defined register is seen defined by another instruction in 475263509Sdim // the loop, it cannot be a LICM candidate. 476263509Sdim RuledOut = true; 477207618Srdivacky } 478207618Srdivacky 479207618Srdivacky // Only consider reloads for now and remats which do not have register 480207618Srdivacky // operands. FIXME: Consider unfold load folding instructions. 481207618Srdivacky if (Def && !RuledOut) { 482207618Srdivacky int FI = INT_MIN; 483207618Srdivacky if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 484207618Srdivacky (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 485207618Srdivacky Candidates.push_back(CandidateInfo(MI, Def, FI)); 486207618Srdivacky } 487207618Srdivacky} 488207618Srdivacky 489207618Srdivacky/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 490207618Srdivacky/// invariants out to the preheader. 491207618Srdivackyvoid MachineLICM::HoistRegionPostRA() { 492235633Sdim MachineBasicBlock *Preheader = getCurPreheader(); 493235633Sdim if (!Preheader) 494235633Sdim return; 495235633Sdim 496207618Srdivacky unsigned NumRegs = TRI->getNumRegs(); 497235633Sdim BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 498235633Sdim BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 499207618Srdivacky 500207618Srdivacky SmallVector<CandidateInfo, 32> Candidates; 501207618Srdivacky SmallSet<int, 32> StoredFIs; 502207618Srdivacky 503207618Srdivacky // Walk the entire region, count number of defs for each register, and 504207618Srdivacky // collect potential LICM candidates. 505263509Sdim const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 506207618Srdivacky for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 507207618Srdivacky MachineBasicBlock *BB = Blocks[i]; 508226890Sdim 509226890Sdim // If the header of the loop containing this basic block is a landing pad, 510226890Sdim // then don't try to hoist instructions out of this loop. 511226890Sdim const MachineLoop *ML = MLI->getLoopFor(BB); 512226890Sdim if (ML && ML->getHeader()->isLandingPad()) continue; 513226890Sdim 514207618Srdivacky // Conservatively treat live-in's as an external def. 515207618Srdivacky // FIXME: That means a reload that're reused in successor block(s) will not 516207618Srdivacky // be LICM'ed. 517207618Srdivacky for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 518207618Srdivacky E = BB->livein_end(); I != E; ++I) { 519207618Srdivacky unsigned Reg = *I; 520245431Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 521245431Sdim PhysRegDefs.set(*AI); 522207618Srdivacky } 523207618Srdivacky 524226890Sdim SpeculationState = SpeculateUnknown; 525207618Srdivacky for (MachineBasicBlock::iterator 526207618Srdivacky MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 527207618Srdivacky MachineInstr *MI = &*MII; 528235633Sdim ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 529207618Srdivacky } 530207618Srdivacky } 531207618Srdivacky 532235633Sdim // Gather the registers read / clobbered by the terminator. 533235633Sdim BitVector TermRegs(NumRegs); 534235633Sdim MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 535235633Sdim if (TI != Preheader->end()) { 536235633Sdim for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 537235633Sdim const MachineOperand &MO = TI->getOperand(i); 538235633Sdim if (!MO.isReg()) 539235633Sdim continue; 540235633Sdim unsigned Reg = MO.getReg(); 541235633Sdim if (!Reg) 542235633Sdim continue; 543245431Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 544245431Sdim TermRegs.set(*AI); 545235633Sdim } 546235633Sdim } 547235633Sdim 548207618Srdivacky // Now evaluate whether the potential candidates qualify. 549207618Srdivacky // 1. Check if the candidate defined register is defined by another 550207618Srdivacky // instruction in the loop. 551207618Srdivacky // 2. If the candidate is a load from stack slot (always true for now), 552207618Srdivacky // check if the slot is stored anywhere in the loop. 553235633Sdim // 3. Make sure candidate def should not clobber 554235633Sdim // registers read by the terminator. Similarly its def should not be 555235633Sdim // clobbered by the terminator. 556207618Srdivacky for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 557207618Srdivacky if (Candidates[i].FI != INT_MIN && 558207618Srdivacky StoredFIs.count(Candidates[i].FI)) 559207618Srdivacky continue; 560207618Srdivacky 561235633Sdim unsigned Def = Candidates[i].Def; 562235633Sdim if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 563207618Srdivacky bool Safe = true; 564207618Srdivacky MachineInstr *MI = Candidates[i].MI; 565207618Srdivacky for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 566207618Srdivacky const MachineOperand &MO = MI->getOperand(j); 567207618Srdivacky if (!MO.isReg() || MO.isDef() || !MO.getReg()) 568207618Srdivacky continue; 569235633Sdim unsigned Reg = MO.getReg(); 570235633Sdim if (PhysRegDefs.test(Reg) || 571235633Sdim PhysRegClobbers.test(Reg)) { 572207618Srdivacky // If it's using a non-loop-invariant register, then it's obviously 573207618Srdivacky // not safe to hoist. 574207618Srdivacky Safe = false; 575207618Srdivacky break; 576207618Srdivacky } 577207618Srdivacky } 578207618Srdivacky if (Safe) 579207618Srdivacky HoistPostRA(MI, Candidates[i].Def); 580207618Srdivacky } 581207618Srdivacky } 582207618Srdivacky} 583207618Srdivacky 584207618Srdivacky/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 585207618Srdivacky/// loop, and make sure it is not killed by any instructions in the loop. 586207618Srdivackyvoid MachineLICM::AddToLiveIns(unsigned Reg) { 587263509Sdim const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 588207618Srdivacky for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 589207618Srdivacky MachineBasicBlock *BB = Blocks[i]; 590207618Srdivacky if (!BB->isLiveIn(Reg)) 591207618Srdivacky BB->addLiveIn(Reg); 592207618Srdivacky for (MachineBasicBlock::iterator 593207618Srdivacky MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 594207618Srdivacky MachineInstr *MI = &*MII; 595207618Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 596207618Srdivacky MachineOperand &MO = MI->getOperand(i); 597207618Srdivacky if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 598207618Srdivacky if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 599207618Srdivacky MO.setIsKill(false); 600207618Srdivacky } 601207618Srdivacky } 602207618Srdivacky } 603207618Srdivacky} 604207618Srdivacky 605207618Srdivacky/// HoistPostRA - When an instruction is found to only use loop invariant 606207618Srdivacky/// operands that is safe to hoist, this instruction is called to do the 607207618Srdivacky/// dirty work. 608207618Srdivackyvoid MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 609210299Sed MachineBasicBlock *Preheader = getCurPreheader(); 610210299Sed 611207618Srdivacky // Now move the instructions to the predecessor, inserting it before any 612207618Srdivacky // terminator instructions. 613235633Sdim DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 614235633Sdim << MI->getParent()->getNumber() << ": " << *MI); 615207618Srdivacky 616207618Srdivacky // Splice the instruction to the preheader. 617207618Srdivacky MachineBasicBlock *MBB = MI->getParent(); 618210299Sed Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 619207618Srdivacky 620235633Sdim // Add register to livein list to all the BBs in the current loop since a 621207618Srdivacky // loop invariant must be kept live throughout the whole loop. This is 622207618Srdivacky // important to ensure later passes do not scavenge the def register. 623207618Srdivacky AddToLiveIns(Def); 624207618Srdivacky 625207618Srdivacky ++NumPostRAHoisted; 626207618Srdivacky Changed = true; 627207618Srdivacky} 628207618Srdivacky 629226890Sdim// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 630226890Sdim// If not then a load from this mbb may not be safe to hoist. 631226890Sdimbool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 632226890Sdim if (SpeculationState != SpeculateUnknown) 633226890Sdim return SpeculationState == SpeculateFalse; 634235633Sdim 635226890Sdim if (BB != CurLoop->getHeader()) { 636226890Sdim // Check loop exiting blocks. 637226890Sdim SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 638226890Sdim CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 639226890Sdim for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 640226890Sdim if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 641226890Sdim SpeculationState = SpeculateTrue; 642226890Sdim return false; 643226890Sdim } 644226890Sdim } 645226890Sdim 646226890Sdim SpeculationState = SpeculateFalse; 647226890Sdim return true; 648226890Sdim} 649226890Sdim 650235633Sdimvoid MachineLICM::EnterScope(MachineBasicBlock *MBB) { 651235633Sdim DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 652193323Sed 653235633Sdim // Remember livein register pressure. 654235633Sdim BackTrace.push_back(RegPressure); 655235633Sdim} 656226890Sdim 657235633Sdimvoid MachineLICM::ExitScope(MachineBasicBlock *MBB) { 658235633Sdim DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 659235633Sdim BackTrace.pop_back(); 660235633Sdim} 661193323Sed 662235633Sdim/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 663235633Sdim/// dominator tree node if its a leaf or all of its children are done. Walk 664235633Sdim/// up the dominator tree to destroy ancestors which are now done. 665235633Sdimvoid MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 666235633Sdim DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 667235633Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 668235633Sdim if (OpenChildren[Node]) 669218893Sdim return; 670218893Sdim 671235633Sdim // Pop scope. 672235633Sdim ExitScope(Node->getBlock()); 673235633Sdim 674235633Sdim // Now traverse upwards to pop ancestors whose offsprings are all done. 675235633Sdim while (MachineDomTreeNode *Parent = ParentMap[Node]) { 676235633Sdim unsigned Left = --OpenChildren[Parent]; 677235633Sdim if (Left != 0) 678235633Sdim break; 679235633Sdim ExitScope(Parent->getBlock()); 680235633Sdim Node = Parent; 681235633Sdim } 682235633Sdim} 683235633Sdim 684235633Sdim/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 685235633Sdim/// blocks dominated by the specified header block, and that are in the 686235633Sdim/// current loop) in depth first order w.r.t the DominatorTree. This allows 687235633Sdim/// us to visit definitions before uses, allowing us to hoist a loop body in 688235633Sdim/// one pass without iteration. 689235633Sdim/// 690235633Sdimvoid MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 691235633Sdim SmallVector<MachineDomTreeNode*, 32> Scopes; 692235633Sdim SmallVector<MachineDomTreeNode*, 8> WorkList; 693235633Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 694235633Sdim DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 695235633Sdim 696235633Sdim // Perform a DFS walk to determine the order of visit. 697235633Sdim WorkList.push_back(HeaderN); 698235633Sdim do { 699235633Sdim MachineDomTreeNode *Node = WorkList.pop_back_val(); 700235633Sdim assert(Node != 0 && "Null dominator tree node?"); 701235633Sdim MachineBasicBlock *BB = Node->getBlock(); 702235633Sdim 703235633Sdim // If the header of the loop containing this basic block is a landing pad, 704235633Sdim // then don't try to hoist instructions out of this loop. 705235633Sdim const MachineLoop *ML = MLI->getLoopFor(BB); 706235633Sdim if (ML && ML->getHeader()->isLandingPad()) 707235633Sdim continue; 708235633Sdim 709235633Sdim // If this subregion is not in the top level loop at all, exit. 710235633Sdim if (!CurLoop->contains(BB)) 711235633Sdim continue; 712235633Sdim 713235633Sdim Scopes.push_back(Node); 714235633Sdim const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 715235633Sdim unsigned NumChildren = Children.size(); 716235633Sdim 717235633Sdim // Don't hoist things out of a large switch statement. This often causes 718235633Sdim // code to be hoisted that wasn't going to be executed, and increases 719235633Sdim // register pressure in a situation where it's likely to matter. 720235633Sdim if (BB->succ_size() >= 25) 721235633Sdim NumChildren = 0; 722235633Sdim 723235633Sdim OpenChildren[Node] = NumChildren; 724235633Sdim // Add children in reverse order as then the next popped worklist node is 725235633Sdim // the first child of this node. This means we ultimately traverse the 726235633Sdim // DOM tree in exactly the same order as if we'd recursed. 727235633Sdim for (int i = (int)NumChildren-1; i >= 0; --i) { 728235633Sdim MachineDomTreeNode *Child = Children[i]; 729235633Sdim ParentMap[Child] = Node; 730235633Sdim WorkList.push_back(Child); 731235633Sdim } 732235633Sdim } while (!WorkList.empty()); 733235633Sdim 734235633Sdim if (Scopes.size() != 0) { 735235633Sdim MachineBasicBlock *Preheader = getCurPreheader(); 736235633Sdim if (!Preheader) 737235633Sdim return; 738235633Sdim 739218893Sdim // Compute registers which are livein into the loop headers. 740218893Sdim RegSeen.clear(); 741218893Sdim BackTrace.clear(); 742218893Sdim InitRegPressure(Preheader); 743218893Sdim } 744218893Sdim 745235633Sdim // Now perform LICM. 746235633Sdim for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 747235633Sdim MachineDomTreeNode *Node = Scopes[i]; 748235633Sdim MachineBasicBlock *MBB = Node->getBlock(); 749218893Sdim 750235633Sdim MachineBasicBlock *Preheader = getCurPreheader(); 751235633Sdim if (!Preheader) 752235633Sdim continue; 753193323Sed 754235633Sdim EnterScope(MBB); 755235633Sdim 756235633Sdim // Process the block 757235633Sdim SpeculationState = SpeculateUnknown; 758235633Sdim for (MachineBasicBlock::iterator 759235633Sdim MII = MBB->begin(), E = MBB->end(); MII != E; ) { 760235633Sdim MachineBasicBlock::iterator NextMII = MII; ++NextMII; 761235633Sdim MachineInstr *MI = &*MII; 762235633Sdim if (!Hoist(MI, Preheader)) 763235633Sdim UpdateRegPressure(MI); 764235633Sdim MII = NextMII; 765235633Sdim } 766235633Sdim 767235633Sdim // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 768235633Sdim ExitScopeIfDone(Node, OpenChildren, ParentMap); 769212904Sdim } 770193323Sed} 771193323Sed 772218893Sdimstatic bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 773218893Sdim return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 774218893Sdim} 775218893Sdim 776226890Sdim/// getRegisterClassIDAndCost - For a given MI, register, and the operand 777226890Sdim/// index, return the ID and cost of its representative register class. 778226890Sdimvoid 779226890SdimMachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 780226890Sdim unsigned Reg, unsigned OpIdx, 781226890Sdim unsigned &RCId, unsigned &RCCost) const { 782226890Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 783252723Sdim MVT VT = *RC->vt_begin(); 784235633Sdim if (VT == MVT::Untyped) { 785226890Sdim RCId = RC->getID(); 786226890Sdim RCCost = 1; 787226890Sdim } else { 788226890Sdim RCId = TLI->getRepRegClassFor(VT)->getID(); 789226890Sdim RCCost = TLI->getRepRegClassCostFor(VT); 790226890Sdim } 791226890Sdim} 792235633Sdim 793218893Sdim/// InitRegPressure - Find all virtual register references that are liveout of 794218893Sdim/// the preheader to initialize the starting "register pressure". Note this 795218893Sdim/// does not count live through (livein but not used) registers. 796218893Sdimvoid MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 797218893Sdim std::fill(RegPressure.begin(), RegPressure.end(), 0); 798218893Sdim 799218893Sdim // If the preheader has only a single predecessor and it ends with a 800218893Sdim // fallthrough or an unconditional branch, then scan its predecessor for live 801218893Sdim // defs as well. This happens whenever the preheader is created by splitting 802218893Sdim // the critical edge from the loop predecessor to the loop header. 803218893Sdim if (BB->pred_size() == 1) { 804218893Sdim MachineBasicBlock *TBB = 0, *FBB = 0; 805218893Sdim SmallVector<MachineOperand, 4> Cond; 806218893Sdim if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 807218893Sdim InitRegPressure(*BB->pred_begin()); 808218893Sdim } 809218893Sdim 810218893Sdim for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 811218893Sdim MII != E; ++MII) { 812218893Sdim MachineInstr *MI = &*MII; 813218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 814218893Sdim const MachineOperand &MO = MI->getOperand(i); 815218893Sdim if (!MO.isReg() || MO.isImplicit()) 816218893Sdim continue; 817218893Sdim unsigned Reg = MO.getReg(); 818218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 819218893Sdim continue; 820218893Sdim 821218893Sdim bool isNew = RegSeen.insert(Reg); 822226890Sdim unsigned RCId, RCCost; 823226890Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 824218893Sdim if (MO.isDef()) 825226890Sdim RegPressure[RCId] += RCCost; 826218893Sdim else { 827218893Sdim bool isKill = isOperandKill(MO, MRI); 828218893Sdim if (isNew && !isKill) 829218893Sdim // Haven't seen this, it must be a livein. 830226890Sdim RegPressure[RCId] += RCCost; 831218893Sdim else if (!isNew && isKill) 832226890Sdim RegPressure[RCId] -= RCCost; 833218893Sdim } 834218893Sdim } 835218893Sdim } 836218893Sdim} 837218893Sdim 838218893Sdim/// UpdateRegPressure - Update estimate of register pressure after the 839218893Sdim/// specified instruction. 840218893Sdimvoid MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 841218893Sdim if (MI->isImplicitDef()) 842218893Sdim return; 843218893Sdim 844218893Sdim SmallVector<unsigned, 4> Defs; 845218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 846218893Sdim const MachineOperand &MO = MI->getOperand(i); 847218893Sdim if (!MO.isReg() || MO.isImplicit()) 848218893Sdim continue; 849218893Sdim unsigned Reg = MO.getReg(); 850218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 851218893Sdim continue; 852218893Sdim 853218893Sdim bool isNew = RegSeen.insert(Reg); 854218893Sdim if (MO.isDef()) 855218893Sdim Defs.push_back(Reg); 856218893Sdim else if (!isNew && isOperandKill(MO, MRI)) { 857226890Sdim unsigned RCId, RCCost; 858226890Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 859218893Sdim if (RCCost > RegPressure[RCId]) 860218893Sdim RegPressure[RCId] = 0; 861218893Sdim else 862218893Sdim RegPressure[RCId] -= RCCost; 863218893Sdim } 864218893Sdim } 865218893Sdim 866226890Sdim unsigned Idx = 0; 867218893Sdim while (!Defs.empty()) { 868218893Sdim unsigned Reg = Defs.pop_back_val(); 869226890Sdim unsigned RCId, RCCost; 870226890Sdim getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 871218893Sdim RegPressure[RCId] += RCCost; 872226890Sdim ++Idx; 873218893Sdim } 874218893Sdim} 875218893Sdim 876235633Sdim/// isLoadFromGOTOrConstantPool - Return true if this machine instruction 877235633Sdim/// loads from global offset table or constant pool. 878235633Sdimstatic bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 879235633Sdim assert (MI.mayLoad() && "Expected MI that loads!"); 880235633Sdim for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 881235633Sdim E = MI.memoperands_end(); I != E; ++I) { 882235633Sdim if (const Value *V = (*I)->getValue()) { 883235633Sdim if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) 884235633Sdim if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 885235633Sdim return true; 886235633Sdim } 887235633Sdim } 888235633Sdim return false; 889235633Sdim} 890235633Sdim 891207618Srdivacky/// IsLICMCandidate - Returns true if the instruction may be a suitable 892207618Srdivacky/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 893207618Srdivacky/// not safe to hoist it. 894207618Srdivackybool MachineLICM::IsLICMCandidate(MachineInstr &I) { 895210299Sed // Check if it's safe to move the instruction. 896210299Sed bool DontMoveAcrossStore = true; 897210299Sed if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 898207618Srdivacky return false; 899226890Sdim 900226890Sdim // If it is load then check if it is guaranteed to execute by making sure that 901226890Sdim // it dominates all exiting blocks. If it doesn't, then there is a path out of 902235633Sdim // the loop which does not execute this load, so we can't hoist it. Loads 903235633Sdim // from constant memory are not safe to speculate all the time, for example 904235633Sdim // indexed load from a jump table. 905226890Sdim // Stores and side effects are already checked by isSafeToMove. 906235633Sdim if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 907235633Sdim !IsGuaranteedToExecute(I.getParent())) 908226890Sdim return false; 909226890Sdim 910207618Srdivacky return true; 911207618Srdivacky} 912193323Sed 913207618Srdivacky/// IsLoopInvariantInst - Returns true if the instruction is loop 914207618Srdivacky/// invariant. I.e., all virtual register operands are defined outside of the 915207618Srdivacky/// loop, physical registers aren't accessed explicitly, and there are no side 916207618Srdivacky/// effects that aren't captured by the operands or other flags. 917235633Sdim/// 918207618Srdivackybool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 919207618Srdivacky if (!IsLICMCandidate(I)) 920207618Srdivacky return false; 921207618Srdivacky 922193323Sed // The instruction is loop invariant if all of its operands are. 923193323Sed for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 924193323Sed const MachineOperand &MO = I.getOperand(i); 925193323Sed 926193323Sed if (!MO.isReg()) 927193323Sed continue; 928193323Sed 929193323Sed unsigned Reg = MO.getReg(); 930193323Sed if (Reg == 0) continue; 931193323Sed 932193323Sed // Don't hoist an instruction that uses or defines a physical register. 933198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 934198090Srdivacky if (MO.isUse()) { 935198090Srdivacky // If the physreg has no defs anywhere, it's just an ambient register 936198090Srdivacky // and we can freely move its uses. Alternatively, if it's allocatable, 937198090Srdivacky // it could get allocated to something with a def during allocation. 938235633Sdim if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 939198090Srdivacky return false; 940198090Srdivacky // Otherwise it's safe to move. 941198090Srdivacky continue; 942198090Srdivacky } else if (!MO.isDead()) { 943198090Srdivacky // A def that isn't dead. We can't move it. 944198090Srdivacky return false; 945204642Srdivacky } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 946204642Srdivacky // If the reg is live into the loop, we can't hoist an instruction 947204642Srdivacky // which would clobber it. 948204642Srdivacky return false; 949198090Srdivacky } 950198090Srdivacky } 951193323Sed 952193323Sed if (!MO.isUse()) 953193323Sed continue; 954193323Sed 955218893Sdim assert(MRI->getVRegDef(Reg) && 956193323Sed "Machine instr not mapped for this vreg?!"); 957193323Sed 958193323Sed // If the loop contains the definition of an operand, then the instruction 959193323Sed // isn't loop invariant. 960218893Sdim if (CurLoop->contains(MRI->getVRegDef(Reg))) 961193323Sed return false; 962193323Sed } 963193323Sed 964193323Sed // If we got this far, the instruction is loop invariant! 965193323Sed return true; 966193323Sed} 967193323Sed 968193323Sed 969235633Sdim/// HasLoopPHIUse - Return true if the specified instruction is used by a 970235633Sdim/// phi node and hoisting it could cause a copy to be inserted. 971235633Sdimbool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { 972235633Sdim SmallVector<const MachineInstr*, 8> Work(1, MI); 973235633Sdim do { 974235633Sdim MI = Work.pop_back_val(); 975235633Sdim for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 976235633Sdim if (!MO->isReg() || !MO->isDef()) 977235633Sdim continue; 978235633Sdim unsigned Reg = MO->getReg(); 979235633Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 980235633Sdim continue; 981235633Sdim for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 982235633Sdim UE = MRI->use_end(); UI != UE; ++UI) { 983235633Sdim MachineInstr *UseMI = &*UI; 984235633Sdim // A PHI may cause a copy to be inserted. 985235633Sdim if (UseMI->isPHI()) { 986235633Sdim // A PHI inside the loop causes a copy because the live range of Reg is 987235633Sdim // extended across the PHI. 988235633Sdim if (CurLoop->contains(UseMI)) 989235633Sdim return true; 990235633Sdim // A PHI in an exit block can cause a copy to be inserted if the PHI 991235633Sdim // has multiple predecessors in the loop with different values. 992235633Sdim // For now, approximate by rejecting all exit blocks. 993235633Sdim if (isExitBlock(UseMI->getParent())) 994235633Sdim return true; 995235633Sdim continue; 996235633Sdim } 997235633Sdim // Look past copies as well. 998235633Sdim if (UseMI->isCopy() && CurLoop->contains(UseMI)) 999235633Sdim Work.push_back(UseMI); 1000235633Sdim } 1001221345Sdim } 1002235633Sdim } while (!Work.empty()); 1003193323Sed return false; 1004193323Sed} 1005193323Sed 1006218893Sdim/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 1007218893Sdim/// and an use in the current loop, return true if the target considered 1008218893Sdim/// it 'high'. 1009218893Sdimbool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 1010218893Sdim unsigned DefIdx, unsigned Reg) const { 1011218893Sdim if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 1012218893Sdim return false; 1013218893Sdim 1014218893Sdim for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 1015218893Sdim E = MRI->use_nodbg_end(); I != E; ++I) { 1016218893Sdim MachineInstr *UseMI = &*I; 1017218893Sdim if (UseMI->isCopyLike()) 1018218893Sdim continue; 1019218893Sdim if (!CurLoop->contains(UseMI->getParent())) 1020218893Sdim continue; 1021218893Sdim for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 1022218893Sdim const MachineOperand &MO = UseMI->getOperand(i); 1023218893Sdim if (!MO.isReg() || !MO.isUse()) 1024218893Sdim continue; 1025218893Sdim unsigned MOReg = MO.getReg(); 1026218893Sdim if (MOReg != Reg) 1027218893Sdim continue; 1028218893Sdim 1029218893Sdim if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 1030218893Sdim return true; 1031218893Sdim } 1032218893Sdim 1033218893Sdim // Only look at the first in loop use. 1034218893Sdim break; 1035199989Srdivacky } 1036218893Sdim 1037218893Sdim return false; 1038199989Srdivacky} 1039199989Srdivacky 1040218893Sdim/// IsCheapInstruction - Return true if the instruction is marked "cheap" or 1041218893Sdim/// the operand latency between its def and a use is one or less. 1042218893Sdimbool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1043235633Sdim if (MI.isAsCheapAsAMove() || MI.isCopyLike()) 1044218893Sdim return true; 1045218893Sdim if (!InstrItins || InstrItins->isEmpty()) 1046218893Sdim return false; 1047218893Sdim 1048218893Sdim bool isCheap = false; 1049218893Sdim unsigned NumDefs = MI.getDesc().getNumDefs(); 1050218893Sdim for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1051218893Sdim MachineOperand &DefMO = MI.getOperand(i); 1052218893Sdim if (!DefMO.isReg() || !DefMO.isDef()) 1053218893Sdim continue; 1054218893Sdim --NumDefs; 1055218893Sdim unsigned Reg = DefMO.getReg(); 1056218893Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1057218893Sdim continue; 1058218893Sdim 1059218893Sdim if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1060218893Sdim return false; 1061218893Sdim isCheap = true; 1062218893Sdim } 1063218893Sdim 1064218893Sdim return isCheap; 1065218893Sdim} 1066218893Sdim 1067218893Sdim/// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1068218893Sdim/// if hoisting an instruction of the given cost matrix can cause high 1069218893Sdim/// register pressure. 1070235633Sdimbool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, 1071235633Sdim bool CheapInstr) { 1072218893Sdim for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1073218893Sdim CI != CE; ++CI) { 1074235633Sdim if (CI->second <= 0) 1075218893Sdim continue; 1076218893Sdim 1077218893Sdim unsigned RCId = CI->first; 1078235633Sdim unsigned Limit = RegLimit[RCId]; 1079235633Sdim int Cost = CI->second; 1080235633Sdim 1081235633Sdim // Don't hoist cheap instructions if they would increase register pressure, 1082235633Sdim // even if we're under the limit. 1083235633Sdim if (CheapInstr) 1084235633Sdim return true; 1085235633Sdim 1086218893Sdim for (unsigned i = BackTrace.size(); i != 0; --i) { 1087263509Sdim SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; 1088235633Sdim if (RP[RCId] + Cost >= Limit) 1089218893Sdim return true; 1090218893Sdim } 1091218893Sdim } 1092218893Sdim 1093218893Sdim return false; 1094218893Sdim} 1095218893Sdim 1096218893Sdim/// UpdateBackTraceRegPressure - Traverse the back trace from header to the 1097218893Sdim/// current block and update their register pressures to reflect the effect 1098218893Sdim/// of hoisting MI from the current block to the preheader. 1099218893Sdimvoid MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 1100218893Sdim if (MI->isImplicitDef()) 1101218893Sdim return; 1102218893Sdim 1103218893Sdim // First compute the 'cost' of the instruction, i.e. its contribution 1104218893Sdim // to register pressure. 1105218893Sdim DenseMap<unsigned, int> Cost; 1106218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1107218893Sdim const MachineOperand &MO = MI->getOperand(i); 1108218893Sdim if (!MO.isReg() || MO.isImplicit()) 1109218893Sdim continue; 1110218893Sdim unsigned Reg = MO.getReg(); 1111218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1112218893Sdim continue; 1113218893Sdim 1114226890Sdim unsigned RCId, RCCost; 1115226890Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 1116218893Sdim if (MO.isDef()) { 1117218893Sdim DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1118218893Sdim if (CI != Cost.end()) 1119218893Sdim CI->second += RCCost; 1120218893Sdim else 1121218893Sdim Cost.insert(std::make_pair(RCId, RCCost)); 1122218893Sdim } else if (isOperandKill(MO, MRI)) { 1123218893Sdim DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1124218893Sdim if (CI != Cost.end()) 1125218893Sdim CI->second -= RCCost; 1126218893Sdim else 1127218893Sdim Cost.insert(std::make_pair(RCId, -RCCost)); 1128218893Sdim } 1129218893Sdim } 1130218893Sdim 1131218893Sdim // Update register pressure of blocks from loop header to current block. 1132218893Sdim for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { 1133263509Sdim SmallVectorImpl<unsigned> &RP = BackTrace[i]; 1134218893Sdim for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1135218893Sdim CI != CE; ++CI) { 1136218893Sdim unsigned RCId = CI->first; 1137218893Sdim RP[RCId] += CI->second; 1138218893Sdim } 1139218893Sdim } 1140218893Sdim} 1141218893Sdim 1142193323Sed/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 1143193323Sed/// the given loop invariant. 1144193323Sedbool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 1145218893Sdim if (MI.isImplicitDef()) 1146218893Sdim return true; 1147218893Sdim 1148235633Sdim // Besides removing computation from the loop, hoisting an instruction has 1149235633Sdim // these effects: 1150235633Sdim // 1151235633Sdim // - The value defined by the instruction becomes live across the entire 1152235633Sdim // loop. This increases register pressure in the loop. 1153235633Sdim // 1154235633Sdim // - If the value is used by a PHI in the loop, a copy will be required for 1155235633Sdim // lowering the PHI after extending the live range. 1156235633Sdim // 1157235633Sdim // - When hoisting the last use of a value in the loop, that value no longer 1158235633Sdim // needs to be live in the loop. This lowers register pressure in the loop. 1159226890Sdim 1160235633Sdim bool CheapInstr = IsCheapInstruction(MI); 1161235633Sdim bool CreatesCopy = HasLoopPHIUse(&MI); 1162218893Sdim 1163235633Sdim // Don't hoist a cheap instruction if it would create a copy in the loop. 1164235633Sdim if (CheapInstr && CreatesCopy) { 1165235633Sdim DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 1166235633Sdim return false; 1167235633Sdim } 1168235633Sdim 1169235633Sdim // Rematerializable instructions should always be hoisted since the register 1170235633Sdim // allocator can just pull them down again when needed. 1171235633Sdim if (TII->isTriviallyReMaterializable(&MI, AA)) 1172235633Sdim return true; 1173235633Sdim 1174235633Sdim // Estimate register pressure to determine whether to LICM the instruction. 1175235633Sdim // In low register pressure situation, we can be more aggressive about 1176235633Sdim // hoisting. Also, favors hoisting long latency instructions even in 1177235633Sdim // moderately high pressure situation. 1178235633Sdim // Cheap instructions will only be hoisted if they don't increase register 1179235633Sdim // pressure at all. 1180235633Sdim // FIXME: If there are long latency loop-invariant instructions inside the 1181235633Sdim // loop at this point, why didn't the optimizer's LICM hoist them? 1182235633Sdim DenseMap<unsigned, int> Cost; 1183235633Sdim for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 1184235633Sdim const MachineOperand &MO = MI.getOperand(i); 1185235633Sdim if (!MO.isReg() || MO.isImplicit()) 1186235633Sdim continue; 1187235633Sdim unsigned Reg = MO.getReg(); 1188235633Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1189235633Sdim continue; 1190235633Sdim 1191235633Sdim unsigned RCId, RCCost; 1192235633Sdim getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); 1193235633Sdim if (MO.isDef()) { 1194235633Sdim if (HasHighOperandLatency(MI, i, Reg)) { 1195235633Sdim DEBUG(dbgs() << "Hoist High Latency: " << MI); 1196235633Sdim ++NumHighLatency; 1197235633Sdim return true; 1198218893Sdim } 1199235633Sdim Cost[RCId] += RCCost; 1200235633Sdim } else if (isOperandKill(MO, MRI)) { 1201235633Sdim // Is a virtual register use is a kill, hoisting it out of the loop 1202235633Sdim // may actually reduce register pressure or be register pressure 1203235633Sdim // neutral. 1204235633Sdim Cost[RCId] -= RCCost; 1205218893Sdim } 1206235633Sdim } 1207218893Sdim 1208235633Sdim // Visit BBs from header to current BB, if hoisting this doesn't cause 1209235633Sdim // high register pressure, then it's safe to proceed. 1210235633Sdim if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 1211235633Sdim DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 1212235633Sdim ++NumLowRP; 1213235633Sdim return true; 1214235633Sdim } 1215218893Sdim 1216235633Sdim // Don't risk increasing register pressure if it would create copies. 1217235633Sdim if (CreatesCopy) { 1218235633Sdim DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 1219235633Sdim return false; 1220235633Sdim } 1221226890Sdim 1222235633Sdim // Do not "speculate" in high register pressure situation. If an 1223235633Sdim // instruction is not guaranteed to be executed in the loop, it's best to be 1224235633Sdim // conservative. 1225235633Sdim if (AvoidSpeculation && 1226235633Sdim (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 1227235633Sdim DEBUG(dbgs() << "Won't speculate: " << MI); 1228235633Sdim return false; 1229199989Srdivacky } 1230193323Sed 1231235633Sdim // High register pressure situation, only hoist if the instruction is going 1232235633Sdim // to be remat'ed. 1233235633Sdim if (!TII->isTriviallyReMaterializable(&MI, AA) && 1234235633Sdim !MI.isInvariantLoad(AA)) { 1235235633Sdim DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 1236235633Sdim return false; 1237193323Sed } 1238193323Sed 1239193323Sed return true; 1240193323Sed} 1241193323Sed 1242198892SrdivackyMachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 1243218893Sdim // Don't unfold simple loads. 1244235633Sdim if (MI->canFoldAsLoad()) 1245218893Sdim return 0; 1246218893Sdim 1247198892Srdivacky // If not, we may be able to unfold a load and hoist that. 1248198892Srdivacky // First test whether the instruction is loading from an amenable 1249198892Srdivacky // memory location. 1250218893Sdim if (!MI->isInvariantLoad(AA)) 1251199989Srdivacky return 0; 1252199989Srdivacky 1253198892Srdivacky // Next determine the register class for a temporary register. 1254198892Srdivacky unsigned LoadRegIndex; 1255198892Srdivacky unsigned NewOpc = 1256198892Srdivacky TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 1257198892Srdivacky /*UnfoldLoad=*/true, 1258198892Srdivacky /*UnfoldStore=*/false, 1259198892Srdivacky &LoadRegIndex); 1260198892Srdivacky if (NewOpc == 0) return 0; 1261224145Sdim const MCInstrDesc &MID = TII->get(NewOpc); 1262224145Sdim if (MID.getNumDefs() != 1) return 0; 1263245431Sdim MachineFunction &MF = *MI->getParent()->getParent(); 1264245431Sdim const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 1265198892Srdivacky // Ok, we're unfolding. Create a temporary register and do the unfold. 1266218893Sdim unsigned Reg = MRI->createVirtualRegister(RC); 1267199989Srdivacky 1268198892Srdivacky SmallVector<MachineInstr *, 2> NewMIs; 1269198892Srdivacky bool Success = 1270198892Srdivacky TII->unfoldMemoryOperand(MF, MI, Reg, 1271198892Srdivacky /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 1272198892Srdivacky NewMIs); 1273198892Srdivacky (void)Success; 1274198892Srdivacky assert(Success && 1275198892Srdivacky "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 1276198892Srdivacky "succeeded!"); 1277198892Srdivacky assert(NewMIs.size() == 2 && 1278198892Srdivacky "Unfolded a load into multiple instructions!"); 1279198892Srdivacky MachineBasicBlock *MBB = MI->getParent(); 1280235633Sdim MachineBasicBlock::iterator Pos = MI; 1281235633Sdim MBB->insert(Pos, NewMIs[0]); 1282235633Sdim MBB->insert(Pos, NewMIs[1]); 1283198892Srdivacky // If unfolding produced a load that wasn't loop-invariant or profitable to 1284198892Srdivacky // hoist, discard the new instructions and bail. 1285198892Srdivacky if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 1286198892Srdivacky NewMIs[0]->eraseFromParent(); 1287198892Srdivacky NewMIs[1]->eraseFromParent(); 1288198892Srdivacky return 0; 1289198892Srdivacky } 1290218893Sdim 1291218893Sdim // Update register pressure for the unfolded instruction. 1292218893Sdim UpdateRegPressure(NewMIs[1]); 1293218893Sdim 1294198892Srdivacky // Otherwise we successfully unfolded a load that we can hoist. 1295198892Srdivacky MI->eraseFromParent(); 1296198892Srdivacky return NewMIs[0]; 1297198892Srdivacky} 1298198892Srdivacky 1299198892Srdivackyvoid MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 1300198892Srdivacky for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 1301198892Srdivacky const MachineInstr *MI = &*I; 1302218893Sdim unsigned Opcode = MI->getOpcode(); 1303218893Sdim DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1304218893Sdim CI = CSEMap.find(Opcode); 1305218893Sdim if (CI != CSEMap.end()) 1306218893Sdim CI->second.push_back(MI); 1307218893Sdim else { 1308218893Sdim std::vector<const MachineInstr*> CSEMIs; 1309218893Sdim CSEMIs.push_back(MI); 1310218893Sdim CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1311198892Srdivacky } 1312198892Srdivacky } 1313198892Srdivacky} 1314198892Srdivacky 1315199481Srdivackyconst MachineInstr* 1316199481SrdivackyMachineLICM::LookForDuplicate(const MachineInstr *MI, 1317199481Srdivacky std::vector<const MachineInstr*> &PrevMIs) { 1318198953Srdivacky for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1319198953Srdivacky const MachineInstr *PrevMI = PrevMIs[i]; 1320218893Sdim if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) 1321198953Srdivacky return PrevMI; 1322198953Srdivacky } 1323198953Srdivacky return 0; 1324198953Srdivacky} 1325198953Srdivacky 1326198953Srdivackybool MachineLICM::EliminateCSE(MachineInstr *MI, 1327198953Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1328210299Sed // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1329210299Sed // the undef property onto uses. 1330210299Sed if (CI == CSEMap.end() || MI->isImplicitDef()) 1331199481Srdivacky return false; 1332199481Srdivacky 1333199481Srdivacky if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1334202375Srdivacky DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1335204642Srdivacky 1336204642Srdivacky // Replace virtual registers defined by MI by their counterparts defined 1337204642Srdivacky // by Dup. 1338235633Sdim SmallVector<unsigned, 2> Defs; 1339199481Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1340199481Srdivacky const MachineOperand &MO = MI->getOperand(i); 1341204642Srdivacky 1342204642Srdivacky // Physical registers may not differ here. 1343204642Srdivacky assert((!MO.isReg() || MO.getReg() == 0 || 1344204642Srdivacky !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1345204642Srdivacky MO.getReg() == Dup->getOperand(i).getReg()) && 1346204642Srdivacky "Instructions with different phys regs are not identical!"); 1347204642Srdivacky 1348204642Srdivacky if (MO.isReg() && MO.isDef() && 1349235633Sdim !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1350235633Sdim Defs.push_back(i); 1351235633Sdim } 1352235633Sdim 1353235633Sdim SmallVector<const TargetRegisterClass*, 2> OrigRCs; 1354235633Sdim for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1355235633Sdim unsigned Idx = Defs[i]; 1356235633Sdim unsigned Reg = MI->getOperand(Idx).getReg(); 1357235633Sdim unsigned DupReg = Dup->getOperand(Idx).getReg(); 1358235633Sdim OrigRCs.push_back(MRI->getRegClass(DupReg)); 1359235633Sdim 1360235633Sdim if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 1361235633Sdim // Restore old RCs if more than one defs. 1362235633Sdim for (unsigned j = 0; j != i; ++j) 1363235633Sdim MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 1364235633Sdim return false; 1365208599Srdivacky } 1366198953Srdivacky } 1367235633Sdim 1368235633Sdim for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1369235633Sdim unsigned Idx = Defs[i]; 1370235633Sdim unsigned Reg = MI->getOperand(Idx).getReg(); 1371235633Sdim unsigned DupReg = Dup->getOperand(Idx).getReg(); 1372235633Sdim MRI->replaceRegWith(Reg, DupReg); 1373235633Sdim MRI->clearKillFlags(DupReg); 1374235633Sdim } 1375235633Sdim 1376199481Srdivacky MI->eraseFromParent(); 1377199481Srdivacky ++NumCSEed; 1378199481Srdivacky return true; 1379198953Srdivacky } 1380198953Srdivacky return false; 1381198953Srdivacky} 1382198953Srdivacky 1383226890Sdim/// MayCSE - Return true if the given instruction will be CSE'd if it's 1384226890Sdim/// hoisted out of the loop. 1385226890Sdimbool MachineLICM::MayCSE(MachineInstr *MI) { 1386226890Sdim unsigned Opcode = MI->getOpcode(); 1387226890Sdim DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1388226890Sdim CI = CSEMap.find(Opcode); 1389226890Sdim // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1390226890Sdim // the undef property onto uses. 1391226890Sdim if (CI == CSEMap.end() || MI->isImplicitDef()) 1392226890Sdim return false; 1393226890Sdim 1394226890Sdim return LookForDuplicate(MI, CI->second) != 0; 1395226890Sdim} 1396226890Sdim 1397193323Sed/// Hoist - When an instruction is found to use only loop invariant operands 1398193323Sed/// that are safe to hoist, this instruction is called to do the dirty work. 1399193323Sed/// 1400218893Sdimbool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1401198892Srdivacky // First check whether we should hoist this instruction. 1402198892Srdivacky if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1403198892Srdivacky // If not, try unfolding a hoistable load. 1404198892Srdivacky MI = ExtractHoistableLoad(MI); 1405218893Sdim if (!MI) return false; 1406198892Srdivacky } 1407193323Sed 1408193323Sed // Now move the instructions to the predecessor, inserting it before any 1409193323Sed // terminator instructions. 1410193323Sed DEBUG({ 1411202375Srdivacky dbgs() << "Hoisting " << *MI; 1412210299Sed if (Preheader->getBasicBlock()) 1413202375Srdivacky dbgs() << " to MachineBasicBlock " 1414210299Sed << Preheader->getName(); 1415198892Srdivacky if (MI->getParent()->getBasicBlock()) 1416202375Srdivacky dbgs() << " from MachineBasicBlock " 1417199989Srdivacky << MI->getParent()->getName(); 1418202375Srdivacky dbgs() << "\n"; 1419193323Sed }); 1420193323Sed 1421198892Srdivacky // If this is the first instruction being hoisted to the preheader, 1422198892Srdivacky // initialize the CSE map with potential common expressions. 1423210299Sed if (FirstInLoop) { 1424210299Sed InitCSEMap(Preheader); 1425210299Sed FirstInLoop = false; 1426210299Sed } 1427198892Srdivacky 1428193323Sed // Look for opportunity to CSE the hoisted instruction. 1429198892Srdivacky unsigned Opcode = MI->getOpcode(); 1430198892Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1431198892Srdivacky CI = CSEMap.find(Opcode); 1432198953Srdivacky if (!EliminateCSE(MI, CI)) { 1433198953Srdivacky // Otherwise, splice the instruction to the preheader. 1434210299Sed Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1435198892Srdivacky 1436218893Sdim // Update register pressure for BBs from header to this block. 1437218893Sdim UpdateBackTraceRegPressure(MI); 1438218893Sdim 1439208599Srdivacky // Clear the kill flags of any register this instruction defines, 1440208599Srdivacky // since they may need to be live throughout the entire loop 1441208599Srdivacky // rather than just live for part of it. 1442208599Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1443208599Srdivacky MachineOperand &MO = MI->getOperand(i); 1444208599Srdivacky if (MO.isReg() && MO.isDef() && !MO.isDead()) 1445218893Sdim MRI->clearKillFlags(MO.getReg()); 1446208599Srdivacky } 1447208599Srdivacky 1448193323Sed // Add to the CSE map. 1449193323Sed if (CI != CSEMap.end()) 1450198892Srdivacky CI->second.push_back(MI); 1451193323Sed else { 1452193323Sed std::vector<const MachineInstr*> CSEMIs; 1453198892Srdivacky CSEMIs.push_back(MI); 1454198892Srdivacky CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1455193323Sed } 1456193323Sed } 1457193323Sed 1458193323Sed ++NumHoisted; 1459193323Sed Changed = true; 1460218893Sdim 1461218893Sdim return true; 1462193323Sed} 1463210299Sed 1464210299SedMachineBasicBlock *MachineLICM::getCurPreheader() { 1465210299Sed // Determine the block to which to hoist instructions. If we can't find a 1466210299Sed // suitable loop predecessor, we can't do any hoisting. 1467210299Sed 1468210299Sed // If we've tried to get a preheader and failed, don't try again. 1469210299Sed if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1470210299Sed return 0; 1471210299Sed 1472210299Sed if (!CurPreheader) { 1473210299Sed CurPreheader = CurLoop->getLoopPreheader(); 1474210299Sed if (!CurPreheader) { 1475210299Sed MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1476210299Sed if (!Pred) { 1477210299Sed CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1478210299Sed return 0; 1479210299Sed } 1480210299Sed 1481210299Sed CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1482210299Sed if (!CurPreheader) { 1483210299Sed CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1484210299Sed return 0; 1485210299Sed } 1486210299Sed } 1487210299Sed } 1488210299Sed return CurPreheader; 1489210299Sed} 1490