1193323Sed//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This pass performs loop invariant code motion on machine instructions. We 11193323Sed// attempt to remove as much code from the body of a loop as possible. 12193323Sed// 13193323Sed// This pass does not attempt to throttle itself to limit register pressure. 14193323Sed// The register allocation phases are expected to perform rematerialization 15193323Sed// to recover when register pressure is high. 16193323Sed// 17193323Sed// This pass is not intended to be a replacement or a complete alternative 18193323Sed// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19193323Sed// constructs that are not exposed before lowering and instruction selection. 20193323Sed// 21193323Sed//===----------------------------------------------------------------------===// 22193323Sed 23193323Sed#define DEBUG_TYPE "machine-licm" 24193323Sed#include "llvm/CodeGen/Passes.h" 25249423Sdim#include "llvm/ADT/DenseMap.h" 26249423Sdim#include "llvm/ADT/SmallSet.h" 27249423Sdim#include "llvm/ADT/Statistic.h" 28249423Sdim#include "llvm/Analysis/AliasAnalysis.h" 29193323Sed#include "llvm/CodeGen/MachineDominators.h" 30207618Srdivacky#include "llvm/CodeGen/MachineFrameInfo.h" 31193323Sed#include "llvm/CodeGen/MachineLoopInfo.h" 32198892Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 33193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 34198892Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 35224145Sdim#include "llvm/MC/MCInstrItineraries.h" 36226633Sdim#include "llvm/Support/CommandLine.h" 37193323Sed#include "llvm/Support/Debug.h" 38198090Srdivacky#include "llvm/Support/raw_ostream.h" 39249423Sdim#include "llvm/Target/TargetInstrInfo.h" 40249423Sdim#include "llvm/Target/TargetLowering.h" 41249423Sdim#include "llvm/Target/TargetMachine.h" 42249423Sdim#include "llvm/Target/TargetRegisterInfo.h" 43193323Sedusing namespace llvm; 44193323Sed 45226633Sdimstatic cl::opt<bool> 46226633SdimAvoidSpeculation("avoid-speculation", 47226633Sdim cl::desc("MachineLICM should avoid speculation"), 48234353Sdim cl::init(true), cl::Hidden); 49226633Sdim 50218893SdimSTATISTIC(NumHoisted, 51218893Sdim "Number of machine instructions hoisted out of loops"); 52218893SdimSTATISTIC(NumLowRP, 53218893Sdim "Number of instructions hoisted in low reg pressure situation"); 54218893SdimSTATISTIC(NumHighLatency, 55218893Sdim "Number of high latency instructions hoisted"); 56218893SdimSTATISTIC(NumCSEed, 57218893Sdim "Number of hoisted machine instructions CSEed"); 58207618SrdivackySTATISTIC(NumPostRAHoisted, 59207618Srdivacky "Number of machine instructions hoisted out of loops post regalloc"); 60193323Sed 61193323Sednamespace { 62198892Srdivacky class MachineLICM : public MachineFunctionPass { 63193323Sed const TargetMachine *TM; 64193323Sed const TargetInstrInfo *TII; 65249423Sdim const TargetLoweringBase *TLI; 66198090Srdivacky const TargetRegisterInfo *TRI; 67207618Srdivacky const MachineFrameInfo *MFI; 68218893Sdim MachineRegisterInfo *MRI; 69218893Sdim const InstrItineraryData *InstrItins; 70234353Sdim bool PreRegAlloc; 71193323Sed 72193323Sed // Various analyses that we use... 73198090Srdivacky AliasAnalysis *AA; // Alias analysis info. 74207618Srdivacky MachineLoopInfo *MLI; // Current MachineLoopInfo 75193323Sed MachineDominatorTree *DT; // Machine dominator tree for the cur loop 76193323Sed 77193323Sed // State that is updated as we process loops 78193323Sed bool Changed; // True if a loop is changed. 79210299Sed bool FirstInLoop; // True if it's the first LICM in the loop. 80193323Sed MachineLoop *CurLoop; // The current loop we are working on. 81193323Sed MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 82193323Sed 83234353Sdim // Exit blocks for CurLoop. 84234353Sdim SmallVector<MachineBasicBlock*, 8> ExitBlocks; 85207618Srdivacky 86234353Sdim bool isExitBlock(const MachineBasicBlock *MBB) const { 87234353Sdim return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != 88234353Sdim ExitBlocks.end(); 89234353Sdim } 90234353Sdim 91218893Sdim // Track 'estimated' register pressure. 92218893Sdim SmallSet<unsigned, 32> RegSeen; 93218893Sdim SmallVector<unsigned, 8> RegPressure; 94218893Sdim 95218893Sdim // Register pressure "limit" per register class. If the pressure 96218893Sdim // is higher than the limit, then it's considered high. 97218893Sdim SmallVector<unsigned, 8> RegLimit; 98218893Sdim 99218893Sdim // Register pressure on path leading from loop preheader to current BB. 100218893Sdim SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 101218893Sdim 102212904Sdim // For each opcode, keep a list of potential CSE instructions. 103198892Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 104207618Srdivacky 105226633Sdim enum { 106226633Sdim SpeculateFalse = 0, 107226633Sdim SpeculateTrue = 1, 108226633Sdim SpeculateUnknown = 2 109226633Sdim }; 110226633Sdim 111226633Sdim // If a MBB does not dominate loop exiting blocks then it may not safe 112226633Sdim // to hoist loads from this block. 113226633Sdim // Tri-state: 0 - false, 1 - true, 2 - unknown 114226633Sdim unsigned SpeculationState; 115226633Sdim 116193323Sed public: 117193323Sed static char ID; // Pass identification, replacement for typeid 118207618Srdivacky MachineLICM() : 119218893Sdim MachineFunctionPass(ID), PreRegAlloc(true) { 120218893Sdim initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 121218893Sdim } 122193323Sed 123207618Srdivacky explicit MachineLICM(bool PreRA) : 124218893Sdim MachineFunctionPass(ID), PreRegAlloc(PreRA) { 125218893Sdim initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 126218893Sdim } 127207618Srdivacky 128193323Sed virtual bool runOnMachineFunction(MachineFunction &MF); 129193323Sed 130193323Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 131193323Sed AU.addRequired<MachineLoopInfo>(); 132193323Sed AU.addRequired<MachineDominatorTree>(); 133198090Srdivacky AU.addRequired<AliasAnalysis>(); 134193323Sed AU.addPreserved<MachineLoopInfo>(); 135193323Sed AU.addPreserved<MachineDominatorTree>(); 136193323Sed MachineFunctionPass::getAnalysisUsage(AU); 137193323Sed } 138193323Sed 139193323Sed virtual void releaseMemory() { 140218893Sdim RegSeen.clear(); 141218893Sdim RegPressure.clear(); 142218893Sdim RegLimit.clear(); 143218893Sdim BackTrace.clear(); 144218893Sdim for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 145218893Sdim CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 146218893Sdim CI->second.clear(); 147193323Sed CSEMap.clear(); 148193323Sed } 149193323Sed 150193323Sed private: 151207618Srdivacky /// CandidateInfo - Keep track of information about hoisting candidates. 152207618Srdivacky struct CandidateInfo { 153207618Srdivacky MachineInstr *MI; 154207618Srdivacky unsigned Def; 155207618Srdivacky int FI; 156207618Srdivacky CandidateInfo(MachineInstr *mi, unsigned def, int fi) 157207618Srdivacky : MI(mi), Def(def), FI(fi) {} 158207618Srdivacky }; 159207618Srdivacky 160207618Srdivacky /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 161207618Srdivacky /// invariants out to the preheader. 162207618Srdivacky void HoistRegionPostRA(); 163207618Srdivacky 164207618Srdivacky /// HoistPostRA - When an instruction is found to only use loop invariant 165207618Srdivacky /// operands that is safe to hoist, this instruction is called to do the 166207618Srdivacky /// dirty work. 167207618Srdivacky void HoistPostRA(MachineInstr *MI, unsigned Def); 168207618Srdivacky 169207618Srdivacky /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 170207618Srdivacky /// gather register def and frame object update information. 171234353Sdim void ProcessMI(MachineInstr *MI, 172234353Sdim BitVector &PhysRegDefs, 173234353Sdim BitVector &PhysRegClobbers, 174207618Srdivacky SmallSet<int, 32> &StoredFIs, 175263508Sdim SmallVectorImpl<CandidateInfo> &Candidates); 176207618Srdivacky 177207618Srdivacky /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 178207618Srdivacky /// current loop. 179207618Srdivacky void AddToLiveIns(unsigned Reg); 180207618Srdivacky 181207618Srdivacky /// IsLICMCandidate - Returns true if the instruction may be a suitable 182210299Sed /// candidate for LICM. e.g. If the instruction is a call, then it's 183210299Sed /// obviously not safe to hoist it. 184207618Srdivacky bool IsLICMCandidate(MachineInstr &I); 185207618Srdivacky 186193323Sed /// IsLoopInvariantInst - Returns true if the instruction is loop 187193323Sed /// invariant. I.e., all virtual register operands are defined outside of 188193323Sed /// the loop, physical registers aren't accessed (explicitly or implicitly), 189193323Sed /// and the instruction is hoistable. 190234353Sdim /// 191193323Sed bool IsLoopInvariantInst(MachineInstr &I); 192193323Sed 193234353Sdim /// HasLoopPHIUse - Return true if the specified instruction is used by any 194234353Sdim /// phi node in the current loop. 195234353Sdim bool HasLoopPHIUse(const MachineInstr *MI) const; 196221345Sdim 197218893Sdim /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 198218893Sdim /// and an use in the current loop, return true if the target considered 199218893Sdim /// it 'high'. 200218893Sdim bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 201218893Sdim unsigned Reg) const; 202218893Sdim 203218893Sdim bool IsCheapInstruction(MachineInstr &MI) const; 204218893Sdim 205218893Sdim /// CanCauseHighRegPressure - Visit BBs from header to current BB, 206218893Sdim /// check if hoisting an instruction of the given cost matrix can cause high 207218893Sdim /// register pressure. 208234353Sdim bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); 209218893Sdim 210218893Sdim /// UpdateBackTraceRegPressure - Traverse the back trace from header to 211218893Sdim /// the current block and update their register pressures to reflect the 212218893Sdim /// effect of hoisting MI from the current block to the preheader. 213218893Sdim void UpdateBackTraceRegPressure(const MachineInstr *MI); 214218893Sdim 215193323Sed /// IsProfitableToHoist - Return true if it is potentially profitable to 216193323Sed /// hoist the given loop invariant. 217193323Sed bool IsProfitableToHoist(MachineInstr &MI); 218193323Sed 219226633Sdim /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 220226633Sdim /// If not then a load from this mbb may not be safe to hoist. 221226633Sdim bool IsGuaranteedToExecute(MachineBasicBlock *BB); 222226633Sdim 223234353Sdim void EnterScope(MachineBasicBlock *MBB); 224234353Sdim 225234353Sdim void ExitScope(MachineBasicBlock *MBB); 226234353Sdim 227234353Sdim /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 228234353Sdim /// dominator tree node if its a leaf or all of its children are done. Walk 229234353Sdim /// up the dominator tree to destroy ancestors which are now done. 230234353Sdim void ExitScopeIfDone(MachineDomTreeNode *Node, 231234353Sdim DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 232234353Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 233234353Sdim 234234353Sdim /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 235234353Sdim /// blocks dominated by the specified header block, and that are in the 236234353Sdim /// current loop) in depth first order w.r.t the DominatorTree. This allows 237234353Sdim /// us to visit definitions before uses, allowing us to hoist a loop body in 238234353Sdim /// one pass without iteration. 239193323Sed /// 240234353Sdim void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 241234353Sdim void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 242193323Sed 243226633Sdim /// getRegisterClassIDAndCost - For a given MI, register, and the operand 244226633Sdim /// index, return the ID and cost of its representative register class by 245226633Sdim /// reference. 246226633Sdim void getRegisterClassIDAndCost(const MachineInstr *MI, 247226633Sdim unsigned Reg, unsigned OpIdx, 248226633Sdim unsigned &RCId, unsigned &RCCost) const; 249226633Sdim 250218893Sdim /// InitRegPressure - Find all virtual register references that are liveout 251218893Sdim /// of the preheader to initialize the starting "register pressure". Note 252218893Sdim /// this does not count live through (livein but not used) registers. 253218893Sdim void InitRegPressure(MachineBasicBlock *BB); 254199989Srdivacky 255218893Sdim /// UpdateRegPressure - Update estimate of register pressure after the 256218893Sdim /// specified instruction. 257218893Sdim void UpdateRegPressure(const MachineInstr *MI); 258218893Sdim 259198892Srdivacky /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 260198892Srdivacky /// the load itself could be hoisted. Return the unfolded and hoistable 261198892Srdivacky /// load, or null if the load couldn't be unfolded or if it wouldn't 262198892Srdivacky /// be hoistable. 263198892Srdivacky MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 264198892Srdivacky 265199481Srdivacky /// LookForDuplicate - Find an instruction amount PrevMIs that is a 266199481Srdivacky /// duplicate of MI. Return this instruction if it's found. 267199481Srdivacky const MachineInstr *LookForDuplicate(const MachineInstr *MI, 268199481Srdivacky std::vector<const MachineInstr*> &PrevMIs); 269199481Srdivacky 270198953Srdivacky /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 271198953Srdivacky /// the preheader that compute the same value. If it's found, do a RAU on 272198953Srdivacky /// with the definition of the existing instruction rather than hoisting 273198953Srdivacky /// the instruction to the preheader. 274198953Srdivacky bool EliminateCSE(MachineInstr *MI, 275198953Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 276198953Srdivacky 277226633Sdim /// MayCSE - Return true if the given instruction will be CSE'd if it's 278226633Sdim /// hoisted out of the loop. 279226633Sdim bool MayCSE(MachineInstr *MI); 280226633Sdim 281193323Sed /// Hoist - When an instruction is found to only use loop invariant operands 282193323Sed /// that is safe to hoist, this instruction is called to do the dirty work. 283218893Sdim /// It returns true if the instruction is hoisted. 284218893Sdim bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 285198892Srdivacky 286198892Srdivacky /// InitCSEMap - Initialize the CSE map with instructions that are in the 287198892Srdivacky /// current loop preheader that may become duplicates of instructions that 288198892Srdivacky /// are hoisted out of the loop. 289198892Srdivacky void InitCSEMap(MachineBasicBlock *BB); 290210299Sed 291210299Sed /// getCurPreheader - Get the preheader for the current loop, splitting 292210299Sed /// a critical edge if needed. 293210299Sed MachineBasicBlock *getCurPreheader(); 294193323Sed }; 295193323Sed} // end anonymous namespace 296193323Sed 297193323Sedchar MachineLICM::ID = 0; 298234353Sdimchar &llvm::MachineLICMID = MachineLICM::ID; 299218893SdimINITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 300218893Sdim "Machine Loop Invariant Code Motion", false, false) 301218893SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 302218893SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 303218893SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 304218893SdimINITIALIZE_PASS_END(MachineLICM, "machinelicm", 305218893Sdim "Machine Loop Invariant Code Motion", false, false) 306193323Sed 307210299Sed/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 308210299Sed/// loop that has a unique predecessor. 309210299Sedstatic bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 310210299Sed // Check whether this loop even has a unique predecessor. 311210299Sed if (!CurLoop->getLoopPredecessor()) 312210299Sed return false; 313210299Sed // Ok, now check to see if any of its outer loops do. 314193323Sed for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 315210299Sed if (L->getLoopPredecessor()) 316193323Sed return false; 317210299Sed // None of them did, so this is the outermost with a unique predecessor. 318193323Sed return true; 319193323Sed} 320193323Sed 321193323Sedbool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 322210299Sed Changed = FirstInLoop = false; 323193323Sed TM = &MF.getTarget(); 324193323Sed TII = TM->getInstrInfo(); 325218893Sdim TLI = TM->getTargetLowering(); 326198090Srdivacky TRI = TM->getRegisterInfo(); 327207618Srdivacky MFI = MF.getFrameInfo(); 328218893Sdim MRI = &MF.getRegInfo(); 329218893Sdim InstrItins = TM->getInstrItineraryData(); 330193323Sed 331234353Sdim PreRegAlloc = MRI->isSSA(); 332234353Sdim 333234353Sdim if (PreRegAlloc) 334234353Sdim DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 335234353Sdim else 336234353Sdim DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 337243830Sdim DEBUG(dbgs() << MF.getName() << " ********\n"); 338234353Sdim 339218893Sdim if (PreRegAlloc) { 340218893Sdim // Estimate register pressure during pre-regalloc pass. 341218893Sdim unsigned NumRC = TRI->getNumRegClasses(); 342218893Sdim RegPressure.resize(NumRC); 343218893Sdim std::fill(RegPressure.begin(), RegPressure.end(), 0); 344218893Sdim RegLimit.resize(NumRC); 345218893Sdim for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 346218893Sdim E = TRI->regclass_end(); I != E; ++I) 347221345Sdim RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 348218893Sdim } 349218893Sdim 350193323Sed // Get our Loop information... 351207618Srdivacky MLI = &getAnalysis<MachineLoopInfo>(); 352207618Srdivacky DT = &getAnalysis<MachineDominatorTree>(); 353207618Srdivacky AA = &getAnalysis<AliasAnalysis>(); 354193323Sed 355210299Sed SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 356210299Sed while (!Worklist.empty()) { 357210299Sed CurLoop = Worklist.pop_back_val(); 358210299Sed CurPreheader = 0; 359234353Sdim ExitBlocks.clear(); 360193323Sed 361207618Srdivacky // If this is done before regalloc, only visit outer-most preheader-sporting 362207618Srdivacky // loops. 363210299Sed if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 364210299Sed Worklist.append(CurLoop->begin(), CurLoop->end()); 365193323Sed continue; 366210299Sed } 367193323Sed 368234353Sdim CurLoop->getExitBlocks(ExitBlocks); 369234353Sdim 370207618Srdivacky if (!PreRegAlloc) 371207618Srdivacky HoistRegionPostRA(); 372207618Srdivacky else { 373207618Srdivacky // CSEMap is initialized for loop header when the first instruction is 374207618Srdivacky // being hoisted. 375207618Srdivacky MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 376210299Sed FirstInLoop = true; 377234353Sdim HoistOutOfLoop(N); 378207618Srdivacky CSEMap.clear(); 379207618Srdivacky } 380193323Sed } 381193323Sed 382193323Sed return Changed; 383193323Sed} 384193323Sed 385207618Srdivacky/// InstructionStoresToFI - Return true if instruction stores to the 386207618Srdivacky/// specified frame. 387207618Srdivackystatic bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 388207618Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 389207618Srdivacky oe = MI->memoperands_end(); o != oe; ++o) { 390207618Srdivacky if (!(*o)->isStore() || !(*o)->getValue()) 391207618Srdivacky continue; 392207618Srdivacky if (const FixedStackPseudoSourceValue *Value = 393207618Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 394207618Srdivacky if (Value->getFrameIndex() == FI) 395207618Srdivacky return true; 396207618Srdivacky } 397207618Srdivacky } 398207618Srdivacky return false; 399207618Srdivacky} 400207618Srdivacky 401207618Srdivacky/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 402207618Srdivacky/// gather register def and frame object update information. 403207618Srdivackyvoid MachineLICM::ProcessMI(MachineInstr *MI, 404234353Sdim BitVector &PhysRegDefs, 405234353Sdim BitVector &PhysRegClobbers, 406207618Srdivacky SmallSet<int, 32> &StoredFIs, 407263508Sdim SmallVectorImpl<CandidateInfo> &Candidates) { 408207618Srdivacky bool RuledOut = false; 409207618Srdivacky bool HasNonInvariantUse = false; 410207618Srdivacky unsigned Def = 0; 411207618Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 412207618Srdivacky const MachineOperand &MO = MI->getOperand(i); 413207618Srdivacky if (MO.isFI()) { 414207618Srdivacky // Remember if the instruction stores to the frame index. 415207618Srdivacky int FI = MO.getIndex(); 416207618Srdivacky if (!StoredFIs.count(FI) && 417207618Srdivacky MFI->isSpillSlotObjectIndex(FI) && 418207618Srdivacky InstructionStoresToFI(MI, FI)) 419207618Srdivacky StoredFIs.insert(FI); 420207618Srdivacky HasNonInvariantUse = true; 421207618Srdivacky continue; 422207618Srdivacky } 423207618Srdivacky 424234353Sdim // We can't hoist an instruction defining a physreg that is clobbered in 425234353Sdim // the loop. 426234353Sdim if (MO.isRegMask()) { 427234353Sdim PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 428234353Sdim continue; 429234353Sdim } 430234353Sdim 431207618Srdivacky if (!MO.isReg()) 432207618Srdivacky continue; 433207618Srdivacky unsigned Reg = MO.getReg(); 434207618Srdivacky if (!Reg) 435207618Srdivacky continue; 436207618Srdivacky assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 437207618Srdivacky "Not expecting virtual register!"); 438207618Srdivacky 439207618Srdivacky if (!MO.isDef()) { 440234353Sdim if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 441207618Srdivacky // If it's using a non-loop-invariant register, then it's obviously not 442207618Srdivacky // safe to hoist. 443207618Srdivacky HasNonInvariantUse = true; 444207618Srdivacky continue; 445207618Srdivacky } 446207618Srdivacky 447207618Srdivacky if (MO.isImplicit()) { 448239462Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 449239462Sdim PhysRegClobbers.set(*AI); 450207618Srdivacky if (!MO.isDead()) 451207618Srdivacky // Non-dead implicit def? This cannot be hoisted. 452207618Srdivacky RuledOut = true; 453207618Srdivacky // No need to check if a dead implicit def is also defined by 454207618Srdivacky // another instruction. 455207618Srdivacky continue; 456207618Srdivacky } 457207618Srdivacky 458207618Srdivacky // FIXME: For now, avoid instructions with multiple defs, unless 459207618Srdivacky // it's a dead implicit def. 460207618Srdivacky if (Def) 461207618Srdivacky RuledOut = true; 462207618Srdivacky else 463207618Srdivacky Def = Reg; 464207618Srdivacky 465207618Srdivacky // If we have already seen another instruction that defines the same 466234353Sdim // register, then this is not safe. Two defs is indicated by setting a 467234353Sdim // PhysRegClobbers bit. 468239462Sdim for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 469234353Sdim if (PhysRegDefs.test(*AS)) 470234353Sdim PhysRegClobbers.set(*AS); 471234353Sdim PhysRegDefs.set(*AS); 472234353Sdim } 473263508Sdim if (PhysRegClobbers.test(Reg)) 474263508Sdim // MI defined register is seen defined by another instruction in 475263508Sdim // the loop, it cannot be a LICM candidate. 476263508Sdim RuledOut = true; 477207618Srdivacky } 478207618Srdivacky 479207618Srdivacky // Only consider reloads for now and remats which do not have register 480207618Srdivacky // operands. FIXME: Consider unfold load folding instructions. 481207618Srdivacky if (Def && !RuledOut) { 482207618Srdivacky int FI = INT_MIN; 483207618Srdivacky if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 484207618Srdivacky (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 485207618Srdivacky Candidates.push_back(CandidateInfo(MI, Def, FI)); 486207618Srdivacky } 487207618Srdivacky} 488207618Srdivacky 489207618Srdivacky/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 490207618Srdivacky/// invariants out to the preheader. 491207618Srdivackyvoid MachineLICM::HoistRegionPostRA() { 492234353Sdim MachineBasicBlock *Preheader = getCurPreheader(); 493234353Sdim if (!Preheader) 494234353Sdim return; 495234353Sdim 496207618Srdivacky unsigned NumRegs = TRI->getNumRegs(); 497234353Sdim BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 498234353Sdim BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 499207618Srdivacky 500207618Srdivacky SmallVector<CandidateInfo, 32> Candidates; 501207618Srdivacky SmallSet<int, 32> StoredFIs; 502207618Srdivacky 503207618Srdivacky // Walk the entire region, count number of defs for each register, and 504207618Srdivacky // collect potential LICM candidates. 505263508Sdim const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 506207618Srdivacky for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 507207618Srdivacky MachineBasicBlock *BB = Blocks[i]; 508226633Sdim 509226633Sdim // If the header of the loop containing this basic block is a landing pad, 510226633Sdim // then don't try to hoist instructions out of this loop. 511226633Sdim const MachineLoop *ML = MLI->getLoopFor(BB); 512226633Sdim if (ML && ML->getHeader()->isLandingPad()) continue; 513226633Sdim 514207618Srdivacky // Conservatively treat live-in's as an external def. 515207618Srdivacky // FIXME: That means a reload that're reused in successor block(s) will not 516207618Srdivacky // be LICM'ed. 517207618Srdivacky for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 518207618Srdivacky E = BB->livein_end(); I != E; ++I) { 519207618Srdivacky unsigned Reg = *I; 520239462Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 521239462Sdim PhysRegDefs.set(*AI); 522207618Srdivacky } 523207618Srdivacky 524226633Sdim SpeculationState = SpeculateUnknown; 525207618Srdivacky for (MachineBasicBlock::iterator 526207618Srdivacky MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 527207618Srdivacky MachineInstr *MI = &*MII; 528234353Sdim ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 529207618Srdivacky } 530207618Srdivacky } 531207618Srdivacky 532234353Sdim // Gather the registers read / clobbered by the terminator. 533234353Sdim BitVector TermRegs(NumRegs); 534234353Sdim MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 535234353Sdim if (TI != Preheader->end()) { 536234353Sdim for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 537234353Sdim const MachineOperand &MO = TI->getOperand(i); 538234353Sdim if (!MO.isReg()) 539234353Sdim continue; 540234353Sdim unsigned Reg = MO.getReg(); 541234353Sdim if (!Reg) 542234353Sdim continue; 543239462Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 544239462Sdim TermRegs.set(*AI); 545234353Sdim } 546234353Sdim } 547234353Sdim 548207618Srdivacky // Now evaluate whether the potential candidates qualify. 549207618Srdivacky // 1. Check if the candidate defined register is defined by another 550207618Srdivacky // instruction in the loop. 551207618Srdivacky // 2. If the candidate is a load from stack slot (always true for now), 552207618Srdivacky // check if the slot is stored anywhere in the loop. 553234353Sdim // 3. Make sure candidate def should not clobber 554234353Sdim // registers read by the terminator. Similarly its def should not be 555234353Sdim // clobbered by the terminator. 556207618Srdivacky for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 557207618Srdivacky if (Candidates[i].FI != INT_MIN && 558207618Srdivacky StoredFIs.count(Candidates[i].FI)) 559207618Srdivacky continue; 560207618Srdivacky 561234353Sdim unsigned Def = Candidates[i].Def; 562234353Sdim if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 563207618Srdivacky bool Safe = true; 564207618Srdivacky MachineInstr *MI = Candidates[i].MI; 565207618Srdivacky for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 566207618Srdivacky const MachineOperand &MO = MI->getOperand(j); 567207618Srdivacky if (!MO.isReg() || MO.isDef() || !MO.getReg()) 568207618Srdivacky continue; 569234353Sdim unsigned Reg = MO.getReg(); 570234353Sdim if (PhysRegDefs.test(Reg) || 571234353Sdim PhysRegClobbers.test(Reg)) { 572207618Srdivacky // If it's using a non-loop-invariant register, then it's obviously 573207618Srdivacky // not safe to hoist. 574207618Srdivacky Safe = false; 575207618Srdivacky break; 576207618Srdivacky } 577207618Srdivacky } 578207618Srdivacky if (Safe) 579207618Srdivacky HoistPostRA(MI, Candidates[i].Def); 580207618Srdivacky } 581207618Srdivacky } 582207618Srdivacky} 583207618Srdivacky 584207618Srdivacky/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 585207618Srdivacky/// loop, and make sure it is not killed by any instructions in the loop. 586207618Srdivackyvoid MachineLICM::AddToLiveIns(unsigned Reg) { 587263508Sdim const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 588207618Srdivacky for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 589207618Srdivacky MachineBasicBlock *BB = Blocks[i]; 590207618Srdivacky if (!BB->isLiveIn(Reg)) 591207618Srdivacky BB->addLiveIn(Reg); 592207618Srdivacky for (MachineBasicBlock::iterator 593207618Srdivacky MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 594207618Srdivacky MachineInstr *MI = &*MII; 595207618Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 596207618Srdivacky MachineOperand &MO = MI->getOperand(i); 597207618Srdivacky if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 598207618Srdivacky if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 599207618Srdivacky MO.setIsKill(false); 600207618Srdivacky } 601207618Srdivacky } 602207618Srdivacky } 603207618Srdivacky} 604207618Srdivacky 605207618Srdivacky/// HoistPostRA - When an instruction is found to only use loop invariant 606207618Srdivacky/// operands that is safe to hoist, this instruction is called to do the 607207618Srdivacky/// dirty work. 608207618Srdivackyvoid MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 609210299Sed MachineBasicBlock *Preheader = getCurPreheader(); 610210299Sed 611207618Srdivacky // Now move the instructions to the predecessor, inserting it before any 612207618Srdivacky // terminator instructions. 613234353Sdim DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 614234353Sdim << MI->getParent()->getNumber() << ": " << *MI); 615207618Srdivacky 616207618Srdivacky // Splice the instruction to the preheader. 617207618Srdivacky MachineBasicBlock *MBB = MI->getParent(); 618210299Sed Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 619207618Srdivacky 620234353Sdim // Add register to livein list to all the BBs in the current loop since a 621207618Srdivacky // loop invariant must be kept live throughout the whole loop. This is 622207618Srdivacky // important to ensure later passes do not scavenge the def register. 623207618Srdivacky AddToLiveIns(Def); 624207618Srdivacky 625207618Srdivacky ++NumPostRAHoisted; 626207618Srdivacky Changed = true; 627207618Srdivacky} 628207618Srdivacky 629226633Sdim// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 630226633Sdim// If not then a load from this mbb may not be safe to hoist. 631226633Sdimbool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 632226633Sdim if (SpeculationState != SpeculateUnknown) 633226633Sdim return SpeculationState == SpeculateFalse; 634234353Sdim 635226633Sdim if (BB != CurLoop->getHeader()) { 636226633Sdim // Check loop exiting blocks. 637226633Sdim SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 638226633Sdim CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 639226633Sdim for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 640226633Sdim if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 641226633Sdim SpeculationState = SpeculateTrue; 642226633Sdim return false; 643226633Sdim } 644226633Sdim } 645226633Sdim 646226633Sdim SpeculationState = SpeculateFalse; 647226633Sdim return true; 648226633Sdim} 649226633Sdim 650234353Sdimvoid MachineLICM::EnterScope(MachineBasicBlock *MBB) { 651234353Sdim DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 652193323Sed 653234353Sdim // Remember livein register pressure. 654234353Sdim BackTrace.push_back(RegPressure); 655234353Sdim} 656226633Sdim 657234353Sdimvoid MachineLICM::ExitScope(MachineBasicBlock *MBB) { 658234353Sdim DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 659234353Sdim BackTrace.pop_back(); 660234353Sdim} 661193323Sed 662234353Sdim/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 663234353Sdim/// dominator tree node if its a leaf or all of its children are done. Walk 664234353Sdim/// up the dominator tree to destroy ancestors which are now done. 665234353Sdimvoid MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 666234353Sdim DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 667234353Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 668234353Sdim if (OpenChildren[Node]) 669218893Sdim return; 670218893Sdim 671234353Sdim // Pop scope. 672234353Sdim ExitScope(Node->getBlock()); 673234353Sdim 674234353Sdim // Now traverse upwards to pop ancestors whose offsprings are all done. 675234353Sdim while (MachineDomTreeNode *Parent = ParentMap[Node]) { 676234353Sdim unsigned Left = --OpenChildren[Parent]; 677234353Sdim if (Left != 0) 678234353Sdim break; 679234353Sdim ExitScope(Parent->getBlock()); 680234353Sdim Node = Parent; 681234353Sdim } 682234353Sdim} 683234353Sdim 684234353Sdim/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 685234353Sdim/// blocks dominated by the specified header block, and that are in the 686234353Sdim/// current loop) in depth first order w.r.t the DominatorTree. This allows 687234353Sdim/// us to visit definitions before uses, allowing us to hoist a loop body in 688234353Sdim/// one pass without iteration. 689234353Sdim/// 690234353Sdimvoid MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 691234353Sdim SmallVector<MachineDomTreeNode*, 32> Scopes; 692234353Sdim SmallVector<MachineDomTreeNode*, 8> WorkList; 693234353Sdim DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 694234353Sdim DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 695234353Sdim 696234353Sdim // Perform a DFS walk to determine the order of visit. 697234353Sdim WorkList.push_back(HeaderN); 698234353Sdim do { 699234353Sdim MachineDomTreeNode *Node = WorkList.pop_back_val(); 700234353Sdim assert(Node != 0 && "Null dominator tree node?"); 701234353Sdim MachineBasicBlock *BB = Node->getBlock(); 702234353Sdim 703234353Sdim // If the header of the loop containing this basic block is a landing pad, 704234353Sdim // then don't try to hoist instructions out of this loop. 705234353Sdim const MachineLoop *ML = MLI->getLoopFor(BB); 706234353Sdim if (ML && ML->getHeader()->isLandingPad()) 707234353Sdim continue; 708234353Sdim 709234353Sdim // If this subregion is not in the top level loop at all, exit. 710234353Sdim if (!CurLoop->contains(BB)) 711234353Sdim continue; 712234353Sdim 713234353Sdim Scopes.push_back(Node); 714234353Sdim const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 715234353Sdim unsigned NumChildren = Children.size(); 716234353Sdim 717234353Sdim // Don't hoist things out of a large switch statement. This often causes 718234353Sdim // code to be hoisted that wasn't going to be executed, and increases 719234353Sdim // register pressure in a situation where it's likely to matter. 720234353Sdim if (BB->succ_size() >= 25) 721234353Sdim NumChildren = 0; 722234353Sdim 723234353Sdim OpenChildren[Node] = NumChildren; 724234353Sdim // Add children in reverse order as then the next popped worklist node is 725234353Sdim // the first child of this node. This means we ultimately traverse the 726234353Sdim // DOM tree in exactly the same order as if we'd recursed. 727234353Sdim for (int i = (int)NumChildren-1; i >= 0; --i) { 728234353Sdim MachineDomTreeNode *Child = Children[i]; 729234353Sdim ParentMap[Child] = Node; 730234353Sdim WorkList.push_back(Child); 731234353Sdim } 732234353Sdim } while (!WorkList.empty()); 733234353Sdim 734234353Sdim if (Scopes.size() != 0) { 735234353Sdim MachineBasicBlock *Preheader = getCurPreheader(); 736234353Sdim if (!Preheader) 737234353Sdim return; 738234353Sdim 739218893Sdim // Compute registers which are livein into the loop headers. 740218893Sdim RegSeen.clear(); 741218893Sdim BackTrace.clear(); 742218893Sdim InitRegPressure(Preheader); 743218893Sdim } 744218893Sdim 745234353Sdim // Now perform LICM. 746234353Sdim for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 747234353Sdim MachineDomTreeNode *Node = Scopes[i]; 748234353Sdim MachineBasicBlock *MBB = Node->getBlock(); 749218893Sdim 750234353Sdim MachineBasicBlock *Preheader = getCurPreheader(); 751234353Sdim if (!Preheader) 752234353Sdim continue; 753193323Sed 754234353Sdim EnterScope(MBB); 755234353Sdim 756234353Sdim // Process the block 757234353Sdim SpeculationState = SpeculateUnknown; 758234353Sdim for (MachineBasicBlock::iterator 759234353Sdim MII = MBB->begin(), E = MBB->end(); MII != E; ) { 760234353Sdim MachineBasicBlock::iterator NextMII = MII; ++NextMII; 761234353Sdim MachineInstr *MI = &*MII; 762234353Sdim if (!Hoist(MI, Preheader)) 763234353Sdim UpdateRegPressure(MI); 764234353Sdim MII = NextMII; 765234353Sdim } 766234353Sdim 767234353Sdim // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 768234353Sdim ExitScopeIfDone(Node, OpenChildren, ParentMap); 769212904Sdim } 770193323Sed} 771193323Sed 772218893Sdimstatic bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 773218893Sdim return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 774218893Sdim} 775218893Sdim 776226633Sdim/// getRegisterClassIDAndCost - For a given MI, register, and the operand 777226633Sdim/// index, return the ID and cost of its representative register class. 778226633Sdimvoid 779226633SdimMachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 780226633Sdim unsigned Reg, unsigned OpIdx, 781226633Sdim unsigned &RCId, unsigned &RCCost) const { 782226633Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 783249423Sdim MVT VT = *RC->vt_begin(); 784234353Sdim if (VT == MVT::Untyped) { 785226633Sdim RCId = RC->getID(); 786226633Sdim RCCost = 1; 787226633Sdim } else { 788226633Sdim RCId = TLI->getRepRegClassFor(VT)->getID(); 789226633Sdim RCCost = TLI->getRepRegClassCostFor(VT); 790226633Sdim } 791226633Sdim} 792234353Sdim 793218893Sdim/// InitRegPressure - Find all virtual register references that are liveout of 794218893Sdim/// the preheader to initialize the starting "register pressure". Note this 795218893Sdim/// does not count live through (livein but not used) registers. 796218893Sdimvoid MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 797218893Sdim std::fill(RegPressure.begin(), RegPressure.end(), 0); 798218893Sdim 799218893Sdim // If the preheader has only a single predecessor and it ends with a 800218893Sdim // fallthrough or an unconditional branch, then scan its predecessor for live 801218893Sdim // defs as well. This happens whenever the preheader is created by splitting 802218893Sdim // the critical edge from the loop predecessor to the loop header. 803218893Sdim if (BB->pred_size() == 1) { 804218893Sdim MachineBasicBlock *TBB = 0, *FBB = 0; 805218893Sdim SmallVector<MachineOperand, 4> Cond; 806218893Sdim if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 807218893Sdim InitRegPressure(*BB->pred_begin()); 808218893Sdim } 809218893Sdim 810218893Sdim for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 811218893Sdim MII != E; ++MII) { 812218893Sdim MachineInstr *MI = &*MII; 813218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 814218893Sdim const MachineOperand &MO = MI->getOperand(i); 815218893Sdim if (!MO.isReg() || MO.isImplicit()) 816218893Sdim continue; 817218893Sdim unsigned Reg = MO.getReg(); 818218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 819218893Sdim continue; 820218893Sdim 821218893Sdim bool isNew = RegSeen.insert(Reg); 822226633Sdim unsigned RCId, RCCost; 823226633Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 824218893Sdim if (MO.isDef()) 825226633Sdim RegPressure[RCId] += RCCost; 826218893Sdim else { 827218893Sdim bool isKill = isOperandKill(MO, MRI); 828218893Sdim if (isNew && !isKill) 829218893Sdim // Haven't seen this, it must be a livein. 830226633Sdim RegPressure[RCId] += RCCost; 831218893Sdim else if (!isNew && isKill) 832226633Sdim RegPressure[RCId] -= RCCost; 833218893Sdim } 834218893Sdim } 835218893Sdim } 836218893Sdim} 837218893Sdim 838218893Sdim/// UpdateRegPressure - Update estimate of register pressure after the 839218893Sdim/// specified instruction. 840218893Sdimvoid MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 841218893Sdim if (MI->isImplicitDef()) 842218893Sdim return; 843218893Sdim 844218893Sdim SmallVector<unsigned, 4> Defs; 845218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 846218893Sdim const MachineOperand &MO = MI->getOperand(i); 847218893Sdim if (!MO.isReg() || MO.isImplicit()) 848218893Sdim continue; 849218893Sdim unsigned Reg = MO.getReg(); 850218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 851218893Sdim continue; 852218893Sdim 853218893Sdim bool isNew = RegSeen.insert(Reg); 854218893Sdim if (MO.isDef()) 855218893Sdim Defs.push_back(Reg); 856218893Sdim else if (!isNew && isOperandKill(MO, MRI)) { 857226633Sdim unsigned RCId, RCCost; 858226633Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 859218893Sdim if (RCCost > RegPressure[RCId]) 860218893Sdim RegPressure[RCId] = 0; 861218893Sdim else 862218893Sdim RegPressure[RCId] -= RCCost; 863218893Sdim } 864218893Sdim } 865218893Sdim 866226633Sdim unsigned Idx = 0; 867218893Sdim while (!Defs.empty()) { 868218893Sdim unsigned Reg = Defs.pop_back_val(); 869226633Sdim unsigned RCId, RCCost; 870226633Sdim getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 871218893Sdim RegPressure[RCId] += RCCost; 872226633Sdim ++Idx; 873218893Sdim } 874218893Sdim} 875218893Sdim 876234353Sdim/// isLoadFromGOTOrConstantPool - Return true if this machine instruction 877234353Sdim/// loads from global offset table or constant pool. 878234353Sdimstatic bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 879234353Sdim assert (MI.mayLoad() && "Expected MI that loads!"); 880234353Sdim for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 881234353Sdim E = MI.memoperands_end(); I != E; ++I) { 882234353Sdim if (const Value *V = (*I)->getValue()) { 883234353Sdim if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) 884234353Sdim if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 885234353Sdim return true; 886234353Sdim } 887234353Sdim } 888234353Sdim return false; 889234353Sdim} 890234353Sdim 891207618Srdivacky/// IsLICMCandidate - Returns true if the instruction may be a suitable 892207618Srdivacky/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 893207618Srdivacky/// not safe to hoist it. 894207618Srdivackybool MachineLICM::IsLICMCandidate(MachineInstr &I) { 895210299Sed // Check if it's safe to move the instruction. 896210299Sed bool DontMoveAcrossStore = true; 897210299Sed if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 898207618Srdivacky return false; 899226633Sdim 900226633Sdim // If it is load then check if it is guaranteed to execute by making sure that 901226633Sdim // it dominates all exiting blocks. If it doesn't, then there is a path out of 902234353Sdim // the loop which does not execute this load, so we can't hoist it. Loads 903234353Sdim // from constant memory are not safe to speculate all the time, for example 904234353Sdim // indexed load from a jump table. 905226633Sdim // Stores and side effects are already checked by isSafeToMove. 906234353Sdim if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 907234353Sdim !IsGuaranteedToExecute(I.getParent())) 908226633Sdim return false; 909226633Sdim 910207618Srdivacky return true; 911207618Srdivacky} 912193323Sed 913207618Srdivacky/// IsLoopInvariantInst - Returns true if the instruction is loop 914207618Srdivacky/// invariant. I.e., all virtual register operands are defined outside of the 915207618Srdivacky/// loop, physical registers aren't accessed explicitly, and there are no side 916207618Srdivacky/// effects that aren't captured by the operands or other flags. 917234353Sdim/// 918207618Srdivackybool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 919207618Srdivacky if (!IsLICMCandidate(I)) 920207618Srdivacky return false; 921207618Srdivacky 922193323Sed // The instruction is loop invariant if all of its operands are. 923193323Sed for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 924193323Sed const MachineOperand &MO = I.getOperand(i); 925193323Sed 926193323Sed if (!MO.isReg()) 927193323Sed continue; 928193323Sed 929193323Sed unsigned Reg = MO.getReg(); 930193323Sed if (Reg == 0) continue; 931193323Sed 932193323Sed // Don't hoist an instruction that uses or defines a physical register. 933198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 934198090Srdivacky if (MO.isUse()) { 935198090Srdivacky // If the physreg has no defs anywhere, it's just an ambient register 936198090Srdivacky // and we can freely move its uses. Alternatively, if it's allocatable, 937198090Srdivacky // it could get allocated to something with a def during allocation. 938234353Sdim if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 939198090Srdivacky return false; 940198090Srdivacky // Otherwise it's safe to move. 941198090Srdivacky continue; 942198090Srdivacky } else if (!MO.isDead()) { 943198090Srdivacky // A def that isn't dead. We can't move it. 944198090Srdivacky return false; 945204642Srdivacky } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 946204642Srdivacky // If the reg is live into the loop, we can't hoist an instruction 947204642Srdivacky // which would clobber it. 948204642Srdivacky return false; 949198090Srdivacky } 950198090Srdivacky } 951193323Sed 952193323Sed if (!MO.isUse()) 953193323Sed continue; 954193323Sed 955218893Sdim assert(MRI->getVRegDef(Reg) && 956193323Sed "Machine instr not mapped for this vreg?!"); 957193323Sed 958193323Sed // If the loop contains the definition of an operand, then the instruction 959193323Sed // isn't loop invariant. 960218893Sdim if (CurLoop->contains(MRI->getVRegDef(Reg))) 961193323Sed return false; 962193323Sed } 963193323Sed 964193323Sed // If we got this far, the instruction is loop invariant! 965193323Sed return true; 966193323Sed} 967193323Sed 968193323Sed 969234353Sdim/// HasLoopPHIUse - Return true if the specified instruction is used by a 970234353Sdim/// phi node and hoisting it could cause a copy to be inserted. 971234353Sdimbool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { 972234353Sdim SmallVector<const MachineInstr*, 8> Work(1, MI); 973234353Sdim do { 974234353Sdim MI = Work.pop_back_val(); 975234353Sdim for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 976234353Sdim if (!MO->isReg() || !MO->isDef()) 977234353Sdim continue; 978234353Sdim unsigned Reg = MO->getReg(); 979234353Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 980234353Sdim continue; 981234353Sdim for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 982234353Sdim UE = MRI->use_end(); UI != UE; ++UI) { 983234353Sdim MachineInstr *UseMI = &*UI; 984234353Sdim // A PHI may cause a copy to be inserted. 985234353Sdim if (UseMI->isPHI()) { 986234353Sdim // A PHI inside the loop causes a copy because the live range of Reg is 987234353Sdim // extended across the PHI. 988234353Sdim if (CurLoop->contains(UseMI)) 989234353Sdim return true; 990234353Sdim // A PHI in an exit block can cause a copy to be inserted if the PHI 991234353Sdim // has multiple predecessors in the loop with different values. 992234353Sdim // For now, approximate by rejecting all exit blocks. 993234353Sdim if (isExitBlock(UseMI->getParent())) 994234353Sdim return true; 995234353Sdim continue; 996234353Sdim } 997234353Sdim // Look past copies as well. 998234353Sdim if (UseMI->isCopy() && CurLoop->contains(UseMI)) 999234353Sdim Work.push_back(UseMI); 1000234353Sdim } 1001221345Sdim } 1002234353Sdim } while (!Work.empty()); 1003193323Sed return false; 1004193323Sed} 1005193323Sed 1006218893Sdim/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 1007218893Sdim/// and an use in the current loop, return true if the target considered 1008218893Sdim/// it 'high'. 1009218893Sdimbool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 1010218893Sdim unsigned DefIdx, unsigned Reg) const { 1011218893Sdim if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 1012218893Sdim return false; 1013218893Sdim 1014218893Sdim for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 1015218893Sdim E = MRI->use_nodbg_end(); I != E; ++I) { 1016218893Sdim MachineInstr *UseMI = &*I; 1017218893Sdim if (UseMI->isCopyLike()) 1018218893Sdim continue; 1019218893Sdim if (!CurLoop->contains(UseMI->getParent())) 1020218893Sdim continue; 1021218893Sdim for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 1022218893Sdim const MachineOperand &MO = UseMI->getOperand(i); 1023218893Sdim if (!MO.isReg() || !MO.isUse()) 1024218893Sdim continue; 1025218893Sdim unsigned MOReg = MO.getReg(); 1026218893Sdim if (MOReg != Reg) 1027218893Sdim continue; 1028218893Sdim 1029218893Sdim if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 1030218893Sdim return true; 1031218893Sdim } 1032218893Sdim 1033218893Sdim // Only look at the first in loop use. 1034218893Sdim break; 1035199989Srdivacky } 1036218893Sdim 1037218893Sdim return false; 1038199989Srdivacky} 1039199989Srdivacky 1040218893Sdim/// IsCheapInstruction - Return true if the instruction is marked "cheap" or 1041218893Sdim/// the operand latency between its def and a use is one or less. 1042218893Sdimbool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1043234353Sdim if (MI.isAsCheapAsAMove() || MI.isCopyLike()) 1044218893Sdim return true; 1045218893Sdim if (!InstrItins || InstrItins->isEmpty()) 1046218893Sdim return false; 1047218893Sdim 1048218893Sdim bool isCheap = false; 1049218893Sdim unsigned NumDefs = MI.getDesc().getNumDefs(); 1050218893Sdim for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1051218893Sdim MachineOperand &DefMO = MI.getOperand(i); 1052218893Sdim if (!DefMO.isReg() || !DefMO.isDef()) 1053218893Sdim continue; 1054218893Sdim --NumDefs; 1055218893Sdim unsigned Reg = DefMO.getReg(); 1056218893Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1057218893Sdim continue; 1058218893Sdim 1059218893Sdim if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1060218893Sdim return false; 1061218893Sdim isCheap = true; 1062218893Sdim } 1063218893Sdim 1064218893Sdim return isCheap; 1065218893Sdim} 1066218893Sdim 1067218893Sdim/// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1068218893Sdim/// if hoisting an instruction of the given cost matrix can cause high 1069218893Sdim/// register pressure. 1070234353Sdimbool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, 1071234353Sdim bool CheapInstr) { 1072218893Sdim for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1073218893Sdim CI != CE; ++CI) { 1074234353Sdim if (CI->second <= 0) 1075218893Sdim continue; 1076218893Sdim 1077218893Sdim unsigned RCId = CI->first; 1078234353Sdim unsigned Limit = RegLimit[RCId]; 1079234353Sdim int Cost = CI->second; 1080234353Sdim 1081234353Sdim // Don't hoist cheap instructions if they would increase register pressure, 1082234353Sdim // even if we're under the limit. 1083234353Sdim if (CheapInstr) 1084234353Sdim return true; 1085234353Sdim 1086218893Sdim for (unsigned i = BackTrace.size(); i != 0; --i) { 1087263508Sdim SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; 1088234353Sdim if (RP[RCId] + Cost >= Limit) 1089218893Sdim return true; 1090218893Sdim } 1091218893Sdim } 1092218893Sdim 1093218893Sdim return false; 1094218893Sdim} 1095218893Sdim 1096218893Sdim/// UpdateBackTraceRegPressure - Traverse the back trace from header to the 1097218893Sdim/// current block and update their register pressures to reflect the effect 1098218893Sdim/// of hoisting MI from the current block to the preheader. 1099218893Sdimvoid MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 1100218893Sdim if (MI->isImplicitDef()) 1101218893Sdim return; 1102218893Sdim 1103218893Sdim // First compute the 'cost' of the instruction, i.e. its contribution 1104218893Sdim // to register pressure. 1105218893Sdim DenseMap<unsigned, int> Cost; 1106218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1107218893Sdim const MachineOperand &MO = MI->getOperand(i); 1108218893Sdim if (!MO.isReg() || MO.isImplicit()) 1109218893Sdim continue; 1110218893Sdim unsigned Reg = MO.getReg(); 1111218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1112218893Sdim continue; 1113218893Sdim 1114226633Sdim unsigned RCId, RCCost; 1115226633Sdim getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 1116218893Sdim if (MO.isDef()) { 1117218893Sdim DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1118218893Sdim if (CI != Cost.end()) 1119218893Sdim CI->second += RCCost; 1120218893Sdim else 1121218893Sdim Cost.insert(std::make_pair(RCId, RCCost)); 1122218893Sdim } else if (isOperandKill(MO, MRI)) { 1123218893Sdim DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1124218893Sdim if (CI != Cost.end()) 1125218893Sdim CI->second -= RCCost; 1126218893Sdim else 1127218893Sdim Cost.insert(std::make_pair(RCId, -RCCost)); 1128218893Sdim } 1129218893Sdim } 1130218893Sdim 1131218893Sdim // Update register pressure of blocks from loop header to current block. 1132218893Sdim for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { 1133263508Sdim SmallVectorImpl<unsigned> &RP = BackTrace[i]; 1134218893Sdim for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1135218893Sdim CI != CE; ++CI) { 1136218893Sdim unsigned RCId = CI->first; 1137218893Sdim RP[RCId] += CI->second; 1138218893Sdim } 1139218893Sdim } 1140218893Sdim} 1141218893Sdim 1142193323Sed/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 1143193323Sed/// the given loop invariant. 1144193323Sedbool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 1145218893Sdim if (MI.isImplicitDef()) 1146218893Sdim return true; 1147218893Sdim 1148234353Sdim // Besides removing computation from the loop, hoisting an instruction has 1149234353Sdim // these effects: 1150234353Sdim // 1151234353Sdim // - The value defined by the instruction becomes live across the entire 1152234353Sdim // loop. This increases register pressure in the loop. 1153234353Sdim // 1154234353Sdim // - If the value is used by a PHI in the loop, a copy will be required for 1155234353Sdim // lowering the PHI after extending the live range. 1156234353Sdim // 1157234353Sdim // - When hoisting the last use of a value in the loop, that value no longer 1158234353Sdim // needs to be live in the loop. This lowers register pressure in the loop. 1159226633Sdim 1160234353Sdim bool CheapInstr = IsCheapInstruction(MI); 1161234353Sdim bool CreatesCopy = HasLoopPHIUse(&MI); 1162218893Sdim 1163234353Sdim // Don't hoist a cheap instruction if it would create a copy in the loop. 1164234353Sdim if (CheapInstr && CreatesCopy) { 1165234353Sdim DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 1166234353Sdim return false; 1167234353Sdim } 1168234353Sdim 1169234353Sdim // Rematerializable instructions should always be hoisted since the register 1170234353Sdim // allocator can just pull them down again when needed. 1171234353Sdim if (TII->isTriviallyReMaterializable(&MI, AA)) 1172234353Sdim return true; 1173234353Sdim 1174234353Sdim // Estimate register pressure to determine whether to LICM the instruction. 1175234353Sdim // In low register pressure situation, we can be more aggressive about 1176234353Sdim // hoisting. Also, favors hoisting long latency instructions even in 1177234353Sdim // moderately high pressure situation. 1178234353Sdim // Cheap instructions will only be hoisted if they don't increase register 1179234353Sdim // pressure at all. 1180234353Sdim // FIXME: If there are long latency loop-invariant instructions inside the 1181234353Sdim // loop at this point, why didn't the optimizer's LICM hoist them? 1182234353Sdim DenseMap<unsigned, int> Cost; 1183234353Sdim for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 1184234353Sdim const MachineOperand &MO = MI.getOperand(i); 1185234353Sdim if (!MO.isReg() || MO.isImplicit()) 1186234353Sdim continue; 1187234353Sdim unsigned Reg = MO.getReg(); 1188234353Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1189234353Sdim continue; 1190234353Sdim 1191234353Sdim unsigned RCId, RCCost; 1192234353Sdim getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); 1193234353Sdim if (MO.isDef()) { 1194234353Sdim if (HasHighOperandLatency(MI, i, Reg)) { 1195234353Sdim DEBUG(dbgs() << "Hoist High Latency: " << MI); 1196234353Sdim ++NumHighLatency; 1197234353Sdim return true; 1198218893Sdim } 1199234353Sdim Cost[RCId] += RCCost; 1200234353Sdim } else if (isOperandKill(MO, MRI)) { 1201234353Sdim // Is a virtual register use is a kill, hoisting it out of the loop 1202234353Sdim // may actually reduce register pressure or be register pressure 1203234353Sdim // neutral. 1204234353Sdim Cost[RCId] -= RCCost; 1205218893Sdim } 1206234353Sdim } 1207218893Sdim 1208234353Sdim // Visit BBs from header to current BB, if hoisting this doesn't cause 1209234353Sdim // high register pressure, then it's safe to proceed. 1210234353Sdim if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 1211234353Sdim DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 1212234353Sdim ++NumLowRP; 1213234353Sdim return true; 1214234353Sdim } 1215218893Sdim 1216234353Sdim // Don't risk increasing register pressure if it would create copies. 1217234353Sdim if (CreatesCopy) { 1218234353Sdim DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 1219234353Sdim return false; 1220234353Sdim } 1221226633Sdim 1222234353Sdim // Do not "speculate" in high register pressure situation. If an 1223234353Sdim // instruction is not guaranteed to be executed in the loop, it's best to be 1224234353Sdim // conservative. 1225234353Sdim if (AvoidSpeculation && 1226234353Sdim (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 1227234353Sdim DEBUG(dbgs() << "Won't speculate: " << MI); 1228234353Sdim return false; 1229199989Srdivacky } 1230193323Sed 1231234353Sdim // High register pressure situation, only hoist if the instruction is going 1232234353Sdim // to be remat'ed. 1233234353Sdim if (!TII->isTriviallyReMaterializable(&MI, AA) && 1234234353Sdim !MI.isInvariantLoad(AA)) { 1235234353Sdim DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 1236234353Sdim return false; 1237193323Sed } 1238193323Sed 1239193323Sed return true; 1240193323Sed} 1241193323Sed 1242198892SrdivackyMachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 1243218893Sdim // Don't unfold simple loads. 1244234353Sdim if (MI->canFoldAsLoad()) 1245218893Sdim return 0; 1246218893Sdim 1247198892Srdivacky // If not, we may be able to unfold a load and hoist that. 1248198892Srdivacky // First test whether the instruction is loading from an amenable 1249198892Srdivacky // memory location. 1250218893Sdim if (!MI->isInvariantLoad(AA)) 1251199989Srdivacky return 0; 1252199989Srdivacky 1253198892Srdivacky // Next determine the register class for a temporary register. 1254198892Srdivacky unsigned LoadRegIndex; 1255198892Srdivacky unsigned NewOpc = 1256198892Srdivacky TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 1257198892Srdivacky /*UnfoldLoad=*/true, 1258198892Srdivacky /*UnfoldStore=*/false, 1259198892Srdivacky &LoadRegIndex); 1260198892Srdivacky if (NewOpc == 0) return 0; 1261224145Sdim const MCInstrDesc &MID = TII->get(NewOpc); 1262224145Sdim if (MID.getNumDefs() != 1) return 0; 1263239462Sdim MachineFunction &MF = *MI->getParent()->getParent(); 1264239462Sdim const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 1265198892Srdivacky // Ok, we're unfolding. Create a temporary register and do the unfold. 1266218893Sdim unsigned Reg = MRI->createVirtualRegister(RC); 1267199989Srdivacky 1268198892Srdivacky SmallVector<MachineInstr *, 2> NewMIs; 1269198892Srdivacky bool Success = 1270198892Srdivacky TII->unfoldMemoryOperand(MF, MI, Reg, 1271198892Srdivacky /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 1272198892Srdivacky NewMIs); 1273198892Srdivacky (void)Success; 1274198892Srdivacky assert(Success && 1275198892Srdivacky "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 1276198892Srdivacky "succeeded!"); 1277198892Srdivacky assert(NewMIs.size() == 2 && 1278198892Srdivacky "Unfolded a load into multiple instructions!"); 1279198892Srdivacky MachineBasicBlock *MBB = MI->getParent(); 1280234353Sdim MachineBasicBlock::iterator Pos = MI; 1281234353Sdim MBB->insert(Pos, NewMIs[0]); 1282234353Sdim MBB->insert(Pos, NewMIs[1]); 1283198892Srdivacky // If unfolding produced a load that wasn't loop-invariant or profitable to 1284198892Srdivacky // hoist, discard the new instructions and bail. 1285198892Srdivacky if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 1286198892Srdivacky NewMIs[0]->eraseFromParent(); 1287198892Srdivacky NewMIs[1]->eraseFromParent(); 1288198892Srdivacky return 0; 1289198892Srdivacky } 1290218893Sdim 1291218893Sdim // Update register pressure for the unfolded instruction. 1292218893Sdim UpdateRegPressure(NewMIs[1]); 1293218893Sdim 1294198892Srdivacky // Otherwise we successfully unfolded a load that we can hoist. 1295198892Srdivacky MI->eraseFromParent(); 1296198892Srdivacky return NewMIs[0]; 1297198892Srdivacky} 1298198892Srdivacky 1299198892Srdivackyvoid MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 1300198892Srdivacky for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 1301198892Srdivacky const MachineInstr *MI = &*I; 1302218893Sdim unsigned Opcode = MI->getOpcode(); 1303218893Sdim DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1304218893Sdim CI = CSEMap.find(Opcode); 1305218893Sdim if (CI != CSEMap.end()) 1306218893Sdim CI->second.push_back(MI); 1307218893Sdim else { 1308218893Sdim std::vector<const MachineInstr*> CSEMIs; 1309218893Sdim CSEMIs.push_back(MI); 1310218893Sdim CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1311198892Srdivacky } 1312198892Srdivacky } 1313198892Srdivacky} 1314198892Srdivacky 1315199481Srdivackyconst MachineInstr* 1316199481SrdivackyMachineLICM::LookForDuplicate(const MachineInstr *MI, 1317199481Srdivacky std::vector<const MachineInstr*> &PrevMIs) { 1318198953Srdivacky for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1319198953Srdivacky const MachineInstr *PrevMI = PrevMIs[i]; 1320218893Sdim if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) 1321198953Srdivacky return PrevMI; 1322198953Srdivacky } 1323198953Srdivacky return 0; 1324198953Srdivacky} 1325198953Srdivacky 1326198953Srdivackybool MachineLICM::EliminateCSE(MachineInstr *MI, 1327198953Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1328210299Sed // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1329210299Sed // the undef property onto uses. 1330210299Sed if (CI == CSEMap.end() || MI->isImplicitDef()) 1331199481Srdivacky return false; 1332199481Srdivacky 1333199481Srdivacky if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1334202375Srdivacky DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1335204642Srdivacky 1336204642Srdivacky // Replace virtual registers defined by MI by their counterparts defined 1337204642Srdivacky // by Dup. 1338234353Sdim SmallVector<unsigned, 2> Defs; 1339199481Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1340199481Srdivacky const MachineOperand &MO = MI->getOperand(i); 1341204642Srdivacky 1342204642Srdivacky // Physical registers may not differ here. 1343204642Srdivacky assert((!MO.isReg() || MO.getReg() == 0 || 1344204642Srdivacky !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1345204642Srdivacky MO.getReg() == Dup->getOperand(i).getReg()) && 1346204642Srdivacky "Instructions with different phys regs are not identical!"); 1347204642Srdivacky 1348204642Srdivacky if (MO.isReg() && MO.isDef() && 1349234353Sdim !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1350234353Sdim Defs.push_back(i); 1351234353Sdim } 1352234353Sdim 1353234353Sdim SmallVector<const TargetRegisterClass*, 2> OrigRCs; 1354234353Sdim for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1355234353Sdim unsigned Idx = Defs[i]; 1356234353Sdim unsigned Reg = MI->getOperand(Idx).getReg(); 1357234353Sdim unsigned DupReg = Dup->getOperand(Idx).getReg(); 1358234353Sdim OrigRCs.push_back(MRI->getRegClass(DupReg)); 1359234353Sdim 1360234353Sdim if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 1361234353Sdim // Restore old RCs if more than one defs. 1362234353Sdim for (unsigned j = 0; j != i; ++j) 1363234353Sdim MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 1364234353Sdim return false; 1365208599Srdivacky } 1366198953Srdivacky } 1367234353Sdim 1368234353Sdim for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1369234353Sdim unsigned Idx = Defs[i]; 1370234353Sdim unsigned Reg = MI->getOperand(Idx).getReg(); 1371234353Sdim unsigned DupReg = Dup->getOperand(Idx).getReg(); 1372234353Sdim MRI->replaceRegWith(Reg, DupReg); 1373234353Sdim MRI->clearKillFlags(DupReg); 1374234353Sdim } 1375234353Sdim 1376199481Srdivacky MI->eraseFromParent(); 1377199481Srdivacky ++NumCSEed; 1378199481Srdivacky return true; 1379198953Srdivacky } 1380198953Srdivacky return false; 1381198953Srdivacky} 1382198953Srdivacky 1383226633Sdim/// MayCSE - Return true if the given instruction will be CSE'd if it's 1384226633Sdim/// hoisted out of the loop. 1385226633Sdimbool MachineLICM::MayCSE(MachineInstr *MI) { 1386226633Sdim unsigned Opcode = MI->getOpcode(); 1387226633Sdim DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1388226633Sdim CI = CSEMap.find(Opcode); 1389226633Sdim // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1390226633Sdim // the undef property onto uses. 1391226633Sdim if (CI == CSEMap.end() || MI->isImplicitDef()) 1392226633Sdim return false; 1393226633Sdim 1394226633Sdim return LookForDuplicate(MI, CI->second) != 0; 1395226633Sdim} 1396226633Sdim 1397193323Sed/// Hoist - When an instruction is found to use only loop invariant operands 1398193323Sed/// that are safe to hoist, this instruction is called to do the dirty work. 1399193323Sed/// 1400218893Sdimbool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1401198892Srdivacky // First check whether we should hoist this instruction. 1402198892Srdivacky if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1403198892Srdivacky // If not, try unfolding a hoistable load. 1404198892Srdivacky MI = ExtractHoistableLoad(MI); 1405218893Sdim if (!MI) return false; 1406198892Srdivacky } 1407193323Sed 1408193323Sed // Now move the instructions to the predecessor, inserting it before any 1409193323Sed // terminator instructions. 1410193323Sed DEBUG({ 1411202375Srdivacky dbgs() << "Hoisting " << *MI; 1412210299Sed if (Preheader->getBasicBlock()) 1413202375Srdivacky dbgs() << " to MachineBasicBlock " 1414210299Sed << Preheader->getName(); 1415198892Srdivacky if (MI->getParent()->getBasicBlock()) 1416202375Srdivacky dbgs() << " from MachineBasicBlock " 1417199989Srdivacky << MI->getParent()->getName(); 1418202375Srdivacky dbgs() << "\n"; 1419193323Sed }); 1420193323Sed 1421198892Srdivacky // If this is the first instruction being hoisted to the preheader, 1422198892Srdivacky // initialize the CSE map with potential common expressions. 1423210299Sed if (FirstInLoop) { 1424210299Sed InitCSEMap(Preheader); 1425210299Sed FirstInLoop = false; 1426210299Sed } 1427198892Srdivacky 1428193323Sed // Look for opportunity to CSE the hoisted instruction. 1429198892Srdivacky unsigned Opcode = MI->getOpcode(); 1430198892Srdivacky DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1431198892Srdivacky CI = CSEMap.find(Opcode); 1432198953Srdivacky if (!EliminateCSE(MI, CI)) { 1433198953Srdivacky // Otherwise, splice the instruction to the preheader. 1434210299Sed Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1435198892Srdivacky 1436218893Sdim // Update register pressure for BBs from header to this block. 1437218893Sdim UpdateBackTraceRegPressure(MI); 1438218893Sdim 1439208599Srdivacky // Clear the kill flags of any register this instruction defines, 1440208599Srdivacky // since they may need to be live throughout the entire loop 1441208599Srdivacky // rather than just live for part of it. 1442208599Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1443208599Srdivacky MachineOperand &MO = MI->getOperand(i); 1444208599Srdivacky if (MO.isReg() && MO.isDef() && !MO.isDead()) 1445218893Sdim MRI->clearKillFlags(MO.getReg()); 1446208599Srdivacky } 1447208599Srdivacky 1448193323Sed // Add to the CSE map. 1449193323Sed if (CI != CSEMap.end()) 1450198892Srdivacky CI->second.push_back(MI); 1451193323Sed else { 1452193323Sed std::vector<const MachineInstr*> CSEMIs; 1453198892Srdivacky CSEMIs.push_back(MI); 1454198892Srdivacky CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1455193323Sed } 1456193323Sed } 1457193323Sed 1458193323Sed ++NumHoisted; 1459193323Sed Changed = true; 1460218893Sdim 1461218893Sdim return true; 1462193323Sed} 1463210299Sed 1464210299SedMachineBasicBlock *MachineLICM::getCurPreheader() { 1465210299Sed // Determine the block to which to hoist instructions. If we can't find a 1466210299Sed // suitable loop predecessor, we can't do any hoisting. 1467210299Sed 1468210299Sed // If we've tried to get a preheader and failed, don't try again. 1469210299Sed if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1470210299Sed return 0; 1471210299Sed 1472210299Sed if (!CurPreheader) { 1473210299Sed CurPreheader = CurLoop->getLoopPreheader(); 1474210299Sed if (!CurPreheader) { 1475210299Sed MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1476210299Sed if (!Pred) { 1477210299Sed CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1478210299Sed return 0; 1479210299Sed } 1480210299Sed 1481210299Sed CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1482210299Sed if (!CurPreheader) { 1483210299Sed CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1484210299Sed return 0; 1485210299Sed } 1486210299Sed } 1487210299Sed } 1488210299Sed return CurPreheader; 1489210299Sed} 1490