MachineLICM.cpp revision 208954
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion on machine instructions. We 11// attempt to remove as much code from the body of a loop as possible. 12// 13// This pass does not attempt to throttle itself to limit register pressure. 14// The register allocation phases are expected to perform rematerialization 15// to recover when register pressure is high. 16// 17// This pass is not intended to be a replacement or a complete alternative 18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19// constructs that are not exposed before lowering and instruction selection. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "machine-licm" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/CodeGen/MachineDominators.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineMemOperand.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/PseudoSourceValue.h" 31#include "llvm/Target/TargetRegisterInfo.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetMachine.h" 34#include "llvm/Analysis/AliasAnalysis.h" 35#include "llvm/ADT/DenseMap.h" 36#include "llvm/ADT/SmallSet.h" 37#include "llvm/ADT/Statistic.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Support/raw_ostream.h" 40 41using namespace llvm; 42 43STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); 44STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); 45STATISTIC(NumPostRAHoisted, 46 "Number of machine instructions hoisted out of loops post regalloc"); 47 48namespace { 49 class MachineLICM : public MachineFunctionPass { 50 bool PreRegAlloc; 51 52 const TargetMachine *TM; 53 const TargetInstrInfo *TII; 54 const TargetRegisterInfo *TRI; 55 const MachineFrameInfo *MFI; 56 MachineRegisterInfo *RegInfo; 57 58 // Various analyses that we use... 59 AliasAnalysis *AA; // Alias analysis info. 60 MachineLoopInfo *MLI; // Current MachineLoopInfo 61 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 62 63 // State that is updated as we process loops 64 bool Changed; // True if a loop is changed. 65 MachineLoop *CurLoop; // The current loop we are working on. 66 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 67 68 BitVector AllocatableSet; 69 70 // For each opcode, keep a list of potentail CSE instructions. 71 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 72 73 public: 74 static char ID; // Pass identification, replacement for typeid 75 MachineLICM() : 76 MachineFunctionPass(&ID), PreRegAlloc(true) {} 77 78 explicit MachineLICM(bool PreRA) : 79 MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} 80 81 virtual bool runOnMachineFunction(MachineFunction &MF); 82 83 const char *getPassName() const { return "Machine Instruction LICM"; } 84 85 // FIXME: Loop preheaders? 86 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 87 AU.setPreservesCFG(); 88 AU.addRequired<MachineLoopInfo>(); 89 AU.addRequired<MachineDominatorTree>(); 90 AU.addRequired<AliasAnalysis>(); 91 AU.addPreserved<MachineLoopInfo>(); 92 AU.addPreserved<MachineDominatorTree>(); 93 MachineFunctionPass::getAnalysisUsage(AU); 94 } 95 96 virtual void releaseMemory() { 97 CSEMap.clear(); 98 } 99 100 private: 101 /// CandidateInfo - Keep track of information about hoisting candidates. 102 struct CandidateInfo { 103 MachineInstr *MI; 104 unsigned Def; 105 int FI; 106 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 107 : MI(mi), Def(def), FI(fi) {} 108 }; 109 110 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 111 /// invariants out to the preheader. 112 void HoistRegionPostRA(); 113 114 /// HoistPostRA - When an instruction is found to only use loop invariant 115 /// operands that is safe to hoist, this instruction is called to do the 116 /// dirty work. 117 void HoistPostRA(MachineInstr *MI, unsigned Def); 118 119 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 120 /// gather register def and frame object update information. 121 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, 122 SmallSet<int, 32> &StoredFIs, 123 SmallVector<CandidateInfo, 32> &Candidates); 124 125 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 126 /// current loop. 127 void AddToLiveIns(unsigned Reg); 128 129 /// IsLICMCandidate - Returns true if the instruction may be a suitable 130 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 131 /// not safe to hoist it. 132 bool IsLICMCandidate(MachineInstr &I); 133 134 /// IsLoopInvariantInst - Returns true if the instruction is loop 135 /// invariant. I.e., all virtual register operands are defined outside of 136 /// the loop, physical registers aren't accessed (explicitly or implicitly), 137 /// and the instruction is hoistable. 138 /// 139 bool IsLoopInvariantInst(MachineInstr &I); 140 141 /// IsProfitableToHoist - Return true if it is potentially profitable to 142 /// hoist the given loop invariant. 143 bool IsProfitableToHoist(MachineInstr &MI); 144 145 /// HoistRegion - Walk the specified region of the CFG (defined by all 146 /// blocks dominated by the specified block, and that are in the current 147 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 148 /// visit definitions before uses, allowing us to hoist a loop body in one 149 /// pass without iteration. 150 /// 151 void HoistRegion(MachineDomTreeNode *N); 152 153 /// isLoadFromConstantMemory - Return true if the given instruction is a 154 /// load from constant memory. 155 bool isLoadFromConstantMemory(MachineInstr *MI); 156 157 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 158 /// the load itself could be hoisted. Return the unfolded and hoistable 159 /// load, or null if the load couldn't be unfolded or if it wouldn't 160 /// be hoistable. 161 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 162 163 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 164 /// duplicate of MI. Return this instruction if it's found. 165 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 166 std::vector<const MachineInstr*> &PrevMIs); 167 168 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 169 /// the preheader that compute the same value. If it's found, do a RAU on 170 /// with the definition of the existing instruction rather than hoisting 171 /// the instruction to the preheader. 172 bool EliminateCSE(MachineInstr *MI, 173 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 174 175 /// Hoist - When an instruction is found to only use loop invariant operands 176 /// that is safe to hoist, this instruction is called to do the dirty work. 177 /// 178 void Hoist(MachineInstr *MI); 179 180 /// InitCSEMap - Initialize the CSE map with instructions that are in the 181 /// current loop preheader that may become duplicates of instructions that 182 /// are hoisted out of the loop. 183 void InitCSEMap(MachineBasicBlock *BB); 184 }; 185} // end anonymous namespace 186 187char MachineLICM::ID = 0; 188static RegisterPass<MachineLICM> 189X("machinelicm", "Machine Loop Invariant Code Motion"); 190 191FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { 192 return new MachineLICM(PreRegAlloc); 193} 194 195/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most 196/// loop that has a preheader. 197static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { 198 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 199 if (L->getLoopPreheader()) 200 return false; 201 return true; 202} 203 204bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 205 if (PreRegAlloc) 206 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); 207 else 208 DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); 209 210 Changed = false; 211 TM = &MF.getTarget(); 212 TII = TM->getInstrInfo(); 213 TRI = TM->getRegisterInfo(); 214 MFI = MF.getFrameInfo(); 215 RegInfo = &MF.getRegInfo(); 216 AllocatableSet = TRI->getAllocatableSet(MF); 217 218 // Get our Loop information... 219 MLI = &getAnalysis<MachineLoopInfo>(); 220 DT = &getAnalysis<MachineDominatorTree>(); 221 AA = &getAnalysis<AliasAnalysis>(); 222 223 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ 224 CurLoop = *I; 225 226 // If this is done before regalloc, only visit outer-most preheader-sporting 227 // loops. 228 if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) 229 continue; 230 231 // Determine the block to which to hoist instructions. If we can't find a 232 // suitable loop preheader, we can't do any hoisting. 233 // 234 // FIXME: We are only hoisting if the basic block coming into this loop 235 // has only one successor. This isn't the case in general because we haven't 236 // broken critical edges or added preheaders. 237 CurPreheader = CurLoop->getLoopPreheader(); 238 if (!CurPreheader) 239 continue; 240 241 if (!PreRegAlloc) 242 HoistRegionPostRA(); 243 else { 244 // CSEMap is initialized for loop header when the first instruction is 245 // being hoisted. 246 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 247 HoistRegion(N); 248 CSEMap.clear(); 249 } 250 } 251 252 return Changed; 253} 254 255/// InstructionStoresToFI - Return true if instruction stores to the 256/// specified frame. 257static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 258 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 259 oe = MI->memoperands_end(); o != oe; ++o) { 260 if (!(*o)->isStore() || !(*o)->getValue()) 261 continue; 262 if (const FixedStackPseudoSourceValue *Value = 263 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 264 if (Value->getFrameIndex() == FI) 265 return true; 266 } 267 } 268 return false; 269} 270 271/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 272/// gather register def and frame object update information. 273void MachineLICM::ProcessMI(MachineInstr *MI, 274 unsigned *PhysRegDefs, 275 SmallSet<int, 32> &StoredFIs, 276 SmallVector<CandidateInfo, 32> &Candidates) { 277 bool RuledOut = false; 278 bool HasNonInvariantUse = false; 279 unsigned Def = 0; 280 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 281 const MachineOperand &MO = MI->getOperand(i); 282 if (MO.isFI()) { 283 // Remember if the instruction stores to the frame index. 284 int FI = MO.getIndex(); 285 if (!StoredFIs.count(FI) && 286 MFI->isSpillSlotObjectIndex(FI) && 287 InstructionStoresToFI(MI, FI)) 288 StoredFIs.insert(FI); 289 HasNonInvariantUse = true; 290 continue; 291 } 292 293 if (!MO.isReg()) 294 continue; 295 unsigned Reg = MO.getReg(); 296 if (!Reg) 297 continue; 298 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 299 "Not expecting virtual register!"); 300 301 if (!MO.isDef()) { 302 if (Reg && PhysRegDefs[Reg]) 303 // If it's using a non-loop-invariant register, then it's obviously not 304 // safe to hoist. 305 HasNonInvariantUse = true; 306 continue; 307 } 308 309 if (MO.isImplicit()) { 310 ++PhysRegDefs[Reg]; 311 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 312 ++PhysRegDefs[*AS]; 313 if (!MO.isDead()) 314 // Non-dead implicit def? This cannot be hoisted. 315 RuledOut = true; 316 // No need to check if a dead implicit def is also defined by 317 // another instruction. 318 continue; 319 } 320 321 // FIXME: For now, avoid instructions with multiple defs, unless 322 // it's a dead implicit def. 323 if (Def) 324 RuledOut = true; 325 else 326 Def = Reg; 327 328 // If we have already seen another instruction that defines the same 329 // register, then this is not safe. 330 if (++PhysRegDefs[Reg] > 1) 331 // MI defined register is seen defined by another instruction in 332 // the loop, it cannot be a LICM candidate. 333 RuledOut = true; 334 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 335 if (++PhysRegDefs[*AS] > 1) 336 RuledOut = true; 337 } 338 339 // Only consider reloads for now and remats which do not have register 340 // operands. FIXME: Consider unfold load folding instructions. 341 if (Def && !RuledOut) { 342 int FI = INT_MIN; 343 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 344 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 345 Candidates.push_back(CandidateInfo(MI, Def, FI)); 346 } 347} 348 349/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 350/// invariants out to the preheader. 351void MachineLICM::HoistRegionPostRA() { 352 unsigned NumRegs = TRI->getNumRegs(); 353 unsigned *PhysRegDefs = new unsigned[NumRegs]; 354 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); 355 356 SmallVector<CandidateInfo, 32> Candidates; 357 SmallSet<int, 32> StoredFIs; 358 359 // Walk the entire region, count number of defs for each register, and 360 // collect potential LICM candidates. 361 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 362 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 363 MachineBasicBlock *BB = Blocks[i]; 364 // Conservatively treat live-in's as an external def. 365 // FIXME: That means a reload that're reused in successor block(s) will not 366 // be LICM'ed. 367 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 368 E = BB->livein_end(); I != E; ++I) { 369 unsigned Reg = *I; 370 ++PhysRegDefs[Reg]; 371 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 372 ++PhysRegDefs[*AS]; 373 } 374 375 for (MachineBasicBlock::iterator 376 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 377 MachineInstr *MI = &*MII; 378 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); 379 } 380 } 381 382 // Now evaluate whether the potential candidates qualify. 383 // 1. Check if the candidate defined register is defined by another 384 // instruction in the loop. 385 // 2. If the candidate is a load from stack slot (always true for now), 386 // check if the slot is stored anywhere in the loop. 387 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 388 if (Candidates[i].FI != INT_MIN && 389 StoredFIs.count(Candidates[i].FI)) 390 continue; 391 392 if (PhysRegDefs[Candidates[i].Def] == 1) { 393 bool Safe = true; 394 MachineInstr *MI = Candidates[i].MI; 395 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 396 const MachineOperand &MO = MI->getOperand(j); 397 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 398 continue; 399 if (PhysRegDefs[MO.getReg()]) { 400 // If it's using a non-loop-invariant register, then it's obviously 401 // not safe to hoist. 402 Safe = false; 403 break; 404 } 405 } 406 if (Safe) 407 HoistPostRA(MI, Candidates[i].Def); 408 } 409 } 410 411 delete[] PhysRegDefs; 412} 413 414/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 415/// loop, and make sure it is not killed by any instructions in the loop. 416void MachineLICM::AddToLiveIns(unsigned Reg) { 417 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 418 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 419 MachineBasicBlock *BB = Blocks[i]; 420 if (!BB->isLiveIn(Reg)) 421 BB->addLiveIn(Reg); 422 for (MachineBasicBlock::iterator 423 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 424 MachineInstr *MI = &*MII; 425 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 426 MachineOperand &MO = MI->getOperand(i); 427 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 428 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 429 MO.setIsKill(false); 430 } 431 } 432 } 433} 434 435/// HoistPostRA - When an instruction is found to only use loop invariant 436/// operands that is safe to hoist, this instruction is called to do the 437/// dirty work. 438void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 439 // Now move the instructions to the predecessor, inserting it before any 440 // terminator instructions. 441 DEBUG({ 442 dbgs() << "Hoisting " << *MI; 443 if (CurPreheader->getBasicBlock()) 444 dbgs() << " to MachineBasicBlock " 445 << CurPreheader->getName(); 446 if (MI->getParent()->getBasicBlock()) 447 dbgs() << " from MachineBasicBlock " 448 << MI->getParent()->getName(); 449 dbgs() << "\n"; 450 }); 451 452 // Splice the instruction to the preheader. 453 MachineBasicBlock *MBB = MI->getParent(); 454 CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); 455 456 // Add register to livein list to all the BBs in the current loop since a 457 // loop invariant must be kept live throughout the whole loop. This is 458 // important to ensure later passes do not scavenge the def register. 459 AddToLiveIns(Def); 460 461 ++NumPostRAHoisted; 462 Changed = true; 463} 464 465/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 466/// dominated by the specified block, and that are in the current loop) in depth 467/// first order w.r.t the DominatorTree. This allows us to visit definitions 468/// before uses, allowing us to hoist a loop body in one pass without iteration. 469/// 470void MachineLICM::HoistRegion(MachineDomTreeNode *N) { 471 assert(N != 0 && "Null dominator tree node?"); 472 MachineBasicBlock *BB = N->getBlock(); 473 474 // If this subregion is not in the top level loop at all, exit. 475 if (!CurLoop->contains(BB)) return; 476 477 for (MachineBasicBlock::iterator 478 MII = BB->begin(), E = BB->end(); MII != E; ) { 479 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 480 Hoist(&*MII); 481 MII = NextMII; 482 } 483 484 const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); 485 for (unsigned I = 0, E = Children.size(); I != E; ++I) 486 HoistRegion(Children[I]); 487} 488 489/// IsLICMCandidate - Returns true if the instruction may be a suitable 490/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 491/// not safe to hoist it. 492bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 493 if (I.isImplicitDef()) 494 return false; 495 496 const TargetInstrDesc &TID = I.getDesc(); 497 498 // Ignore stuff that we obviously can't hoist. 499 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 500 TID.hasUnmodeledSideEffects()) 501 return false; 502 503 if (TID.mayLoad()) { 504 // Okay, this instruction does a load. As a refinement, we allow the target 505 // to decide whether the loaded value is actually a constant. If so, we can 506 // actually use it as a load. 507 if (!I.isInvariantLoad(AA)) 508 // FIXME: we should be able to hoist loads with no other side effects if 509 // there are no other instructions which can change memory in this loop. 510 // This is a trivial form of alias analysis. 511 return false; 512 } 513 return true; 514} 515 516/// IsLoopInvariantInst - Returns true if the instruction is loop 517/// invariant. I.e., all virtual register operands are defined outside of the 518/// loop, physical registers aren't accessed explicitly, and there are no side 519/// effects that aren't captured by the operands or other flags. 520/// 521bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 522 if (!IsLICMCandidate(I)) 523 return false; 524 525 // The instruction is loop invariant if all of its operands are. 526 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 527 const MachineOperand &MO = I.getOperand(i); 528 529 if (!MO.isReg()) 530 continue; 531 532 unsigned Reg = MO.getReg(); 533 if (Reg == 0) continue; 534 535 // Don't hoist an instruction that uses or defines a physical register. 536 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 537 if (MO.isUse()) { 538 // If the physreg has no defs anywhere, it's just an ambient register 539 // and we can freely move its uses. Alternatively, if it's allocatable, 540 // it could get allocated to something with a def during allocation. 541 if (!RegInfo->def_empty(Reg)) 542 return false; 543 if (AllocatableSet.test(Reg)) 544 return false; 545 // Check for a def among the register's aliases too. 546 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 547 unsigned AliasReg = *Alias; 548 if (!RegInfo->def_empty(AliasReg)) 549 return false; 550 if (AllocatableSet.test(AliasReg)) 551 return false; 552 } 553 // Otherwise it's safe to move. 554 continue; 555 } else if (!MO.isDead()) { 556 // A def that isn't dead. We can't move it. 557 return false; 558 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 559 // If the reg is live into the loop, we can't hoist an instruction 560 // which would clobber it. 561 return false; 562 } 563 } 564 565 if (!MO.isUse()) 566 continue; 567 568 assert(RegInfo->getVRegDef(Reg) && 569 "Machine instr not mapped for this vreg?!"); 570 571 // If the loop contains the definition of an operand, then the instruction 572 // isn't loop invariant. 573 if (CurLoop->contains(RegInfo->getVRegDef(Reg))) 574 return false; 575 } 576 577 // If we got this far, the instruction is loop invariant! 578 return true; 579} 580 581 582/// HasPHIUses - Return true if the specified register has any PHI use. 583static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { 584 for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), 585 UE = RegInfo->use_end(); UI != UE; ++UI) { 586 MachineInstr *UseMI = &*UI; 587 if (UseMI->isPHI()) 588 return true; 589 } 590 return false; 591} 592 593/// isLoadFromConstantMemory - Return true if the given instruction is a 594/// load from constant memory. Machine LICM will hoist these even if they are 595/// not re-materializable. 596bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { 597 if (!MI->getDesc().mayLoad()) return false; 598 if (!MI->hasOneMemOperand()) return false; 599 MachineMemOperand *MMO = *MI->memoperands_begin(); 600 if (MMO->isVolatile()) return false; 601 if (!MMO->getValue()) return false; 602 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); 603 if (PSV) { 604 MachineFunction &MF = *MI->getParent()->getParent(); 605 return PSV->isConstant(MF.getFrameInfo()); 606 } else { 607 return AA->pointsToConstantMemory(MMO->getValue()); 608 } 609} 610 611/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 612/// the given loop invariant. 613bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 614 // FIXME: For now, only hoist re-materilizable instructions. LICM will 615 // increase register pressure. We want to make sure it doesn't increase 616 // spilling. 617 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 618 // these tend to help performance in low register pressure situation. The 619 // trade off is it may cause spill in high pressure situation. It will end up 620 // adding a store in the loop preheader. But the reload is no more expensive. 621 // The side benefit is these loads are frequently CSE'ed. 622 if (!TII->isTriviallyReMaterializable(&MI, AA)) { 623 if (!isLoadFromConstantMemory(&MI)) 624 return false; 625 } 626 627 // If result(s) of this instruction is used by PHIs, then don't hoist it. 628 // The presence of joins makes it difficult for current register allocator 629 // implementation to perform remat. 630 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 631 const MachineOperand &MO = MI.getOperand(i); 632 if (!MO.isReg() || !MO.isDef()) 633 continue; 634 if (HasPHIUses(MO.getReg(), RegInfo)) 635 return false; 636 } 637 638 return true; 639} 640 641MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 642 // If not, we may be able to unfold a load and hoist that. 643 // First test whether the instruction is loading from an amenable 644 // memory location. 645 if (!isLoadFromConstantMemory(MI)) 646 return 0; 647 648 // Next determine the register class for a temporary register. 649 unsigned LoadRegIndex; 650 unsigned NewOpc = 651 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 652 /*UnfoldLoad=*/true, 653 /*UnfoldStore=*/false, 654 &LoadRegIndex); 655 if (NewOpc == 0) return 0; 656 const TargetInstrDesc &TID = TII->get(NewOpc); 657 if (TID.getNumDefs() != 1) return 0; 658 const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); 659 // Ok, we're unfolding. Create a temporary register and do the unfold. 660 unsigned Reg = RegInfo->createVirtualRegister(RC); 661 662 MachineFunction &MF = *MI->getParent()->getParent(); 663 SmallVector<MachineInstr *, 2> NewMIs; 664 bool Success = 665 TII->unfoldMemoryOperand(MF, MI, Reg, 666 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 667 NewMIs); 668 (void)Success; 669 assert(Success && 670 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 671 "succeeded!"); 672 assert(NewMIs.size() == 2 && 673 "Unfolded a load into multiple instructions!"); 674 MachineBasicBlock *MBB = MI->getParent(); 675 MBB->insert(MI, NewMIs[0]); 676 MBB->insert(MI, NewMIs[1]); 677 // If unfolding produced a load that wasn't loop-invariant or profitable to 678 // hoist, discard the new instructions and bail. 679 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 680 NewMIs[0]->eraseFromParent(); 681 NewMIs[1]->eraseFromParent(); 682 return 0; 683 } 684 // Otherwise we successfully unfolded a load that we can hoist. 685 MI->eraseFromParent(); 686 return NewMIs[0]; 687} 688 689void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 690 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 691 const MachineInstr *MI = &*I; 692 // FIXME: For now, only hoist re-materilizable instructions. LICM will 693 // increase register pressure. We want to make sure it doesn't increase 694 // spilling. 695 if (TII->isTriviallyReMaterializable(MI, AA)) { 696 unsigned Opcode = MI->getOpcode(); 697 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 698 CI = CSEMap.find(Opcode); 699 if (CI != CSEMap.end()) 700 CI->second.push_back(MI); 701 else { 702 std::vector<const MachineInstr*> CSEMIs; 703 CSEMIs.push_back(MI); 704 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 705 } 706 } 707 } 708} 709 710const MachineInstr* 711MachineLICM::LookForDuplicate(const MachineInstr *MI, 712 std::vector<const MachineInstr*> &PrevMIs) { 713 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 714 const MachineInstr *PrevMI = PrevMIs[i]; 715 if (TII->produceSameValue(MI, PrevMI)) 716 return PrevMI; 717 } 718 return 0; 719} 720 721bool MachineLICM::EliminateCSE(MachineInstr *MI, 722 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 723 if (CI == CSEMap.end()) 724 return false; 725 726 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 727 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 728 729 // Replace virtual registers defined by MI by their counterparts defined 730 // by Dup. 731 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 732 const MachineOperand &MO = MI->getOperand(i); 733 734 // Physical registers may not differ here. 735 assert((!MO.isReg() || MO.getReg() == 0 || 736 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 737 MO.getReg() == Dup->getOperand(i).getReg()) && 738 "Instructions with different phys regs are not identical!"); 739 740 if (MO.isReg() && MO.isDef() && 741 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 742 RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); 743 RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); 744 } 745 } 746 MI->eraseFromParent(); 747 ++NumCSEed; 748 return true; 749 } 750 return false; 751} 752 753/// Hoist - When an instruction is found to use only loop invariant operands 754/// that are safe to hoist, this instruction is called to do the dirty work. 755/// 756void MachineLICM::Hoist(MachineInstr *MI) { 757 // First check whether we should hoist this instruction. 758 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 759 // If not, try unfolding a hoistable load. 760 MI = ExtractHoistableLoad(MI); 761 if (!MI) return; 762 } 763 764 // Now move the instructions to the predecessor, inserting it before any 765 // terminator instructions. 766 DEBUG({ 767 dbgs() << "Hoisting " << *MI; 768 if (CurPreheader->getBasicBlock()) 769 dbgs() << " to MachineBasicBlock " 770 << CurPreheader->getName(); 771 if (MI->getParent()->getBasicBlock()) 772 dbgs() << " from MachineBasicBlock " 773 << MI->getParent()->getName(); 774 dbgs() << "\n"; 775 }); 776 777 // If this is the first instruction being hoisted to the preheader, 778 // initialize the CSE map with potential common expressions. 779 InitCSEMap(CurPreheader); 780 781 // Look for opportunity to CSE the hoisted instruction. 782 unsigned Opcode = MI->getOpcode(); 783 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 784 CI = CSEMap.find(Opcode); 785 if (!EliminateCSE(MI, CI)) { 786 // Otherwise, splice the instruction to the preheader. 787 CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); 788 789 // Clear the kill flags of any register this instruction defines, 790 // since they may need to be live throughout the entire loop 791 // rather than just live for part of it. 792 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 793 MachineOperand &MO = MI->getOperand(i); 794 if (MO.isReg() && MO.isDef() && !MO.isDead()) 795 RegInfo->clearKillFlags(MO.getReg()); 796 } 797 798 // Add to the CSE map. 799 if (CI != CSEMap.end()) 800 CI->second.push_back(MI); 801 else { 802 std::vector<const MachineInstr*> CSEMIs; 803 CSEMIs.push_back(MI); 804 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 805 } 806 } 807 808 ++NumHoisted; 809 Changed = true; 810} 811