HexagonHardwareLoops.cpp revision 239462
1234285Sdim//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim// 10234285Sdim// This pass identifies loops where we can generate the Hexagon hardware 11234285Sdim// loop instruction. The hardware loop can perform loop branches with a 12234285Sdim// zero-cycle overhead. 13234285Sdim// 14234285Sdim// The pattern that defines the induction variable can changed depending on 15234285Sdim// prior optimizations. For example, the IndVarSimplify phase run by 'opt' 16234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass 17234285Sdim// run by 'llc' may also make changes to the induction variable. 18234285Sdim// The pattern detected by this phase is due to running Strength Reduction. 19234285Sdim// 20234285Sdim// Criteria for hardware loops: 21234285Sdim// - Countable loops (w/ ind. var for a trip count) 22234285Sdim// - Assumes loops are normalized by IndVarSimplify 23234285Sdim// - Try inner-most loops first 24234285Sdim// - No nested hardware loops. 25234285Sdim// - No function calls in loops. 26234285Sdim// 27234285Sdim//===----------------------------------------------------------------------===// 28234285Sdim 29234285Sdim#define DEBUG_TYPE "hwloops" 30234285Sdim#include "Hexagon.h" 31234285Sdim#include "HexagonTargetMachine.h" 32234285Sdim#include "llvm/Constants.h" 33234285Sdim#include "llvm/PassSupport.h" 34234285Sdim#include "llvm/ADT/DenseMap.h" 35234285Sdim#include "llvm/ADT/Statistic.h" 36234285Sdim#include "llvm/CodeGen/Passes.h" 37234285Sdim#include "llvm/CodeGen/MachineDominators.h" 38234285Sdim#include "llvm/CodeGen/MachineFunction.h" 39234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 40234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 41234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 42234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 43234285Sdim#include "llvm/CodeGen/RegisterScavenging.h" 44234285Sdim#include "llvm/Support/Debug.h" 45234285Sdim#include "llvm/Support/raw_ostream.h" 46234285Sdim#include "llvm/Target/TargetInstrInfo.h" 47234285Sdim#include <algorithm> 48234285Sdim 49234285Sdimusing namespace llvm; 50234285Sdim 51234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 52234285Sdim 53234285Sdimnamespace { 54234285Sdim class CountValue; 55234285Sdim struct HexagonHardwareLoops : public MachineFunctionPass { 56234285Sdim MachineLoopInfo *MLI; 57234285Sdim MachineRegisterInfo *MRI; 58234285Sdim const TargetInstrInfo *TII; 59234285Sdim 60234285Sdim public: 61234285Sdim static char ID; // Pass identification, replacement for typeid 62234285Sdim 63234285Sdim HexagonHardwareLoops() : MachineFunctionPass(ID) {} 64234285Sdim 65234285Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 66234285Sdim 67234285Sdim const char *getPassName() const { return "Hexagon Hardware Loops"; } 68234285Sdim 69234285Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 70234285Sdim AU.setPreservesCFG(); 71234285Sdim AU.addRequired<MachineDominatorTree>(); 72234285Sdim AU.addPreserved<MachineDominatorTree>(); 73234285Sdim AU.addRequired<MachineLoopInfo>(); 74234285Sdim AU.addPreserved<MachineLoopInfo>(); 75234285Sdim MachineFunctionPass::getAnalysisUsage(AU); 76234285Sdim } 77234285Sdim 78234285Sdim private: 79234285Sdim /// getCanonicalInductionVariable - Check to see if the loop has a canonical 80234285Sdim /// induction variable. 81234285Sdim /// Should be defined in MachineLoop. Based upon version in class Loop. 82234285Sdim const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const; 83234285Sdim 84234285Sdim /// getTripCount - Return a loop-invariant LLVM register indicating the 85234285Sdim /// number of times the loop will be executed. If the trip-count cannot 86234285Sdim /// be determined, this return null. 87234285Sdim CountValue *getTripCount(MachineLoop *L) const; 88234285Sdim 89234285Sdim /// isInductionOperation - Return true if the instruction matches the 90234285Sdim /// pattern for an opertion that defines an induction variable. 91234285Sdim bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; 92234285Sdim 93234285Sdim /// isInvalidOperation - Return true if the instruction is not valid within 94234285Sdim /// a hardware loop. 95234285Sdim bool isInvalidLoopOperation(const MachineInstr *MI) const; 96234285Sdim 97234285Sdim /// containsInavlidInstruction - Return true if the loop contains an 98234285Sdim /// instruction that inhibits using the hardware loop. 99234285Sdim bool containsInvalidInstruction(MachineLoop *L) const; 100234285Sdim 101234285Sdim /// converToHardwareLoop - Given a loop, check if we can convert it to a 102234285Sdim /// hardware loop. If so, then perform the conversion and return true. 103234285Sdim bool convertToHardwareLoop(MachineLoop *L); 104234285Sdim 105234285Sdim }; 106234285Sdim 107234285Sdim char HexagonHardwareLoops::ID = 0; 108234285Sdim 109234285Sdim 110234285Sdim // CountValue class - Abstraction for a trip count of a loop. A 111234285Sdim // smaller vesrsion of the MachineOperand class without the concerns 112234285Sdim // of changing the operand representation. 113234285Sdim class CountValue { 114234285Sdim public: 115234285Sdim enum CountValueType { 116234285Sdim CV_Register, 117234285Sdim CV_Immediate 118234285Sdim }; 119234285Sdim private: 120234285Sdim CountValueType Kind; 121234285Sdim union Values { 122234285Sdim unsigned RegNum; 123234285Sdim int64_t ImmVal; 124234285Sdim Values(unsigned r) : RegNum(r) {} 125234285Sdim Values(int64_t i) : ImmVal(i) {} 126234285Sdim } Contents; 127234285Sdim bool isNegative; 128234285Sdim 129234285Sdim public: 130234285Sdim CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), 131234285Sdim isNegative(neg) {} 132234285Sdim explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), 133234285Sdim isNegative(i < 0) {} 134234285Sdim CountValueType getType() const { return Kind; } 135234285Sdim bool isReg() const { return Kind == CV_Register; } 136234285Sdim bool isImm() const { return Kind == CV_Immediate; } 137234285Sdim bool isNeg() const { return isNegative; } 138234285Sdim 139234285Sdim unsigned getReg() const { 140234285Sdim assert(isReg() && "Wrong CountValue accessor"); 141234285Sdim return Contents.RegNum; 142234285Sdim } 143234285Sdim void setReg(unsigned Val) { 144234285Sdim Contents.RegNum = Val; 145234285Sdim } 146234285Sdim int64_t getImm() const { 147234285Sdim assert(isImm() && "Wrong CountValue accessor"); 148234285Sdim if (isNegative) { 149234285Sdim return -Contents.ImmVal; 150234285Sdim } 151234285Sdim return Contents.ImmVal; 152234285Sdim } 153234285Sdim void setImm(int64_t Val) { 154234285Sdim Contents.ImmVal = Val; 155234285Sdim } 156234285Sdim 157234285Sdim void print(raw_ostream &OS, const TargetMachine *TM = 0) const { 158234285Sdim if (isReg()) { OS << PrintReg(getReg()); } 159234285Sdim if (isImm()) { OS << getImm(); } 160234285Sdim } 161234285Sdim }; 162234285Sdim 163234285Sdim struct HexagonFixupHwLoops : public MachineFunctionPass { 164234285Sdim public: 165234285Sdim static char ID; // Pass identification, replacement for typeid. 166234285Sdim 167234285Sdim HexagonFixupHwLoops() : MachineFunctionPass(ID) {} 168234285Sdim 169234285Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 170234285Sdim 171234285Sdim const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } 172234285Sdim 173234285Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 174234285Sdim AU.setPreservesCFG(); 175234285Sdim MachineFunctionPass::getAnalysisUsage(AU); 176234285Sdim } 177234285Sdim 178234285Sdim private: 179234285Sdim /// Maximum distance between the loop instr and the basic block. 180234285Sdim /// Just an estimate. 181234285Sdim static const unsigned MAX_LOOP_DISTANCE = 200; 182234285Sdim 183234285Sdim /// fixupLoopInstrs - Check the offset between each loop instruction and 184234285Sdim /// the loop basic block to determine if we can use the LOOP instruction 185234285Sdim /// or if we need to set the LC/SA registers explicitly. 186234285Sdim bool fixupLoopInstrs(MachineFunction &MF); 187234285Sdim 188234285Sdim /// convertLoopInstr - Add the instruction to set the LC and SA registers 189234285Sdim /// explicitly. 190234285Sdim void convertLoopInstr(MachineFunction &MF, 191234285Sdim MachineBasicBlock::iterator &MII, 192234285Sdim RegScavenger &RS); 193234285Sdim 194234285Sdim }; 195234285Sdim 196234285Sdim char HexagonFixupHwLoops::ID = 0; 197234285Sdim 198234285Sdim} // end anonymous namespace 199234285Sdim 200234285Sdim 201234285Sdim/// isHardwareLoop - Returns true if the instruction is a hardware loop 202234285Sdim/// instruction. 203234285Sdimstatic bool isHardwareLoop(const MachineInstr *MI) { 204234285Sdim return MI->getOpcode() == Hexagon::LOOP0_r || 205234285Sdim MI->getOpcode() == Hexagon::LOOP0_i; 206234285Sdim} 207234285Sdim 208234285Sdim/// isCompareEquals - Returns true if the instruction is a compare equals 209234285Sdim/// instruction with an immediate operand. 210234285Sdimstatic bool isCompareEqualsImm(const MachineInstr *MI) { 211234285Sdim return MI->getOpcode() == Hexagon::CMPEQri; 212234285Sdim} 213234285Sdim 214234285Sdim 215234285Sdim/// createHexagonHardwareLoops - Factory for creating 216234285Sdim/// the hardware loop phase. 217234285SdimFunctionPass *llvm::createHexagonHardwareLoops() { 218234285Sdim return new HexagonHardwareLoops(); 219234285Sdim} 220234285Sdim 221234285Sdim 222234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { 223234285Sdim DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); 224234285Sdim 225234285Sdim bool Changed = false; 226234285Sdim 227234285Sdim // get the loop information 228234285Sdim MLI = &getAnalysis<MachineLoopInfo>(); 229234285Sdim // get the register information 230234285Sdim MRI = &MF.getRegInfo(); 231234285Sdim // the target specific instructio info. 232234285Sdim TII = MF.getTarget().getInstrInfo(); 233234285Sdim 234234285Sdim for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); 235234285Sdim I != E; ++I) { 236234285Sdim MachineLoop *L = *I; 237234285Sdim if (!L->getParentLoop()) { 238234285Sdim Changed |= convertToHardwareLoop(L); 239234285Sdim } 240234285Sdim } 241234285Sdim 242234285Sdim return Changed; 243234285Sdim} 244234285Sdim 245234285Sdim/// getCanonicalInductionVariable - Check to see if the loop has a canonical 246234285Sdim/// induction variable. We check for a simple recurrence pattern - an 247234285Sdim/// integer recurrence that decrements by one each time through the loop and 248234285Sdim/// ends at zero. If so, return the phi node that corresponds to it. 249234285Sdim/// 250234285Sdim/// Based upon the similar code in LoopInfo except this code is specific to 251234285Sdim/// the machine. 252234285Sdim/// This method assumes that the IndVarSimplify pass has been run by 'opt'. 253234285Sdim/// 254234285Sdimconst MachineInstr 255234285Sdim*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const { 256234285Sdim MachineBasicBlock *TopMBB = L->getTopBlock(); 257234285Sdim MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); 258234285Sdim assert(PI != TopMBB->pred_end() && 259234285Sdim "Loop must have more than one incoming edge!"); 260234285Sdim MachineBasicBlock *Backedge = *PI++; 261234285Sdim if (PI == TopMBB->pred_end()) return 0; // dead loop 262234285Sdim MachineBasicBlock *Incoming = *PI++; 263234285Sdim if (PI != TopMBB->pred_end()) return 0; // multiple backedges? 264234285Sdim 265234285Sdim // make sure there is one incoming and one backedge and determine which 266234285Sdim // is which. 267234285Sdim if (L->contains(Incoming)) { 268234285Sdim if (L->contains(Backedge)) 269234285Sdim return 0; 270234285Sdim std::swap(Incoming, Backedge); 271234285Sdim } else if (!L->contains(Backedge)) 272234285Sdim return 0; 273234285Sdim 274234285Sdim // Loop over all of the PHI nodes, looking for a canonical induction variable: 275234285Sdim // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". 276234285Sdim // - The recurrence comes from the backedge. 277234285Sdim // - the definition is an induction operatio.n 278234285Sdim for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); 279234285Sdim I != E && I->isPHI(); ++I) { 280234285Sdim const MachineInstr *MPhi = &*I; 281234285Sdim unsigned DefReg = MPhi->getOperand(0).getReg(); 282234285Sdim for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { 283234285Sdim // Check each operand for the value from the backedge. 284234285Sdim MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); 285234285Sdim if (L->contains(MBB)) { // operands comes from the backedge 286234285Sdim // Check if the definition is an induction operation. 287234285Sdim const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); 288234285Sdim if (isInductionOperation(DI, DefReg)) { 289234285Sdim return MPhi; 290234285Sdim } 291234285Sdim } 292234285Sdim } 293234285Sdim } 294234285Sdim return 0; 295234285Sdim} 296234285Sdim 297234285Sdim/// getTripCount - Return a loop-invariant LLVM value indicating the 298234285Sdim/// number of times the loop will be executed. The trip count can 299234285Sdim/// be either a register or a constant value. If the trip-count 300234285Sdim/// cannot be determined, this returns null. 301234285Sdim/// 302234285Sdim/// We find the trip count from the phi instruction that defines the 303234285Sdim/// induction variable. We follow the links to the CMP instruction 304234285Sdim/// to get the trip count. 305234285Sdim/// 306234285Sdim/// Based upon getTripCount in LoopInfo. 307234285Sdim/// 308234285SdimCountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { 309234285Sdim // Check that the loop has a induction variable. 310234285Sdim const MachineInstr *IV_Inst = getCanonicalInductionVariable(L); 311234285Sdim if (IV_Inst == 0) return 0; 312234285Sdim 313234285Sdim // Canonical loops will end with a 'cmpeq_ri IV, Imm', 314234285Sdim // if Imm is 0, get the count from the PHI opnd 315234285Sdim // if Imm is -M, than M is the count 316234285Sdim // Otherwise, Imm is the count 317234285Sdim const MachineOperand *IV_Opnd; 318234285Sdim const MachineOperand *InitialValue; 319234285Sdim if (!L->contains(IV_Inst->getOperand(2).getMBB())) { 320234285Sdim InitialValue = &IV_Inst->getOperand(1); 321234285Sdim IV_Opnd = &IV_Inst->getOperand(3); 322234285Sdim } else { 323234285Sdim InitialValue = &IV_Inst->getOperand(3); 324234285Sdim IV_Opnd = &IV_Inst->getOperand(1); 325234285Sdim } 326234285Sdim 327234285Sdim // Look for the cmp instruction to determine if we 328234285Sdim // can get a useful trip count. The trip count can 329234285Sdim // be either a register or an immediate. The location 330234285Sdim // of the value depends upon the type (reg or imm). 331239462Sdim for (MachineRegisterInfo::reg_iterator 332239462Sdim RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); 333239462Sdim RI != RE; ++RI) { 334239462Sdim IV_Opnd = &RI.getOperand(); 335234285Sdim const MachineInstr *MI = IV_Opnd->getParent(); 336234285Sdim if (L->contains(MI) && isCompareEqualsImm(MI)) { 337234285Sdim const MachineOperand &MO = MI->getOperand(2); 338234285Sdim assert(MO.isImm() && "IV Cmp Operand should be 0"); 339234285Sdim int64_t ImmVal = MO.getImm(); 340234285Sdim 341234285Sdim const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); 342234285Sdim assert(L->contains(IV_DefInstr->getParent()) && 343234285Sdim "IV definition should occurs in loop"); 344234285Sdim int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); 345234285Sdim 346234285Sdim if (ImmVal == 0) { 347234285Sdim // Make sure the induction variable changes by one on each iteration. 348234285Sdim if (iv_value != 1 && iv_value != -1) { 349234285Sdim return 0; 350234285Sdim } 351234285Sdim return new CountValue(InitialValue->getReg(), iv_value > 0); 352234285Sdim } else { 353234285Sdim assert(InitialValue->isReg() && "Expecting register for init value"); 354234285Sdim const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); 355234285Sdim if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) { 356234285Sdim int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); 357234285Sdim if ((count % iv_value) != 0) { 358234285Sdim return 0; 359234285Sdim } 360234285Sdim return new CountValue(count/iv_value); 361234285Sdim } 362234285Sdim } 363234285Sdim } 364234285Sdim } 365234285Sdim return 0; 366234285Sdim} 367234285Sdim 368234285Sdim/// isInductionOperation - return true if the operation is matches the 369234285Sdim/// pattern that defines an induction variable: 370234285Sdim/// add iv, c 371234285Sdim/// 372234285Sdimbool 373234285SdimHexagonHardwareLoops::isInductionOperation(const MachineInstr *MI, 374234285Sdim unsigned IVReg) const { 375234285Sdim return (MI->getOpcode() == 376234285Sdim Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg); 377234285Sdim} 378234285Sdim 379234285Sdim/// isInvalidOperation - Return true if the operation is invalid within 380234285Sdim/// hardware loop. 381234285Sdimbool 382234285SdimHexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const { 383234285Sdim 384234285Sdim // call is not allowed because the callee may use a hardware loop 385234285Sdim if (MI->getDesc().isCall()) { 386234285Sdim return true; 387234285Sdim } 388234285Sdim // do not allow nested hardware loops 389234285Sdim if (isHardwareLoop(MI)) { 390234285Sdim return true; 391234285Sdim } 392234285Sdim // check if the instruction defines a hardware loop register 393234285Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 394234285Sdim const MachineOperand &MO = MI->getOperand(i); 395234285Sdim if (MO.isReg() && MO.isDef() && 396234285Sdim (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 || 397234285Sdim MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) { 398234285Sdim return true; 399234285Sdim } 400234285Sdim } 401234285Sdim return false; 402234285Sdim} 403234285Sdim 404234285Sdim/// containsInvalidInstruction - Return true if the loop contains 405234285Sdim/// an instruction that inhibits the use of the hardware loop function. 406234285Sdim/// 407234285Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { 408234285Sdim const std::vector<MachineBasicBlock*> Blocks = L->getBlocks(); 409234285Sdim for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 410234285Sdim MachineBasicBlock *MBB = Blocks[i]; 411234285Sdim for (MachineBasicBlock::iterator 412234285Sdim MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { 413234285Sdim const MachineInstr *MI = &*MII; 414234285Sdim if (isInvalidLoopOperation(MI)) { 415234285Sdim return true; 416234285Sdim } 417234285Sdim } 418234285Sdim } 419234285Sdim return false; 420234285Sdim} 421234285Sdim 422234285Sdim/// converToHardwareLoop - check if the loop is a candidate for 423234285Sdim/// converting to a hardware loop. If so, then perform the 424234285Sdim/// transformation. 425234285Sdim/// 426234285Sdim/// This function works on innermost loops first. A loop can 427234285Sdim/// be converted if it is a counting loop; either a register 428234285Sdim/// value or an immediate. 429234285Sdim/// 430234285Sdim/// The code makes several assumptions about the representation 431234285Sdim/// of the loop in llvm. 432234285Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { 433234285Sdim bool Changed = false; 434234285Sdim // Process nested loops first. 435234285Sdim for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 436234285Sdim Changed |= convertToHardwareLoop(*I); 437234285Sdim } 438234285Sdim // If a nested loop has been converted, then we can't convert this loop. 439234285Sdim if (Changed) { 440234285Sdim return Changed; 441234285Sdim } 442234285Sdim // Are we able to determine the trip count for the loop? 443234285Sdim CountValue *TripCount = getTripCount(L); 444234285Sdim if (TripCount == 0) { 445234285Sdim return false; 446234285Sdim } 447234285Sdim // Does the loop contain any invalid instructions? 448234285Sdim if (containsInvalidInstruction(L)) { 449234285Sdim return false; 450234285Sdim } 451234285Sdim MachineBasicBlock *Preheader = L->getLoopPreheader(); 452234285Sdim // No preheader means there's not place for the loop instr. 453234285Sdim if (Preheader == 0) { 454234285Sdim return false; 455234285Sdim } 456234285Sdim MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); 457234285Sdim 458234285Sdim MachineBasicBlock *LastMBB = L->getExitingBlock(); 459234285Sdim // Don't generate hw loop if the loop has more than one exit. 460234285Sdim if (LastMBB == 0) { 461234285Sdim return false; 462234285Sdim } 463234285Sdim MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); 464234285Sdim 465234285Sdim // Determine the loop start. 466234285Sdim MachineBasicBlock *LoopStart = L->getTopBlock(); 467234285Sdim if (L->getLoopLatch() != LastMBB) { 468234285Sdim // When the exit and latch are not the same, use the latch block as the 469234285Sdim // start. 470234285Sdim // The loop start address is used only after the 1st iteration, and the loop 471234285Sdim // latch may contains instrs. that need to be executed after the 1st iter. 472234285Sdim LoopStart = L->getLoopLatch(); 473234285Sdim // Make sure the latch is a successor of the exit, otherwise it won't work. 474234285Sdim if (!LastMBB->isSuccessor(LoopStart)) { 475234285Sdim return false; 476234285Sdim } 477234285Sdim } 478234285Sdim 479234285Sdim // Convert the loop to a hardware loop 480234285Sdim DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); 481234285Sdim 482234285Sdim if (TripCount->isReg()) { 483234285Sdim // Create a copy of the loop count register. 484234285Sdim MachineFunction *MF = LastMBB->getParent(); 485234285Sdim const TargetRegisterClass *RC = 486234285Sdim MF->getRegInfo().getRegClass(TripCount->getReg()); 487234285Sdim unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); 488234285Sdim BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), 489234285Sdim TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); 490234285Sdim if (TripCount->isNeg()) { 491234285Sdim unsigned CountReg1 = CountReg; 492234285Sdim CountReg = MF->getRegInfo().createVirtualRegister(RC); 493234285Sdim BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), 494234285Sdim TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); 495234285Sdim } 496234285Sdim 497239462Sdim // Add the Loop instruction to the beginning of the loop. 498234285Sdim BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), 499234285Sdim TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); 500234285Sdim } else { 501234285Sdim assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); 502234285Sdim // Add the Loop immediate instruction to the beginning of the loop. 503234285Sdim int64_t CountImm = TripCount->getImm(); 504234285Sdim BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), 505234285Sdim TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); 506234285Sdim } 507234285Sdim 508234285Sdim // Make sure the loop start always has a reference in the CFG. We need to 509234285Sdim // create a BlockAddress operand to get this mechanism to work both the 510234285Sdim // MachineBasicBlock and BasicBlock objects need the flag set. 511234285Sdim LoopStart->setHasAddressTaken(); 512234285Sdim // This line is needed to set the hasAddressTaken flag on the BasicBlock 513234285Sdim // object 514234285Sdim BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); 515234285Sdim 516234285Sdim // Replace the loop branch with an endloop instruction. 517234285Sdim DebugLoc dl = LastI->getDebugLoc(); 518234285Sdim BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); 519234285Sdim 520234285Sdim // The loop ends with either: 521234285Sdim // - a conditional branch followed by an unconditional branch, or 522234285Sdim // - a conditional branch to the loop start. 523234285Sdim if (LastI->getOpcode() == Hexagon::JMP_c || 524234285Sdim LastI->getOpcode() == Hexagon::JMP_cNot) { 525234285Sdim // delete one and change/add an uncond. branch to out of the loop 526234285Sdim MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); 527234285Sdim LastI = LastMBB->erase(LastI); 528234285Sdim if (!L->contains(BranchTarget)) { 529234285Sdim if (LastI != LastMBB->end()) { 530234285Sdim TII->RemoveBranch(*LastMBB); 531234285Sdim } 532234285Sdim SmallVector<MachineOperand, 0> Cond; 533234285Sdim TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl); 534234285Sdim } 535234285Sdim } else { 536234285Sdim // Conditional branch to loop start; just delete it. 537234285Sdim LastMBB->erase(LastI); 538234285Sdim } 539234285Sdim delete TripCount; 540234285Sdim 541234285Sdim ++NumHWLoops; 542234285Sdim return true; 543234285Sdim} 544234285Sdim 545234285Sdim/// createHexagonFixupHwLoops - Factory for creating the hardware loop 546234285Sdim/// phase. 547234285SdimFunctionPass *llvm::createHexagonFixupHwLoops() { 548234285Sdim return new HexagonFixupHwLoops(); 549234285Sdim} 550234285Sdim 551234285Sdimbool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { 552234285Sdim DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n"); 553234285Sdim 554234285Sdim bool Changed = fixupLoopInstrs(MF); 555234285Sdim return Changed; 556234285Sdim} 557234285Sdim 558234285Sdim/// fixupLoopInsts - For Hexagon, if the loop label is to far from the 559234285Sdim/// loop instruction then we need to set the LC0 and SA0 registers 560234285Sdim/// explicitly instead of using LOOP(start,count). This function 561234285Sdim/// checks the distance, and generates register assignments if needed. 562234285Sdim/// 563234285Sdim/// This function makes two passes over the basic blocks. The first 564234285Sdim/// pass computes the offset of the basic block from the start. 565234285Sdim/// The second pass checks all the loop instructions. 566234285Sdimbool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { 567234285Sdim 568234285Sdim // Offset of the current instruction from the start. 569234285Sdim unsigned InstOffset = 0; 570234285Sdim // Map for each basic block to it's first instruction. 571234285Sdim DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset; 572234285Sdim 573234285Sdim // First pass - compute the offset of each basic block. 574234285Sdim for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); 575234285Sdim MBB != MBBe; ++MBB) { 576234285Sdim BlockToInstOffset[MBB] = InstOffset; 577234285Sdim InstOffset += (MBB->size() * 4); 578234285Sdim } 579234285Sdim 580234285Sdim // Second pass - check each loop instruction to see if it needs to 581234285Sdim // be converted. 582234285Sdim InstOffset = 0; 583234285Sdim bool Changed = false; 584234285Sdim RegScavenger RS; 585234285Sdim 586234285Sdim // Loop over all the basic blocks. 587234285Sdim for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); 588234285Sdim MBB != MBBe; ++MBB) { 589234285Sdim InstOffset = BlockToInstOffset[MBB]; 590234285Sdim RS.enterBasicBlock(MBB); 591234285Sdim 592234285Sdim // Loop over all the instructions. 593234285Sdim MachineBasicBlock::iterator MIE = MBB->end(); 594234285Sdim MachineBasicBlock::iterator MII = MBB->begin(); 595234285Sdim while (MII != MIE) { 596234285Sdim if (isHardwareLoop(MII)) { 597234285Sdim RS.forward(MII); 598234285Sdim assert(MII->getOperand(0).isMBB() && 599234285Sdim "Expect a basic block as loop operand"); 600234285Sdim int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; 601234285Sdim diff = (diff > 0 ? diff : -diff); 602234285Sdim if ((unsigned)diff > MAX_LOOP_DISTANCE) { 603234285Sdim // Convert to explicity setting LC0 and SA0. 604234285Sdim convertLoopInstr(MF, MII, RS); 605234285Sdim MII = MBB->erase(MII); 606234285Sdim Changed = true; 607234285Sdim } else { 608234285Sdim ++MII; 609234285Sdim } 610234285Sdim } else { 611234285Sdim ++MII; 612234285Sdim } 613234285Sdim InstOffset += 4; 614234285Sdim } 615234285Sdim } 616234285Sdim 617234285Sdim return Changed; 618234285Sdim 619234285Sdim} 620234285Sdim 621234285Sdim/// convertLoopInstr - convert a loop instruction to a sequence of instructions 622234285Sdim/// that set the lc and sa register explicitly. 623234285Sdimvoid HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, 624234285Sdim MachineBasicBlock::iterator &MII, 625234285Sdim RegScavenger &RS) { 626234285Sdim const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); 627234285Sdim MachineBasicBlock *MBB = MII->getParent(); 628234285Sdim DebugLoc DL = MII->getDebugLoc(); 629239462Sdim unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); 630234285Sdim 631234285Sdim // First, set the LC0 with the trip count. 632234285Sdim if (MII->getOperand(1).isReg()) { 633234285Sdim // Trip count is a register 634234285Sdim BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) 635234285Sdim .addReg(MII->getOperand(1).getReg()); 636234285Sdim } else { 637234285Sdim // Trip count is an immediate. 638234285Sdim BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) 639234285Sdim .addImm(MII->getOperand(1).getImm()); 640234285Sdim BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) 641234285Sdim .addReg(Scratch); 642234285Sdim } 643234285Sdim // Then, set the SA0 with the loop start address. 644234285Sdim BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) 645234285Sdim .addMBB(MII->getOperand(0).getMBB()); 646234285Sdim BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch); 647234285Sdim} 648