HexagonHardwareLoops.cpp revision 239462
1234285Sdim//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2234285Sdim//
3234285Sdim//                     The LLVM Compiler Infrastructure
4234285Sdim//
5234285Sdim// This file is distributed under the University of Illinois Open Source
6234285Sdim// License. See LICENSE.TXT for details.
7234285Sdim//
8234285Sdim//===----------------------------------------------------------------------===//
9234285Sdim//
10234285Sdim// This pass identifies loops where we can generate the Hexagon hardware
11234285Sdim// loop instruction.  The hardware loop can perform loop branches with a
12234285Sdim// zero-cycle overhead.
13234285Sdim//
14234285Sdim// The pattern that defines the induction variable can changed depending on
15234285Sdim// prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
16234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass
17234285Sdim// run by 'llc' may also make changes to the induction variable.
18234285Sdim// The pattern detected by this phase is due to running Strength Reduction.
19234285Sdim//
20234285Sdim// Criteria for hardware loops:
21234285Sdim//  - Countable loops (w/ ind. var for a trip count)
22234285Sdim//  - Assumes loops are normalized by IndVarSimplify
23234285Sdim//  - Try inner-most loops first
24234285Sdim//  - No nested hardware loops.
25234285Sdim//  - No function calls in loops.
26234285Sdim//
27234285Sdim//===----------------------------------------------------------------------===//
28234285Sdim
29234285Sdim#define DEBUG_TYPE "hwloops"
30234285Sdim#include "Hexagon.h"
31234285Sdim#include "HexagonTargetMachine.h"
32234285Sdim#include "llvm/Constants.h"
33234285Sdim#include "llvm/PassSupport.h"
34234285Sdim#include "llvm/ADT/DenseMap.h"
35234285Sdim#include "llvm/ADT/Statistic.h"
36234285Sdim#include "llvm/CodeGen/Passes.h"
37234285Sdim#include "llvm/CodeGen/MachineDominators.h"
38234285Sdim#include "llvm/CodeGen/MachineFunction.h"
39234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
40234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
41234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
42234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
43234285Sdim#include "llvm/CodeGen/RegisterScavenging.h"
44234285Sdim#include "llvm/Support/Debug.h"
45234285Sdim#include "llvm/Support/raw_ostream.h"
46234285Sdim#include "llvm/Target/TargetInstrInfo.h"
47234285Sdim#include <algorithm>
48234285Sdim
49234285Sdimusing namespace llvm;
50234285Sdim
51234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
52234285Sdim
53234285Sdimnamespace {
54234285Sdim  class CountValue;
55234285Sdim  struct HexagonHardwareLoops : public MachineFunctionPass {
56234285Sdim    MachineLoopInfo       *MLI;
57234285Sdim    MachineRegisterInfo   *MRI;
58234285Sdim    const TargetInstrInfo *TII;
59234285Sdim
60234285Sdim  public:
61234285Sdim    static char ID;   // Pass identification, replacement for typeid
62234285Sdim
63234285Sdim    HexagonHardwareLoops() : MachineFunctionPass(ID) {}
64234285Sdim
65234285Sdim    virtual bool runOnMachineFunction(MachineFunction &MF);
66234285Sdim
67234285Sdim    const char *getPassName() const { return "Hexagon Hardware Loops"; }
68234285Sdim
69234285Sdim    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
70234285Sdim      AU.setPreservesCFG();
71234285Sdim      AU.addRequired<MachineDominatorTree>();
72234285Sdim      AU.addPreserved<MachineDominatorTree>();
73234285Sdim      AU.addRequired<MachineLoopInfo>();
74234285Sdim      AU.addPreserved<MachineLoopInfo>();
75234285Sdim      MachineFunctionPass::getAnalysisUsage(AU);
76234285Sdim    }
77234285Sdim
78234285Sdim  private:
79234285Sdim    /// getCanonicalInductionVariable - Check to see if the loop has a canonical
80234285Sdim    /// induction variable.
81234285Sdim    /// Should be defined in MachineLoop. Based upon version in class Loop.
82234285Sdim    const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
83234285Sdim
84234285Sdim    /// getTripCount - Return a loop-invariant LLVM register indicating the
85234285Sdim    /// number of times the loop will be executed.  If the trip-count cannot
86234285Sdim    /// be determined, this return null.
87234285Sdim    CountValue *getTripCount(MachineLoop *L) const;
88234285Sdim
89234285Sdim    /// isInductionOperation - Return true if the instruction matches the
90234285Sdim    /// pattern for an opertion that defines an induction variable.
91234285Sdim    bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
92234285Sdim
93234285Sdim    /// isInvalidOperation - Return true if the instruction is not valid within
94234285Sdim    /// a hardware loop.
95234285Sdim    bool isInvalidLoopOperation(const MachineInstr *MI) const;
96234285Sdim
97234285Sdim    /// containsInavlidInstruction - Return true if the loop contains an
98234285Sdim    /// instruction that inhibits using the hardware loop.
99234285Sdim    bool containsInvalidInstruction(MachineLoop *L) const;
100234285Sdim
101234285Sdim    /// converToHardwareLoop - Given a loop, check if we can convert it to a
102234285Sdim    /// hardware loop.  If so, then perform the conversion and return true.
103234285Sdim    bool convertToHardwareLoop(MachineLoop *L);
104234285Sdim
105234285Sdim  };
106234285Sdim
107234285Sdim  char HexagonHardwareLoops::ID = 0;
108234285Sdim
109234285Sdim
110234285Sdim  // CountValue class - Abstraction for a trip count of a loop. A
111234285Sdim  // smaller vesrsion of the MachineOperand class without the concerns
112234285Sdim  // of changing the operand representation.
113234285Sdim  class CountValue {
114234285Sdim  public:
115234285Sdim    enum CountValueType {
116234285Sdim      CV_Register,
117234285Sdim      CV_Immediate
118234285Sdim    };
119234285Sdim  private:
120234285Sdim    CountValueType Kind;
121234285Sdim    union Values {
122234285Sdim      unsigned RegNum;
123234285Sdim      int64_t ImmVal;
124234285Sdim      Values(unsigned r) : RegNum(r) {}
125234285Sdim      Values(int64_t i) : ImmVal(i) {}
126234285Sdim    } Contents;
127234285Sdim    bool isNegative;
128234285Sdim
129234285Sdim  public:
130234285Sdim    CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
131234285Sdim                                       isNegative(neg) {}
132234285Sdim    explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
133234285Sdim                                     isNegative(i < 0) {}
134234285Sdim    CountValueType getType() const { return Kind; }
135234285Sdim    bool isReg() const { return Kind == CV_Register; }
136234285Sdim    bool isImm() const { return Kind == CV_Immediate; }
137234285Sdim    bool isNeg() const { return isNegative; }
138234285Sdim
139234285Sdim    unsigned getReg() const {
140234285Sdim      assert(isReg() && "Wrong CountValue accessor");
141234285Sdim      return Contents.RegNum;
142234285Sdim    }
143234285Sdim    void setReg(unsigned Val) {
144234285Sdim      Contents.RegNum = Val;
145234285Sdim    }
146234285Sdim    int64_t getImm() const {
147234285Sdim      assert(isImm() && "Wrong CountValue accessor");
148234285Sdim      if (isNegative) {
149234285Sdim        return -Contents.ImmVal;
150234285Sdim      }
151234285Sdim      return Contents.ImmVal;
152234285Sdim    }
153234285Sdim    void setImm(int64_t Val) {
154234285Sdim      Contents.ImmVal = Val;
155234285Sdim    }
156234285Sdim
157234285Sdim    void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
158234285Sdim      if (isReg()) { OS << PrintReg(getReg()); }
159234285Sdim      if (isImm()) { OS << getImm(); }
160234285Sdim    }
161234285Sdim  };
162234285Sdim
163234285Sdim  struct HexagonFixupHwLoops : public MachineFunctionPass {
164234285Sdim  public:
165234285Sdim    static char ID;     // Pass identification, replacement for typeid.
166234285Sdim
167234285Sdim    HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
168234285Sdim
169234285Sdim    virtual bool runOnMachineFunction(MachineFunction &MF);
170234285Sdim
171234285Sdim    const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
172234285Sdim
173234285Sdim    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
174234285Sdim      AU.setPreservesCFG();
175234285Sdim      MachineFunctionPass::getAnalysisUsage(AU);
176234285Sdim    }
177234285Sdim
178234285Sdim  private:
179234285Sdim    /// Maximum distance between the loop instr and the basic block.
180234285Sdim    /// Just an estimate.
181234285Sdim    static const unsigned MAX_LOOP_DISTANCE = 200;
182234285Sdim
183234285Sdim    /// fixupLoopInstrs - Check the offset between each loop instruction and
184234285Sdim    /// the loop basic block to determine if we can use the LOOP instruction
185234285Sdim    /// or if we need to set the LC/SA registers explicitly.
186234285Sdim    bool fixupLoopInstrs(MachineFunction &MF);
187234285Sdim
188234285Sdim    /// convertLoopInstr - Add the instruction to set the LC and SA registers
189234285Sdim    /// explicitly.
190234285Sdim    void convertLoopInstr(MachineFunction &MF,
191234285Sdim                          MachineBasicBlock::iterator &MII,
192234285Sdim                          RegScavenger &RS);
193234285Sdim
194234285Sdim  };
195234285Sdim
196234285Sdim  char HexagonFixupHwLoops::ID = 0;
197234285Sdim
198234285Sdim} // end anonymous namespace
199234285Sdim
200234285Sdim
201234285Sdim/// isHardwareLoop - Returns true if the instruction is a hardware loop
202234285Sdim/// instruction.
203234285Sdimstatic bool isHardwareLoop(const MachineInstr *MI) {
204234285Sdim  return MI->getOpcode() == Hexagon::LOOP0_r ||
205234285Sdim    MI->getOpcode() == Hexagon::LOOP0_i;
206234285Sdim}
207234285Sdim
208234285Sdim/// isCompareEquals - Returns true if the instruction is a compare equals
209234285Sdim/// instruction with an immediate operand.
210234285Sdimstatic bool isCompareEqualsImm(const MachineInstr *MI) {
211234285Sdim  return MI->getOpcode() == Hexagon::CMPEQri;
212234285Sdim}
213234285Sdim
214234285Sdim
215234285Sdim/// createHexagonHardwareLoops - Factory for creating
216234285Sdim/// the hardware loop phase.
217234285SdimFunctionPass *llvm::createHexagonHardwareLoops() {
218234285Sdim  return new HexagonHardwareLoops();
219234285Sdim}
220234285Sdim
221234285Sdim
222234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
223234285Sdim  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
224234285Sdim
225234285Sdim  bool Changed = false;
226234285Sdim
227234285Sdim  // get the loop information
228234285Sdim  MLI = &getAnalysis<MachineLoopInfo>();
229234285Sdim  // get the register information
230234285Sdim  MRI = &MF.getRegInfo();
231234285Sdim  // the target specific instructio info.
232234285Sdim  TII = MF.getTarget().getInstrInfo();
233234285Sdim
234234285Sdim  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
235234285Sdim       I != E; ++I) {
236234285Sdim    MachineLoop *L = *I;
237234285Sdim    if (!L->getParentLoop()) {
238234285Sdim      Changed |= convertToHardwareLoop(L);
239234285Sdim    }
240234285Sdim  }
241234285Sdim
242234285Sdim  return Changed;
243234285Sdim}
244234285Sdim
245234285Sdim/// getCanonicalInductionVariable - Check to see if the loop has a canonical
246234285Sdim/// induction variable. We check for a simple recurrence pattern - an
247234285Sdim/// integer recurrence that decrements by one each time through the loop and
248234285Sdim/// ends at zero.  If so, return the phi node that corresponds to it.
249234285Sdim///
250234285Sdim/// Based upon the similar code in LoopInfo except this code is specific to
251234285Sdim/// the machine.
252234285Sdim/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
253234285Sdim///
254234285Sdimconst MachineInstr
255234285Sdim*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
256234285Sdim  MachineBasicBlock *TopMBB = L->getTopBlock();
257234285Sdim  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
258234285Sdim  assert(PI != TopMBB->pred_end() &&
259234285Sdim         "Loop must have more than one incoming edge!");
260234285Sdim  MachineBasicBlock *Backedge = *PI++;
261234285Sdim  if (PI == TopMBB->pred_end()) return 0;  // dead loop
262234285Sdim  MachineBasicBlock *Incoming = *PI++;
263234285Sdim  if (PI != TopMBB->pred_end()) return 0;  // multiple backedges?
264234285Sdim
265234285Sdim  // make sure there is one incoming and one backedge and determine which
266234285Sdim  // is which.
267234285Sdim  if (L->contains(Incoming)) {
268234285Sdim    if (L->contains(Backedge))
269234285Sdim      return 0;
270234285Sdim    std::swap(Incoming, Backedge);
271234285Sdim  } else if (!L->contains(Backedge))
272234285Sdim    return 0;
273234285Sdim
274234285Sdim  // Loop over all of the PHI nodes, looking for a canonical induction variable:
275234285Sdim  //   - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
276234285Sdim  //   - The recurrence comes from the backedge.
277234285Sdim  //   - the definition is an induction operatio.n
278234285Sdim  for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
279234285Sdim       I != E && I->isPHI(); ++I) {
280234285Sdim    const MachineInstr *MPhi = &*I;
281234285Sdim    unsigned DefReg = MPhi->getOperand(0).getReg();
282234285Sdim    for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
283234285Sdim      // Check each operand for the value from the backedge.
284234285Sdim      MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
285234285Sdim      if (L->contains(MBB)) { // operands comes from the backedge
286234285Sdim        // Check if the definition is an induction operation.
287234285Sdim        const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
288234285Sdim        if (isInductionOperation(DI, DefReg)) {
289234285Sdim          return MPhi;
290234285Sdim        }
291234285Sdim      }
292234285Sdim    }
293234285Sdim  }
294234285Sdim  return 0;
295234285Sdim}
296234285Sdim
297234285Sdim/// getTripCount - Return a loop-invariant LLVM value indicating the
298234285Sdim/// number of times the loop will be executed.  The trip count can
299234285Sdim/// be either a register or a constant value.  If the trip-count
300234285Sdim/// cannot be determined, this returns null.
301234285Sdim///
302234285Sdim/// We find the trip count from the phi instruction that defines the
303234285Sdim/// induction variable.  We follow the links to the CMP instruction
304234285Sdim/// to get the trip count.
305234285Sdim///
306234285Sdim/// Based upon getTripCount in LoopInfo.
307234285Sdim///
308234285SdimCountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
309234285Sdim  // Check that the loop has a induction variable.
310234285Sdim  const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
311234285Sdim  if (IV_Inst == 0) return 0;
312234285Sdim
313234285Sdim  // Canonical loops will end with a 'cmpeq_ri IV, Imm',
314234285Sdim  //  if Imm is 0, get the count from the PHI opnd
315234285Sdim  //  if Imm is -M, than M is the count
316234285Sdim  //  Otherwise, Imm is the count
317234285Sdim  const MachineOperand *IV_Opnd;
318234285Sdim  const MachineOperand *InitialValue;
319234285Sdim  if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
320234285Sdim    InitialValue = &IV_Inst->getOperand(1);
321234285Sdim    IV_Opnd = &IV_Inst->getOperand(3);
322234285Sdim  } else {
323234285Sdim    InitialValue = &IV_Inst->getOperand(3);
324234285Sdim    IV_Opnd = &IV_Inst->getOperand(1);
325234285Sdim  }
326234285Sdim
327234285Sdim  // Look for the cmp instruction to determine if we
328234285Sdim  // can get a useful trip count.  The trip count can
329234285Sdim  // be either a register or an immediate.  The location
330234285Sdim  // of the value depends upon the type (reg or imm).
331239462Sdim  for (MachineRegisterInfo::reg_iterator
332239462Sdim       RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
333239462Sdim       RI != RE; ++RI) {
334239462Sdim    IV_Opnd = &RI.getOperand();
335234285Sdim    const MachineInstr *MI = IV_Opnd->getParent();
336234285Sdim    if (L->contains(MI) && isCompareEqualsImm(MI)) {
337234285Sdim      const MachineOperand &MO = MI->getOperand(2);
338234285Sdim      assert(MO.isImm() && "IV Cmp Operand should be 0");
339234285Sdim      int64_t ImmVal = MO.getImm();
340234285Sdim
341234285Sdim      const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
342234285Sdim      assert(L->contains(IV_DefInstr->getParent()) &&
343234285Sdim             "IV definition should occurs in loop");
344234285Sdim      int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
345234285Sdim
346234285Sdim      if (ImmVal == 0) {
347234285Sdim        // Make sure the induction variable changes by one on each iteration.
348234285Sdim        if (iv_value != 1 && iv_value != -1) {
349234285Sdim          return 0;
350234285Sdim        }
351234285Sdim        return new CountValue(InitialValue->getReg(), iv_value > 0);
352234285Sdim      } else {
353234285Sdim        assert(InitialValue->isReg() && "Expecting register for init value");
354234285Sdim        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
355234285Sdim        if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
356234285Sdim          int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
357234285Sdim          if ((count % iv_value) != 0) {
358234285Sdim            return 0;
359234285Sdim          }
360234285Sdim          return new CountValue(count/iv_value);
361234285Sdim        }
362234285Sdim      }
363234285Sdim    }
364234285Sdim  }
365234285Sdim  return 0;
366234285Sdim}
367234285Sdim
368234285Sdim/// isInductionOperation - return true if the operation is matches the
369234285Sdim/// pattern that defines an induction variable:
370234285Sdim///    add iv, c
371234285Sdim///
372234285Sdimbool
373234285SdimHexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
374234285Sdim                                           unsigned IVReg) const {
375234285Sdim  return (MI->getOpcode() ==
376234285Sdim          Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
377234285Sdim}
378234285Sdim
379234285Sdim/// isInvalidOperation - Return true if the operation is invalid within
380234285Sdim/// hardware loop.
381234285Sdimbool
382234285SdimHexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
383234285Sdim
384234285Sdim  // call is not allowed because the callee may use a hardware loop
385234285Sdim  if (MI->getDesc().isCall()) {
386234285Sdim    return true;
387234285Sdim  }
388234285Sdim  // do not allow nested hardware loops
389234285Sdim  if (isHardwareLoop(MI)) {
390234285Sdim    return true;
391234285Sdim  }
392234285Sdim  // check if the instruction defines a hardware loop register
393234285Sdim  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
394234285Sdim    const MachineOperand &MO = MI->getOperand(i);
395234285Sdim    if (MO.isReg() && MO.isDef() &&
396234285Sdim        (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
397234285Sdim         MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) {
398234285Sdim      return true;
399234285Sdim    }
400234285Sdim  }
401234285Sdim  return false;
402234285Sdim}
403234285Sdim
404234285Sdim/// containsInvalidInstruction - Return true if the loop contains
405234285Sdim/// an instruction that inhibits the use of the hardware loop function.
406234285Sdim///
407234285Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
408234285Sdim  const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
409234285Sdim  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
410234285Sdim    MachineBasicBlock *MBB = Blocks[i];
411234285Sdim    for (MachineBasicBlock::iterator
412234285Sdim           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
413234285Sdim      const MachineInstr *MI = &*MII;
414234285Sdim      if (isInvalidLoopOperation(MI)) {
415234285Sdim        return true;
416234285Sdim      }
417234285Sdim    }
418234285Sdim  }
419234285Sdim  return false;
420234285Sdim}
421234285Sdim
422234285Sdim/// converToHardwareLoop - check if the loop is a candidate for
423234285Sdim/// converting to a hardware loop.  If so, then perform the
424234285Sdim/// transformation.
425234285Sdim///
426234285Sdim/// This function works on innermost loops first.  A loop can
427234285Sdim/// be converted if it is a counting loop; either a register
428234285Sdim/// value or an immediate.
429234285Sdim///
430234285Sdim/// The code makes several assumptions about the representation
431234285Sdim/// of the loop in llvm.
432234285Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
433234285Sdim  bool Changed = false;
434234285Sdim  // Process nested loops first.
435234285Sdim  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
436234285Sdim    Changed |= convertToHardwareLoop(*I);
437234285Sdim  }
438234285Sdim  // If a nested loop has been converted, then we can't convert this loop.
439234285Sdim  if (Changed) {
440234285Sdim    return Changed;
441234285Sdim  }
442234285Sdim  // Are we able to determine the trip count for the loop?
443234285Sdim  CountValue *TripCount = getTripCount(L);
444234285Sdim  if (TripCount == 0) {
445234285Sdim    return false;
446234285Sdim  }
447234285Sdim  // Does the loop contain any invalid instructions?
448234285Sdim  if (containsInvalidInstruction(L)) {
449234285Sdim    return false;
450234285Sdim  }
451234285Sdim  MachineBasicBlock *Preheader = L->getLoopPreheader();
452234285Sdim  // No preheader means there's not place for the loop instr.
453234285Sdim  if (Preheader == 0) {
454234285Sdim    return false;
455234285Sdim  }
456234285Sdim  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
457234285Sdim
458234285Sdim  MachineBasicBlock *LastMBB = L->getExitingBlock();
459234285Sdim  // Don't generate hw loop if the loop has more than one exit.
460234285Sdim  if (LastMBB == 0) {
461234285Sdim    return false;
462234285Sdim  }
463234285Sdim  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
464234285Sdim
465234285Sdim  // Determine the loop start.
466234285Sdim  MachineBasicBlock *LoopStart = L->getTopBlock();
467234285Sdim  if (L->getLoopLatch() != LastMBB) {
468234285Sdim    // When the exit and latch are not the same, use the latch block as the
469234285Sdim    // start.
470234285Sdim    // The loop start address is used only after the 1st iteration, and the loop
471234285Sdim    // latch may contains instrs. that need to be executed after the 1st iter.
472234285Sdim    LoopStart = L->getLoopLatch();
473234285Sdim    // Make sure the latch is a successor of the exit, otherwise it won't work.
474234285Sdim    if (!LastMBB->isSuccessor(LoopStart)) {
475234285Sdim      return false;
476234285Sdim    }
477234285Sdim  }
478234285Sdim
479234285Sdim  // Convert the loop to a hardware loop
480234285Sdim  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
481234285Sdim
482234285Sdim  if (TripCount->isReg()) {
483234285Sdim    // Create a copy of the loop count register.
484234285Sdim    MachineFunction *MF = LastMBB->getParent();
485234285Sdim    const TargetRegisterClass *RC =
486234285Sdim      MF->getRegInfo().getRegClass(TripCount->getReg());
487234285Sdim    unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
488234285Sdim    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
489234285Sdim            TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
490234285Sdim    if (TripCount->isNeg()) {
491234285Sdim      unsigned CountReg1 = CountReg;
492234285Sdim      CountReg = MF->getRegInfo().createVirtualRegister(RC);
493234285Sdim      BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
494234285Sdim              TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
495234285Sdim    }
496234285Sdim
497239462Sdim    // Add the Loop instruction to the beginning of the loop.
498234285Sdim    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
499234285Sdim            TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
500234285Sdim  } else {
501234285Sdim    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
502234285Sdim    // Add the Loop immediate instruction to the beginning of the loop.
503234285Sdim    int64_t CountImm = TripCount->getImm();
504234285Sdim    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
505234285Sdim            TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
506234285Sdim  }
507234285Sdim
508234285Sdim  // Make sure the loop start always has a reference in the CFG.  We need to
509234285Sdim  // create a BlockAddress operand to get this mechanism to work both the
510234285Sdim  // MachineBasicBlock and BasicBlock objects need the flag set.
511234285Sdim  LoopStart->setHasAddressTaken();
512234285Sdim  // This line is needed to set the hasAddressTaken flag on the BasicBlock
513234285Sdim  // object
514234285Sdim  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
515234285Sdim
516234285Sdim  // Replace the loop branch with an endloop instruction.
517234285Sdim  DebugLoc dl = LastI->getDebugLoc();
518234285Sdim  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
519234285Sdim
520234285Sdim  // The loop ends with either:
521234285Sdim  //  - a conditional branch followed by an unconditional branch, or
522234285Sdim  //  - a conditional branch to the loop start.
523234285Sdim  if (LastI->getOpcode() == Hexagon::JMP_c ||
524234285Sdim      LastI->getOpcode() == Hexagon::JMP_cNot) {
525234285Sdim    // delete one and change/add an uncond. branch to out of the loop
526234285Sdim    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
527234285Sdim    LastI = LastMBB->erase(LastI);
528234285Sdim    if (!L->contains(BranchTarget)) {
529234285Sdim      if (LastI != LastMBB->end()) {
530234285Sdim        TII->RemoveBranch(*LastMBB);
531234285Sdim      }
532234285Sdim      SmallVector<MachineOperand, 0> Cond;
533234285Sdim      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
534234285Sdim    }
535234285Sdim  } else {
536234285Sdim    // Conditional branch to loop start; just delete it.
537234285Sdim    LastMBB->erase(LastI);
538234285Sdim  }
539234285Sdim  delete TripCount;
540234285Sdim
541234285Sdim  ++NumHWLoops;
542234285Sdim  return true;
543234285Sdim}
544234285Sdim
545234285Sdim/// createHexagonFixupHwLoops - Factory for creating the hardware loop
546234285Sdim/// phase.
547234285SdimFunctionPass *llvm::createHexagonFixupHwLoops() {
548234285Sdim  return new HexagonFixupHwLoops();
549234285Sdim}
550234285Sdim
551234285Sdimbool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
552234285Sdim  DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
553234285Sdim
554234285Sdim  bool Changed = fixupLoopInstrs(MF);
555234285Sdim  return Changed;
556234285Sdim}
557234285Sdim
558234285Sdim/// fixupLoopInsts - For Hexagon, if the loop label is to far from the
559234285Sdim/// loop instruction then we need to set the LC0 and SA0 registers
560234285Sdim/// explicitly instead of using LOOP(start,count).  This function
561234285Sdim/// checks the distance, and generates register assignments if needed.
562234285Sdim///
563234285Sdim/// This function makes two passes over the basic blocks.  The first
564234285Sdim/// pass computes the offset of the basic block from the start.
565234285Sdim/// The second pass checks all the loop instructions.
566234285Sdimbool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
567234285Sdim
568234285Sdim  // Offset of the current instruction from the start.
569234285Sdim  unsigned InstOffset = 0;
570234285Sdim  // Map for each basic block to it's first instruction.
571234285Sdim  DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
572234285Sdim
573234285Sdim  // First pass - compute the offset of each basic block.
574234285Sdim  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
575234285Sdim       MBB != MBBe; ++MBB) {
576234285Sdim    BlockToInstOffset[MBB] = InstOffset;
577234285Sdim    InstOffset += (MBB->size() * 4);
578234285Sdim  }
579234285Sdim
580234285Sdim  // Second pass - check each loop instruction to see if it needs to
581234285Sdim  // be converted.
582234285Sdim  InstOffset = 0;
583234285Sdim  bool Changed = false;
584234285Sdim  RegScavenger RS;
585234285Sdim
586234285Sdim  // Loop over all the basic blocks.
587234285Sdim  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
588234285Sdim       MBB != MBBe; ++MBB) {
589234285Sdim    InstOffset = BlockToInstOffset[MBB];
590234285Sdim    RS.enterBasicBlock(MBB);
591234285Sdim
592234285Sdim    // Loop over all the instructions.
593234285Sdim    MachineBasicBlock::iterator MIE = MBB->end();
594234285Sdim    MachineBasicBlock::iterator MII = MBB->begin();
595234285Sdim    while (MII != MIE) {
596234285Sdim      if (isHardwareLoop(MII)) {
597234285Sdim        RS.forward(MII);
598234285Sdim        assert(MII->getOperand(0).isMBB() &&
599234285Sdim               "Expect a basic block as loop operand");
600234285Sdim        int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
601234285Sdim        diff = (diff > 0 ? diff : -diff);
602234285Sdim        if ((unsigned)diff > MAX_LOOP_DISTANCE) {
603234285Sdim          // Convert to explicity setting LC0 and SA0.
604234285Sdim          convertLoopInstr(MF, MII, RS);
605234285Sdim          MII = MBB->erase(MII);
606234285Sdim          Changed = true;
607234285Sdim        } else {
608234285Sdim          ++MII;
609234285Sdim        }
610234285Sdim      } else {
611234285Sdim        ++MII;
612234285Sdim      }
613234285Sdim      InstOffset += 4;
614234285Sdim    }
615234285Sdim  }
616234285Sdim
617234285Sdim  return Changed;
618234285Sdim
619234285Sdim}
620234285Sdim
621234285Sdim/// convertLoopInstr - convert a loop instruction to a sequence of instructions
622234285Sdim/// that set the lc and sa register explicitly.
623234285Sdimvoid HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
624234285Sdim                                           MachineBasicBlock::iterator &MII,
625234285Sdim                                           RegScavenger &RS) {
626234285Sdim  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
627234285Sdim  MachineBasicBlock *MBB = MII->getParent();
628234285Sdim  DebugLoc DL = MII->getDebugLoc();
629239462Sdim  unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
630234285Sdim
631234285Sdim  // First, set the LC0 with the trip count.
632234285Sdim  if (MII->getOperand(1).isReg()) {
633234285Sdim    // Trip count is a register
634234285Sdim    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
635234285Sdim      .addReg(MII->getOperand(1).getReg());
636234285Sdim  } else {
637234285Sdim    // Trip count is an immediate.
638234285Sdim    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
639234285Sdim      .addImm(MII->getOperand(1).getImm());
640234285Sdim    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
641234285Sdim      .addReg(Scratch);
642234285Sdim  }
643234285Sdim  // Then, set the SA0 with the loop start address.
644234285Sdim  BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
645234285Sdim    .addMBB(MII->getOperand(0).getMBB());
646234285Sdim  BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
647234285Sdim}
648