HexagonHardwareLoops.cpp revision 321369
1234285Sdim//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2234285Sdim//
3234285Sdim//                     The LLVM Compiler Infrastructure
4234285Sdim//
5234285Sdim// This file is distributed under the University of Illinois Open Source
6234285Sdim// License. See LICENSE.TXT for details.
7234285Sdim//
8234285Sdim//===----------------------------------------------------------------------===//
9234285Sdim//
10234285Sdim// This pass identifies loops where we can generate the Hexagon hardware
11234285Sdim// loop instruction.  The hardware loop can perform loop branches with a
12234285Sdim// zero-cycle overhead.
13234285Sdim//
14234285Sdim// The pattern that defines the induction variable can changed depending on
15234285Sdim// prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
16234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass
17234285Sdim// run by 'llc' may also make changes to the induction variable.
18234285Sdim// The pattern detected by this phase is due to running Strength Reduction.
19234285Sdim//
20234285Sdim// Criteria for hardware loops:
21234285Sdim//  - Countable loops (w/ ind. var for a trip count)
22234285Sdim//  - Assumes loops are normalized by IndVarSimplify
23234285Sdim//  - Try inner-most loops first
24234285Sdim//  - No function calls in loops.
25234285Sdim//
26234285Sdim//===----------------------------------------------------------------------===//
27234285Sdim
28314564Sdim#include "HexagonInstrInfo.h"
29314564Sdim#include "HexagonSubtarget.h"
30249423Sdim#include "llvm/ADT/SmallSet.h"
31314564Sdim#include "llvm/ADT/SmallVector.h"
32234285Sdim#include "llvm/ADT/Statistic.h"
33314564Sdim#include "llvm/ADT/StringRef.h"
34314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
35234285Sdim#include "llvm/CodeGen/MachineDominators.h"
36234285Sdim#include "llvm/CodeGen/MachineFunction.h"
37234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
38314564Sdim#include "llvm/CodeGen/MachineInstr.h"
39234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
40234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
41314564Sdim#include "llvm/CodeGen/MachineOperand.h"
42234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
43314564Sdim#include "llvm/IR/Constants.h"
44314564Sdim#include "llvm/IR/DebugLoc.h"
45314564Sdim#include "llvm/Pass.h"
46249423Sdim#include "llvm/Support/CommandLine.h"
47234285Sdim#include "llvm/Support/Debug.h"
48314564Sdim#include "llvm/Support/ErrorHandling.h"
49314564Sdim#include "llvm/Support/MathExtras.h"
50234285Sdim#include "llvm/Support/raw_ostream.h"
51314564Sdim#include "llvm/Target/TargetRegisterInfo.h"
52314564Sdim#include <cassert>
53314564Sdim#include <cstdint>
54314564Sdim#include <cstdlib>
55314564Sdim#include <iterator>
56314564Sdim#include <map>
57314564Sdim#include <set>
58314564Sdim#include <utility>
59249423Sdim#include <vector>
60234285Sdim
61234285Sdimusing namespace llvm;
62234285Sdim
63276479Sdim#define DEBUG_TYPE "hwloops"
64276479Sdim
65249423Sdim#ifndef NDEBUG
66288943Sdimstatic cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
67288943Sdim
68288943Sdim// Option to create preheader only for a specific function.
69288943Sdimstatic cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
70288943Sdim                                 cl::init(""));
71249423Sdim#endif
72249423Sdim
73288943Sdim// Option to create a preheader if one doesn't exist.
74288943Sdimstatic cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
75288943Sdim    cl::Hidden, cl::init(true),
76288943Sdim    cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
77288943Sdim
78314564Sdim// Turn it off by default. If a preheader block is not created here, the
79314564Sdim// software pipeliner may be unable to find a block suitable to serve as
80314564Sdim// a preheader. In that case SWP will not run.
81314564Sdimstatic cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
82314564Sdim  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
83314564Sdim  "instructions"));
84314564Sdim
85234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
86234285Sdim
87249423Sdimnamespace llvm {
88314564Sdim
89288943Sdim  FunctionPass *createHexagonHardwareLoops();
90249423Sdim  void initializeHexagonHardwareLoopsPass(PassRegistry&);
91249423Sdim
92314564Sdim} // end namespace llvm
93314564Sdim
94234285Sdimnamespace {
95314564Sdim
96234285Sdim  class CountValue;
97314564Sdim
98234285Sdim  struct HexagonHardwareLoops : public MachineFunctionPass {
99249423Sdim    MachineLoopInfo            *MLI;
100249423Sdim    MachineRegisterInfo        *MRI;
101249423Sdim    MachineDominatorTree       *MDT;
102249423Sdim    const HexagonInstrInfo     *TII;
103321369Sdim    const HexagonRegisterInfo  *TRI;
104249423Sdim#ifndef NDEBUG
105249423Sdim    static int Counter;
106249423Sdim#endif
107234285Sdim
108234285Sdim  public:
109249423Sdim    static char ID;
110234285Sdim
111249423Sdim    HexagonHardwareLoops() : MachineFunctionPass(ID) {
112249423Sdim      initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
113249423Sdim    }
114234285Sdim
115276479Sdim    bool runOnMachineFunction(MachineFunction &MF) override;
116234285Sdim
117314564Sdim    StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
118234285Sdim
119276479Sdim    void getAnalysisUsage(AnalysisUsage &AU) const override {
120234285Sdim      AU.addRequired<MachineDominatorTree>();
121234285Sdim      AU.addRequired<MachineLoopInfo>();
122234285Sdim      MachineFunctionPass::getAnalysisUsage(AU);
123234285Sdim    }
124234285Sdim
125234285Sdim  private:
126288943Sdim    typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
127288943Sdim
128249423Sdim    /// Kinds of comparisons in the compare instructions.
129249423Sdim    struct Comparison {
130249423Sdim      enum Kind {
131249423Sdim        EQ  = 0x01,
132249423Sdim        NE  = 0x02,
133288943Sdim        L   = 0x04,
134288943Sdim        G   = 0x08,
135288943Sdim        U   = 0x40,
136249423Sdim        LTs = L,
137249423Sdim        LEs = L | EQ,
138249423Sdim        GTs = G,
139249423Sdim        GEs = G | EQ,
140249423Sdim        LTu = L      | U,
141249423Sdim        LEu = L | EQ | U,
142249423Sdim        GTu = G      | U,
143249423Sdim        GEu = G | EQ | U
144249423Sdim      };
145249423Sdim
146249423Sdim      static Kind getSwappedComparison(Kind Cmp) {
147249423Sdim        assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
148249423Sdim        if ((Cmp & L) || (Cmp & G))
149249423Sdim          return (Kind)(Cmp ^ (L|G));
150249423Sdim        return Cmp;
151249423Sdim      }
152288943Sdim
153288943Sdim      static Kind getNegatedComparison(Kind Cmp) {
154288943Sdim        if ((Cmp & L) || (Cmp & G))
155288943Sdim          return (Kind)((Cmp ^ (L | G)) ^ EQ);
156288943Sdim        if ((Cmp & NE) || (Cmp & EQ))
157288943Sdim          return (Kind)(Cmp ^ (EQ | NE));
158288943Sdim        return (Kind)0;
159288943Sdim      }
160288943Sdim
161288943Sdim      static bool isSigned(Kind Cmp) {
162288943Sdim        return (Cmp & (L | G) && !(Cmp & U));
163288943Sdim      }
164288943Sdim
165288943Sdim      static bool isUnsigned(Kind Cmp) {
166288943Sdim        return (Cmp & U);
167288943Sdim      }
168249423Sdim    };
169249423Sdim
170249423Sdim    /// \brief Find the register that contains the loop controlling
171234285Sdim    /// induction variable.
172249423Sdim    /// If successful, it will return true and set the \p Reg, \p IVBump
173249423Sdim    /// and \p IVOp arguments.  Otherwise it will return false.
174249423Sdim    /// The returned induction register is the register R that follows the
175249423Sdim    /// following induction pattern:
176249423Sdim    /// loop:
177249423Sdim    ///   R = phi ..., [ R.next, LatchBlock ]
178249423Sdim    ///   R.next = R + #bump
179249423Sdim    ///   if (R.next < #N) goto loop
180249423Sdim    /// IVBump is the immediate value added to R, and IVOp is the instruction
181249423Sdim    /// "R.next = R + #bump".
182249423Sdim    bool findInductionRegister(MachineLoop *L, unsigned &Reg,
183249423Sdim                               int64_t &IVBump, MachineInstr *&IVOp) const;
184234285Sdim
185288943Sdim    /// \brief Return the comparison kind for the specified opcode.
186288943Sdim    Comparison::Kind getComparisonKind(unsigned CondOpc,
187288943Sdim                                       MachineOperand *InitialValue,
188288943Sdim                                       const MachineOperand *Endvalue,
189288943Sdim                                       int64_t IVBump) const;
190288943Sdim
191249423Sdim    /// \brief Analyze the statements in a loop to determine if the loop
192249423Sdim    /// has a computable trip count and, if so, return a value that represents
193249423Sdim    /// the trip count expression.
194249423Sdim    CountValue *getLoopTripCount(MachineLoop *L,
195261991Sdim                                 SmallVectorImpl<MachineInstr *> &OldInsts);
196234285Sdim
197249423Sdim    /// \brief Return the expression that represents the number of times
198249423Sdim    /// a loop iterates.  The function takes the operands that represent the
199249423Sdim    /// loop start value, loop end value, and induction value.  Based upon
200249423Sdim    /// these operands, the function attempts to compute the trip count.
201249423Sdim    /// If the trip count is not directly available (as an immediate value,
202249423Sdim    /// or a register), the function will attempt to insert computation of it
203249423Sdim    /// to the loop's preheader.
204288943Sdim    CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
205288943Sdim                             const MachineOperand *End, unsigned IVReg,
206288943Sdim                             int64_t IVBump, Comparison::Kind Cmp) const;
207234285Sdim
208249423Sdim    /// \brief Return true if the instruction is not valid within a hardware
209249423Sdim    /// loop.
210288943Sdim    bool isInvalidLoopOperation(const MachineInstr *MI,
211288943Sdim                                bool IsInnerHWLoop) const;
212234285Sdim
213249423Sdim    /// \brief Return true if the loop contains an instruction that inhibits
214249423Sdim    /// using the hardware loop.
215288943Sdim    bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
216234285Sdim
217249423Sdim    /// \brief Given a loop, check if we can convert it to a hardware loop.
218249423Sdim    /// If so, then perform the conversion and return true.
219288943Sdim    bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
220234285Sdim
221249423Sdim    /// \brief Return true if the instruction is now dead.
222249423Sdim    bool isDead(const MachineInstr *MI,
223261991Sdim                SmallVectorImpl<MachineInstr *> &DeadPhis) const;
224249423Sdim
225249423Sdim    /// \brief Remove the instruction if it is now dead.
226249423Sdim    void removeIfDead(MachineInstr *MI);
227249423Sdim
228249423Sdim    /// \brief Make sure that the "bump" instruction executes before the
229249423Sdim    /// compare.  We need that for the IV fixup, so that the compare
230249423Sdim    /// instruction would not use a bumped value that has not yet been
231249423Sdim    /// defined.  If the instructions are out of order, try to reorder them.
232249423Sdim    bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
233249423Sdim
234288943Sdim    /// \brief Return true if MO and MI pair is visited only once. If visited
235288943Sdim    /// more than once, this indicates there is recursion. In such a case,
236288943Sdim    /// return false.
237288943Sdim    bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
238288943Sdim                      const MachineOperand *MO,
239288943Sdim                      LoopFeederMap &LoopFeederPhi) const;
240249423Sdim
241288943Sdim    /// \brief Return true if the Phi may generate a value that may underflow,
242288943Sdim    /// or may wrap.
243288943Sdim    bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
244288943Sdim                               MachineBasicBlock *MBB, MachineLoop *L,
245288943Sdim                               LoopFeederMap &LoopFeederPhi) const;
246249423Sdim
247288943Sdim    /// \brief Return true if the induction variable may underflow an unsigned
248288943Sdim    /// value in the first iteration.
249288943Sdim    bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
250288943Sdim                                     const MachineOperand *EndVal,
251288943Sdim                                     MachineBasicBlock *MBB, MachineLoop *L,
252288943Sdim                                     LoopFeederMap &LoopFeederPhi) const;
253288943Sdim
254288943Sdim    /// \brief Check if the given operand has a compile-time known constant
255288943Sdim    /// value. Return true if yes, and false otherwise. When returning true, set
256288943Sdim    /// Val to the corresponding constant value.
257288943Sdim    bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
258288943Sdim
259288943Sdim    /// \brief Check if the operand has a compile-time known constant value.
260288943Sdim    bool isImmediate(const MachineOperand &MO) const {
261288943Sdim      int64_t V;
262288943Sdim      return checkForImmediate(MO, V);
263288943Sdim    }
264288943Sdim
265288943Sdim    /// \brief Return the immediate for the specified operand.
266288943Sdim    int64_t getImmediate(const MachineOperand &MO) const {
267288943Sdim      int64_t V;
268288943Sdim      if (!checkForImmediate(MO, V))
269288943Sdim        llvm_unreachable("Invalid operand");
270288943Sdim      return V;
271288943Sdim    }
272288943Sdim
273249423Sdim    /// \brief Reset the given machine operand to now refer to a new immediate
274249423Sdim    /// value.  Assumes that the operand was already referencing an immediate
275249423Sdim    /// value, either directly, or via a register.
276249423Sdim    void setImmediate(MachineOperand &MO, int64_t Val);
277249423Sdim
278249423Sdim    /// \brief Fix the data flow of the induction varible.
279249423Sdim    /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
280249423Sdim    ///                                     |
281249423Sdim    ///                                     +-> back to phi
282249423Sdim    /// where "bump" is the increment of the induction variable:
283249423Sdim    ///   iv = iv + #const.
284249423Sdim    /// Due to some prior code transformations, the actual flow may look
285249423Sdim    /// like this:
286249423Sdim    ///   phi -+-> bump ---> back to phi
287249423Sdim    ///        |
288249423Sdim    ///        +-> comparison-in-latch (against upper_bound-bump),
289249423Sdim    /// i.e. the comparison that controls the loop execution may be using
290249423Sdim    /// the value of the induction variable from before the increment.
291249423Sdim    ///
292249423Sdim    /// Return true if the loop's flow is the desired one (i.e. it's
293249423Sdim    /// either been fixed, or no fixing was necessary).
294249423Sdim    /// Otherwise, return false.  This can happen if the induction variable
295249423Sdim    /// couldn't be identified, or if the value in the latch's comparison
296249423Sdim    /// cannot be adjusted to reflect the post-bump value.
297249423Sdim    bool fixupInductionVariable(MachineLoop *L);
298249423Sdim
299249423Sdim    /// \brief Given a loop, if it does not have a preheader, create one.
300249423Sdim    /// Return the block that is the preheader.
301249423Sdim    MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
302234285Sdim  };
303234285Sdim
304234285Sdim  char HexagonHardwareLoops::ID = 0;
305249423Sdim#ifndef NDEBUG
306249423Sdim  int HexagonHardwareLoops::Counter = 0;
307249423Sdim#endif
308234285Sdim
309280031Sdim  /// \brief Abstraction for a trip count of a loop. A smaller version
310249423Sdim  /// of the MachineOperand class without the concerns of changing the
311249423Sdim  /// operand representation.
312234285Sdim  class CountValue {
313234285Sdim  public:
314234285Sdim    enum CountValueType {
315234285Sdim      CV_Register,
316234285Sdim      CV_Immediate
317234285Sdim    };
318314564Sdim
319234285Sdim  private:
320234285Sdim    CountValueType Kind;
321234285Sdim    union Values {
322249423Sdim      struct {
323249423Sdim        unsigned Reg;
324249423Sdim        unsigned Sub;
325249423Sdim      } R;
326249423Sdim      unsigned ImmVal;
327234285Sdim    } Contents;
328234285Sdim
329234285Sdim  public:
330249423Sdim    explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
331249423Sdim      Kind = t;
332249423Sdim      if (Kind == CV_Register) {
333249423Sdim        Contents.R.Reg = v;
334249423Sdim        Contents.R.Sub = u;
335249423Sdim      } else {
336249423Sdim        Contents.ImmVal = v;
337249423Sdim      }
338249423Sdim    }
339314564Sdim
340234285Sdim    bool isReg() const { return Kind == CV_Register; }
341234285Sdim    bool isImm() const { return Kind == CV_Immediate; }
342234285Sdim
343234285Sdim    unsigned getReg() const {
344234285Sdim      assert(isReg() && "Wrong CountValue accessor");
345249423Sdim      return Contents.R.Reg;
346234285Sdim    }
347249423Sdim    unsigned getSubReg() const {
348249423Sdim      assert(isReg() && "Wrong CountValue accessor");
349249423Sdim      return Contents.R.Sub;
350234285Sdim    }
351249423Sdim    unsigned getImm() const {
352234285Sdim      assert(isImm() && "Wrong CountValue accessor");
353234285Sdim      return Contents.ImmVal;
354234285Sdim    }
355234285Sdim
356288943Sdim    void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
357249423Sdim      if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
358249423Sdim      if (isImm()) { OS << Contents.ImmVal; }
359234285Sdim    }
360234285Sdim  };
361314564Sdim
362249423Sdim} // end anonymous namespace
363234285Sdim
364249423SdimINITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
365249423Sdim                      "Hexagon Hardware Loops", false, false)
366249423SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
367249423SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
368249423SdimINITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
369249423Sdim                    "Hexagon Hardware Loops", false, false)
370234285Sdim
371234285SdimFunctionPass *llvm::createHexagonHardwareLoops() {
372234285Sdim  return new HexagonHardwareLoops();
373234285Sdim}
374234285Sdim
375234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
376234285Sdim  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
377309124Sdim  if (skipFunction(*MF.getFunction()))
378309124Sdim    return false;
379234285Sdim
380234285Sdim  bool Changed = false;
381234285Sdim
382234285Sdim  MLI = &getAnalysis<MachineLoopInfo>();
383234285Sdim  MRI = &MF.getRegInfo();
384249423Sdim  MDT = &getAnalysis<MachineDominatorTree>();
385321369Sdim  const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
386321369Sdim  TII = HST.getInstrInfo();
387321369Sdim  TRI = HST.getRegisterInfo();
388234285Sdim
389288943Sdim  for (auto &L : *MLI)
390288943Sdim    if (!L->getParentLoop()) {
391288943Sdim      bool L0Used = false;
392288943Sdim      bool L1Used = false;
393288943Sdim      Changed |= convertToHardwareLoop(L, L0Used, L1Used);
394288943Sdim    }
395234285Sdim
396234285Sdim  return Changed;
397234285Sdim}
398234285Sdim
399249423Sdimbool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
400249423Sdim                                                 unsigned &Reg,
401249423Sdim                                                 int64_t &IVBump,
402249423Sdim                                                 MachineInstr *&IVOp
403249423Sdim                                                 ) const {
404249423Sdim  MachineBasicBlock *Header = L->getHeader();
405314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
406249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
407314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
408288943Sdim  if (!Header || !Preheader || !Latch || !ExitingBlock)
409249423Sdim    return false;
410249423Sdim
411249423Sdim  // This pair represents an induction register together with an immediate
412249423Sdim  // value that will be added to it in each loop iteration.
413249423Sdim  typedef std::pair<unsigned,int64_t> RegisterBump;
414249423Sdim
415249423Sdim  // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
416249423Sdim  // from an induction operation
417249423Sdim  //   R.next = R + bump
418249423Sdim  // where bump is an immediate value.
419249423Sdim  typedef std::map<unsigned,RegisterBump> InductionMap;
420249423Sdim
421249423Sdim  InductionMap IndMap;
422249423Sdim
423249423Sdim  typedef MachineBasicBlock::instr_iterator instr_iterator;
424249423Sdim  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
425249423Sdim       I != E && I->isPHI(); ++I) {
426249423Sdim    MachineInstr *Phi = &*I;
427249423Sdim
428249423Sdim    // Have a PHI instruction.  Get the operand that corresponds to the
429249423Sdim    // latch block, and see if is a result of an addition of form "reg+imm",
430249423Sdim    // where the "reg" is defined by the PHI node we are looking at.
431249423Sdim    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
432249423Sdim      if (Phi->getOperand(i+1).getMBB() != Latch)
433249423Sdim        continue;
434249423Sdim
435249423Sdim      unsigned PhiOpReg = Phi->getOperand(i).getReg();
436249423Sdim      MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
437249423Sdim
438314564Sdim      if (DI->getDesc().isAdd()) {
439288943Sdim        // If the register operand to the add is the PHI we're looking at, this
440288943Sdim        // meets the induction pattern.
441249423Sdim        unsigned IndReg = DI->getOperand(1).getReg();
442288943Sdim        MachineOperand &Opnd2 = DI->getOperand(2);
443288943Sdim        int64_t V;
444288943Sdim        if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
445249423Sdim          unsigned UpdReg = DI->getOperand(0).getReg();
446249423Sdim          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
447249423Sdim        }
448249423Sdim      }
449249423Sdim    }  // for (i)
450249423Sdim  }  // for (instr)
451249423Sdim
452249423Sdim  SmallVector<MachineOperand,2> Cond;
453276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
454309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
455249423Sdim  if (NotAnalyzed)
456249423Sdim    return false;
457249423Sdim
458288943Sdim  unsigned PredR, PredPos, PredRegFlags;
459288943Sdim  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
460288943Sdim    return false;
461249423Sdim
462249423Sdim  MachineInstr *PredI = MRI->getVRegDef(PredR);
463249423Sdim  if (!PredI->isCompare())
464249423Sdim    return false;
465249423Sdim
466249423Sdim  unsigned CmpReg1 = 0, CmpReg2 = 0;
467249423Sdim  int CmpImm = 0, CmpMask = 0;
468309124Sdim  bool CmpAnalyzed =
469309124Sdim      TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
470249423Sdim  // Fail if the compare was not analyzed, or it's not comparing a register
471249423Sdim  // with an immediate value.  Not checking the mask here, since we handle
472288943Sdim  // the individual compare opcodes (including A4_cmpb*) later on.
473249423Sdim  if (!CmpAnalyzed)
474249423Sdim    return false;
475249423Sdim
476249423Sdim  // Exactly one of the input registers to the comparison should be among
477249423Sdim  // the induction registers.
478249423Sdim  InductionMap::iterator IndMapEnd = IndMap.end();
479249423Sdim  InductionMap::iterator F = IndMapEnd;
480249423Sdim  if (CmpReg1 != 0) {
481249423Sdim    InductionMap::iterator F1 = IndMap.find(CmpReg1);
482249423Sdim    if (F1 != IndMapEnd)
483249423Sdim      F = F1;
484249423Sdim  }
485249423Sdim  if (CmpReg2 != 0) {
486249423Sdim    InductionMap::iterator F2 = IndMap.find(CmpReg2);
487249423Sdim    if (F2 != IndMapEnd) {
488249423Sdim      if (F != IndMapEnd)
489249423Sdim        return false;
490249423Sdim      F = F2;
491249423Sdim    }
492249423Sdim  }
493249423Sdim  if (F == IndMapEnd)
494249423Sdim    return false;
495249423Sdim
496249423Sdim  Reg = F->second.first;
497249423Sdim  IVBump = F->second.second;
498249423Sdim  IVOp = MRI->getVRegDef(F->first);
499249423Sdim  return true;
500249423Sdim}
501249423Sdim
502288943Sdim// Return the comparison kind for the specified opcode.
503288943SdimHexagonHardwareLoops::Comparison::Kind
504288943SdimHexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
505288943Sdim                                        MachineOperand *InitialValue,
506288943Sdim                                        const MachineOperand *EndValue,
507288943Sdim                                        int64_t IVBump) const {
508288943Sdim  Comparison::Kind Cmp = (Comparison::Kind)0;
509288943Sdim  switch (CondOpc) {
510288943Sdim  case Hexagon::C2_cmpeqi:
511288943Sdim  case Hexagon::C2_cmpeq:
512288943Sdim  case Hexagon::C2_cmpeqp:
513288943Sdim    Cmp = Comparison::EQ;
514288943Sdim    break;
515288943Sdim  case Hexagon::C4_cmpneq:
516288943Sdim  case Hexagon::C4_cmpneqi:
517288943Sdim    Cmp = Comparison::NE;
518288943Sdim    break;
519288943Sdim  case Hexagon::C4_cmplte:
520288943Sdim    Cmp = Comparison::LEs;
521288943Sdim    break;
522288943Sdim  case Hexagon::C4_cmplteu:
523288943Sdim    Cmp = Comparison::LEu;
524288943Sdim    break;
525288943Sdim  case Hexagon::C2_cmpgtui:
526288943Sdim  case Hexagon::C2_cmpgtu:
527288943Sdim  case Hexagon::C2_cmpgtup:
528288943Sdim    Cmp = Comparison::GTu;
529288943Sdim    break;
530288943Sdim  case Hexagon::C2_cmpgti:
531288943Sdim  case Hexagon::C2_cmpgt:
532288943Sdim  case Hexagon::C2_cmpgtp:
533288943Sdim    Cmp = Comparison::GTs;
534288943Sdim    break;
535288943Sdim  default:
536288943Sdim    return (Comparison::Kind)0;
537288943Sdim  }
538288943Sdim  return Cmp;
539288943Sdim}
540249423Sdim
541249423Sdim/// \brief Analyze the statements in a loop to determine if the loop has
542249423Sdim/// a computable trip count and, if so, return a value that represents
543249423Sdim/// the trip count expression.
544234285Sdim///
545249423Sdim/// This function iterates over the phi nodes in the loop to check for
546249423Sdim/// induction variable patterns that are used in the calculation for
547249423Sdim/// the number of time the loop is executed.
548249423SdimCountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
549288943Sdim    SmallVectorImpl<MachineInstr *> &OldInsts) {
550234285Sdim  MachineBasicBlock *TopMBB = L->getTopBlock();
551234285Sdim  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
552234285Sdim  assert(PI != TopMBB->pred_end() &&
553234285Sdim         "Loop must have more than one incoming edge!");
554234285Sdim  MachineBasicBlock *Backedge = *PI++;
555249423Sdim  if (PI == TopMBB->pred_end())  // dead loop?
556276479Sdim    return nullptr;
557234285Sdim  MachineBasicBlock *Incoming = *PI++;
558249423Sdim  if (PI != TopMBB->pred_end())  // multiple backedges?
559276479Sdim    return nullptr;
560234285Sdim
561249423Sdim  // Make sure there is one incoming and one backedge and determine which
562234285Sdim  // is which.
563234285Sdim  if (L->contains(Incoming)) {
564234285Sdim    if (L->contains(Backedge))
565276479Sdim      return nullptr;
566234285Sdim    std::swap(Incoming, Backedge);
567234285Sdim  } else if (!L->contains(Backedge))
568276479Sdim    return nullptr;
569234285Sdim
570249423Sdim  // Look for the cmp instruction to determine if we can get a useful trip
571249423Sdim  // count.  The trip count can be either a register or an immediate.  The
572249423Sdim  // location of the value depends upon the type (reg or imm).
573314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
574288943Sdim  if (!ExitingBlock)
575276479Sdim    return nullptr;
576249423Sdim
577249423Sdim  unsigned IVReg = 0;
578249423Sdim  int64_t IVBump = 0;
579249423Sdim  MachineInstr *IVOp;
580249423Sdim  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
581249423Sdim  if (!FoundIV)
582276479Sdim    return nullptr;
583249423Sdim
584314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
585249423Sdim
586276479Sdim  MachineOperand *InitialValue = nullptr;
587249423Sdim  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
588288943Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
589249423Sdim  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
590249423Sdim    MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
591249423Sdim    if (MBB == Preheader)
592249423Sdim      InitialValue = &IV_Phi->getOperand(i);
593249423Sdim    else if (MBB == Latch)
594249423Sdim      IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
595234285Sdim  }
596249423Sdim  if (!InitialValue)
597276479Sdim    return nullptr;
598234285Sdim
599249423Sdim  SmallVector<MachineOperand,2> Cond;
600276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
601309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
602249423Sdim  if (NotAnalyzed)
603276479Sdim    return nullptr;
604234285Sdim
605249423Sdim  MachineBasicBlock *Header = L->getHeader();
606249423Sdim  // TB must be non-null.  If FB is also non-null, one of them must be
607249423Sdim  // the header.  Otherwise, branch to TB could be exiting the loop, and
608249423Sdim  // the fall through can go to the header.
609288943Sdim  assert (TB && "Exit block without a branch?");
610288943Sdim  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
611314564Sdim    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
612288943Sdim    SmallVector<MachineOperand,2> LCond;
613309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
614288943Sdim    if (NotAnalyzed)
615288943Sdim      return nullptr;
616288943Sdim    if (TB == Latch)
617288943Sdim      TB = (LTB == Header) ? LTB : LFB;
618288943Sdim    else
619288943Sdim      FB = (LTB == Header) ? LTB: LFB;
620288943Sdim  }
621249423Sdim  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
622249423Sdim  if (!TB || (FB && TB != Header && FB != Header))
623276479Sdim    return nullptr;
624249423Sdim
625249423Sdim  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
626249423Sdim  // to put imm(0), followed by P in the vector Cond.
627249423Sdim  // If TB is not the header, it means that the "not-taken" path must lead
628249423Sdim  // to the header.
629288943Sdim  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
630288943Sdim  unsigned PredReg, PredPos, PredRegFlags;
631288943Sdim  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
632288943Sdim    return nullptr;
633249423Sdim  MachineInstr *CondI = MRI->getVRegDef(PredReg);
634249423Sdim  unsigned CondOpc = CondI->getOpcode();
635249423Sdim
636249423Sdim  unsigned CmpReg1 = 0, CmpReg2 = 0;
637249423Sdim  int Mask = 0, ImmValue = 0;
638309124Sdim  bool AnalyzedCmp =
639309124Sdim      TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
640249423Sdim  if (!AnalyzedCmp)
641276479Sdim    return nullptr;
642249423Sdim
643249423Sdim  // The comparison operator type determines how we compute the loop
644249423Sdim  // trip count.
645249423Sdim  OldInsts.push_back(CondI);
646249423Sdim  OldInsts.push_back(IVOp);
647249423Sdim
648249423Sdim  // Sadly, the following code gets information based on the position
649249423Sdim  // of the operands in the compare instruction.  This has to be done
650249423Sdim  // this way, because the comparisons check for a specific relationship
651249423Sdim  // between the operands (e.g. is-less-than), rather than to find out
652249423Sdim  // what relationship the operands are in (as on PPC).
653249423Sdim  Comparison::Kind Cmp;
654249423Sdim  bool isSwapped = false;
655249423Sdim  const MachineOperand &Op1 = CondI->getOperand(1);
656249423Sdim  const MachineOperand &Op2 = CondI->getOperand(2);
657276479Sdim  const MachineOperand *EndValue = nullptr;
658249423Sdim
659249423Sdim  if (Op1.isReg()) {
660249423Sdim    if (Op2.isImm() || Op1.getReg() == IVReg)
661249423Sdim      EndValue = &Op2;
662249423Sdim    else {
663249423Sdim      EndValue = &Op1;
664249423Sdim      isSwapped = true;
665249423Sdim    }
666234285Sdim  }
667234285Sdim
668249423Sdim  if (!EndValue)
669276479Sdim    return nullptr;
670234285Sdim
671288943Sdim  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
672288943Sdim  if (!Cmp)
673288943Sdim    return nullptr;
674288943Sdim  if (Negated)
675288943Sdim    Cmp = Comparison::getNegatedComparison(Cmp);
676249423Sdim  if (isSwapped)
677288943Sdim    Cmp = Comparison::getSwappedComparison(Cmp);
678249423Sdim
679249423Sdim  if (InitialValue->isReg()) {
680249423Sdim    unsigned R = InitialValue->getReg();
681249423Sdim    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
682249423Sdim    if (!MDT->properlyDominates(DefBB, Header))
683276479Sdim      return nullptr;
684249423Sdim    OldInsts.push_back(MRI->getVRegDef(R));
685249423Sdim  }
686249423Sdim  if (EndValue->isReg()) {
687249423Sdim    unsigned R = EndValue->getReg();
688249423Sdim    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
689249423Sdim    if (!MDT->properlyDominates(DefBB, Header))
690276479Sdim      return nullptr;
691288943Sdim    OldInsts.push_back(MRI->getVRegDef(R));
692249423Sdim  }
693249423Sdim
694249423Sdim  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
695234285Sdim}
696234285Sdim
697249423Sdim/// \brief Helper function that returns the expression that represents the
698249423Sdim/// number of times a loop iterates.  The function takes the operands that
699249423Sdim/// represent the loop start value, loop end value, and induction value.
700249423Sdim/// Based upon these operands, the function attempts to compute the trip count.
701249423SdimCountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
702249423Sdim                                               const MachineOperand *Start,
703249423Sdim                                               const MachineOperand *End,
704249423Sdim                                               unsigned IVReg,
705249423Sdim                                               int64_t IVBump,
706249423Sdim                                               Comparison::Kind Cmp) const {
707249423Sdim  // Cannot handle comparison EQ, i.e. while (A == B).
708249423Sdim  if (Cmp == Comparison::EQ)
709276479Sdim    return nullptr;
710249423Sdim
711249423Sdim  // Check if either the start or end values are an assignment of an immediate.
712249423Sdim  // If so, use the immediate value rather than the register.
713249423Sdim  if (Start->isReg()) {
714249423Sdim    const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
715288943Sdim    if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
716288943Sdim                          StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
717249423Sdim      Start = &StartValInstr->getOperand(1);
718249423Sdim  }
719249423Sdim  if (End->isReg()) {
720249423Sdim    const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
721288943Sdim    if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
722288943Sdim                        EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
723249423Sdim      End = &EndValInstr->getOperand(1);
724249423Sdim  }
725249423Sdim
726288943Sdim  if (!Start->isReg() && !Start->isImm())
727288943Sdim    return nullptr;
728288943Sdim  if (!End->isReg() && !End->isImm())
729288943Sdim    return nullptr;
730249423Sdim
731249423Sdim  bool CmpLess =     Cmp & Comparison::L;
732249423Sdim  bool CmpGreater =  Cmp & Comparison::G;
733249423Sdim  bool CmpHasEqual = Cmp & Comparison::EQ;
734249423Sdim
735249423Sdim  // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
736249423Sdim  if (CmpLess && IVBump < 0)
737288943Sdim    // Loop going while iv is "less" with the iv value going down.  Must wrap.
738276479Sdim    return nullptr;
739288943Sdim
740249423Sdim  if (CmpGreater && IVBump > 0)
741288943Sdim    // Loop going while iv is "greater" with the iv value going up.  Must wrap.
742276479Sdim    return nullptr;
743249423Sdim
744288943Sdim  // Phis that may feed into the loop.
745288943Sdim  LoopFeederMap LoopFeederPhi;
746288943Sdim
747296417Sdim  // Check if the initial value may be zero and can be decremented in the first
748288943Sdim  // iteration. If the value is zero, the endloop instruction will not decrement
749296417Sdim  // the loop counter, so we shouldn't generate a hardware loop in this case.
750288943Sdim  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
751288943Sdim                                  LoopFeederPhi))
752288943Sdim      return nullptr;
753288943Sdim
754249423Sdim  if (Start->isImm() && End->isImm()) {
755249423Sdim    // Both, start and end are immediates.
756249423Sdim    int64_t StartV = Start->getImm();
757249423Sdim    int64_t EndV = End->getImm();
758249423Sdim    int64_t Dist = EndV - StartV;
759249423Sdim    if (Dist == 0)
760276479Sdim      return nullptr;
761249423Sdim
762249423Sdim    bool Exact = (Dist % IVBump) == 0;
763249423Sdim
764249423Sdim    if (Cmp == Comparison::NE) {
765249423Sdim      if (!Exact)
766276479Sdim        return nullptr;
767249423Sdim      if ((Dist < 0) ^ (IVBump < 0))
768276479Sdim        return nullptr;
769249423Sdim    }
770249423Sdim
771249423Sdim    // For comparisons that include the final value (i.e. include equality
772249423Sdim    // with the final value), we need to increase the distance by 1.
773249423Sdim    if (CmpHasEqual)
774249423Sdim      Dist = Dist > 0 ? Dist+1 : Dist-1;
775249423Sdim
776288943Sdim    // For the loop to iterate, CmpLess should imply Dist > 0.  Similarly,
777288943Sdim    // CmpGreater should imply Dist < 0.  These conditions could actually
778288943Sdim    // fail, for example, in unreachable code (which may still appear to be
779288943Sdim    // reachable in the CFG).
780288943Sdim    if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
781288943Sdim      return nullptr;
782249423Sdim
783249423Sdim    // "Normalized" distance, i.e. with the bump set to +-1.
784288943Sdim    int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump - 1)) / IVBump
785288943Sdim                                 : (-Dist + (-IVBump - 1)) / (-IVBump);
786249423Sdim    assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
787249423Sdim
788249423Sdim    uint64_t Count = Dist1;
789249423Sdim
790249423Sdim    if (Count > 0xFFFFFFFFULL)
791276479Sdim      return nullptr;
792249423Sdim
793249423Sdim    return new CountValue(CountValue::CV_Immediate, Count);
794249423Sdim  }
795249423Sdim
796249423Sdim  // A general case: Start and End are some values, but the actual
797249423Sdim  // iteration count may not be available.  If it is not, insert
798249423Sdim  // a computation of it into the preheader.
799249423Sdim
800249423Sdim  // If the induction variable bump is not a power of 2, quit.
801249423Sdim  // Othwerise we'd need a general integer division.
802288943Sdim  if (!isPowerOf2_64(std::abs(IVBump)))
803276479Sdim    return nullptr;
804249423Sdim
805314564Sdim  MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
806249423Sdim  assert (PH && "Should have a preheader by now");
807249423Sdim  MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
808288943Sdim  DebugLoc DL;
809288943Sdim  if (InsertPos != PH->end())
810288943Sdim    DL = InsertPos->getDebugLoc();
811249423Sdim
812249423Sdim  // If Start is an immediate and End is a register, the trip count
813249423Sdim  // will be "reg - imm".  Hexagon's "subtract immediate" instruction
814249423Sdim  // is actually "reg + -imm".
815249423Sdim
816249423Sdim  // If the loop IV is going downwards, i.e. if the bump is negative,
817249423Sdim  // then the iteration count (computed as End-Start) will need to be
818249423Sdim  // negated.  To avoid the negation, just swap Start and End.
819249423Sdim  if (IVBump < 0) {
820249423Sdim    std::swap(Start, End);
821249423Sdim    IVBump = -IVBump;
822249423Sdim  }
823249423Sdim  // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
824249423Sdim  // Signedness, and "including equality" are preserved.
825249423Sdim
826249423Sdim  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
827249423Sdim  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
828249423Sdim
829249423Sdim  int64_t StartV = 0, EndV = 0;
830249423Sdim  if (Start->isImm())
831249423Sdim    StartV = Start->getImm();
832249423Sdim  if (End->isImm())
833249423Sdim    EndV = End->getImm();
834249423Sdim
835249423Sdim  int64_t AdjV = 0;
836249423Sdim  // To compute the iteration count, we would need this computation:
837249423Sdim  //   Count = (End - Start + (IVBump-1)) / IVBump
838249423Sdim  // or, when CmpHasEqual:
839249423Sdim  //   Count = (End - Start + (IVBump-1)+1) / IVBump
840249423Sdim  // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
841249423Sdim  // generating an instruction specifically to add it if we can adjust
842249423Sdim  // the immediate values for Start or End.
843249423Sdim
844249423Sdim  if (CmpHasEqual) {
845249423Sdim    // Need to add 1 to the total iteration count.
846249423Sdim    if (Start->isImm())
847249423Sdim      StartV--;
848249423Sdim    else if (End->isImm())
849249423Sdim      EndV++;
850249423Sdim    else
851249423Sdim      AdjV += 1;
852249423Sdim  }
853249423Sdim
854249423Sdim  if (Cmp != Comparison::NE) {
855249423Sdim    if (Start->isImm())
856249423Sdim      StartV -= (IVBump-1);
857249423Sdim    else if (End->isImm())
858249423Sdim      EndV += (IVBump-1);
859249423Sdim    else
860249423Sdim      AdjV += (IVBump-1);
861249423Sdim  }
862249423Sdim
863249423Sdim  unsigned R = 0, SR = 0;
864249423Sdim  if (Start->isReg()) {
865249423Sdim    R = Start->getReg();
866249423Sdim    SR = Start->getSubReg();
867249423Sdim  } else {
868249423Sdim    R = End->getReg();
869249423Sdim    SR = End->getSubReg();
870249423Sdim  }
871249423Sdim  const TargetRegisterClass *RC = MRI->getRegClass(R);
872249423Sdim  // Hardware loops cannot handle 64-bit registers.  If it's a double
873249423Sdim  // register, it has to have a subregister.
874249423Sdim  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
875276479Sdim    return nullptr;
876249423Sdim  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
877249423Sdim
878249423Sdim  // Compute DistR (register with the distance between Start and End).
879249423Sdim  unsigned DistR, DistSR;
880249423Sdim
881249423Sdim  // Avoid special case, where the start value is an imm(0).
882249423Sdim  if (Start->isImm() && StartV == 0) {
883249423Sdim    DistR = End->getReg();
884249423Sdim    DistSR = End->getSubReg();
885249423Sdim  } else {
886280031Sdim    const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
887288943Sdim                              (RegToImm ? TII->get(Hexagon::A2_subri) :
888288943Sdim                                          TII->get(Hexagon::A2_addi));
889288943Sdim    if (RegToReg || RegToImm) {
890288943Sdim      unsigned SubR = MRI->createVirtualRegister(IntRC);
891288943Sdim      MachineInstrBuilder SubIB =
892288943Sdim        BuildMI(*PH, InsertPos, DL, SubD, SubR);
893249423Sdim
894288943Sdim      if (RegToReg)
895288943Sdim        SubIB.addReg(End->getReg(), 0, End->getSubReg())
896288943Sdim          .addReg(Start->getReg(), 0, Start->getSubReg());
897288943Sdim      else
898288943Sdim        SubIB.addImm(EndV)
899288943Sdim          .addReg(Start->getReg(), 0, Start->getSubReg());
900288943Sdim      DistR = SubR;
901288943Sdim    } else {
902288943Sdim      // If the loop has been unrolled, we should use the original loop count
903288943Sdim      // instead of recalculating the value. This will avoid additional
904288943Sdim      // 'Add' instruction.
905288943Sdim      const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
906288943Sdim      if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
907288943Sdim          EndValInstr->getOperand(2).getImm() == StartV) {
908288943Sdim        DistR = EndValInstr->getOperand(1).getReg();
909288943Sdim      } else {
910288943Sdim        unsigned SubR = MRI->createVirtualRegister(IntRC);
911288943Sdim        MachineInstrBuilder SubIB =
912288943Sdim          BuildMI(*PH, InsertPos, DL, SubD, SubR);
913288943Sdim        SubIB.addReg(End->getReg(), 0, End->getSubReg())
914288943Sdim             .addImm(-StartV);
915288943Sdim        DistR = SubR;
916288943Sdim      }
917249423Sdim    }
918249423Sdim    DistSR = 0;
919249423Sdim  }
920249423Sdim
921249423Sdim  // From DistR, compute AdjR (register with the adjusted distance).
922249423Sdim  unsigned AdjR, AdjSR;
923249423Sdim
924249423Sdim  if (AdjV == 0) {
925249423Sdim    AdjR = DistR;
926249423Sdim    AdjSR = DistSR;
927249423Sdim  } else {
928249423Sdim    // Generate CountR = ADD DistR, AdjVal
929249423Sdim    unsigned AddR = MRI->createVirtualRegister(IntRC);
930288943Sdim    MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
931249423Sdim    BuildMI(*PH, InsertPos, DL, AddD, AddR)
932249423Sdim      .addReg(DistR, 0, DistSR)
933249423Sdim      .addImm(AdjV);
934249423Sdim
935249423Sdim    AdjR = AddR;
936249423Sdim    AdjSR = 0;
937249423Sdim  }
938249423Sdim
939249423Sdim  // From AdjR, compute CountR (register with the final count).
940249423Sdim  unsigned CountR, CountSR;
941249423Sdim
942249423Sdim  if (IVBump == 1) {
943249423Sdim    CountR = AdjR;
944249423Sdim    CountSR = AdjSR;
945249423Sdim  } else {
946249423Sdim    // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
947249423Sdim    unsigned Shift = Log2_32(IVBump);
948249423Sdim
949249423Sdim    // Generate NormR = LSR DistR, Shift.
950249423Sdim    unsigned LsrR = MRI->createVirtualRegister(IntRC);
951280031Sdim    const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
952249423Sdim    BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
953249423Sdim      .addReg(AdjR, 0, AdjSR)
954249423Sdim      .addImm(Shift);
955249423Sdim
956249423Sdim    CountR = LsrR;
957249423Sdim    CountSR = 0;
958249423Sdim  }
959249423Sdim
960249423Sdim  return new CountValue(CountValue::CV_Register, CountR, CountSR);
961234285Sdim}
962234285Sdim
963249423Sdim/// \brief Return true if the operation is invalid within hardware loop.
964288943Sdimbool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
965288943Sdim                                                  bool IsInnerHWLoop) const {
966288943Sdim  // Call is not allowed because the callee may use a hardware loop except for
967288943Sdim  // the case when the call never returns.
968314564Sdim  if (MI->getDesc().isCall())
969314564Sdim    return !TII->doesNotReturn(*MI);
970249423Sdim
971288943Sdim  // Check if the instruction defines a hardware loop register.
972321369Sdim  using namespace Hexagon;
973321369Sdim  static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
974321369Sdim  static const unsigned Regs1[]  = { LC1, SA1 };
975321369Sdim  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
976321369Sdim                                 : makeArrayRef(Regs1, array_lengthof(Regs1));
977321369Sdim  for (unsigned R : CheckRegs)
978321369Sdim    if (MI->modifiesRegister(R, TRI))
979234285Sdim      return true;
980321369Sdim
981234285Sdim  return false;
982234285Sdim}
983234285Sdim
984288943Sdim/// \brief Return true if the loop contains an instruction that inhibits
985288943Sdim/// the use of the hardware loop instruction.
986288943Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
987288943Sdim    bool IsInnerHWLoop) const {
988261991Sdim  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
989288943Sdim  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
990234285Sdim  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
991234285Sdim    MachineBasicBlock *MBB = Blocks[i];
992234285Sdim    for (MachineBasicBlock::iterator
993234285Sdim           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
994234285Sdim      const MachineInstr *MI = &*MII;
995288943Sdim      if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
996288943Sdim        DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
997234285Sdim        return true;
998288943Sdim      }
999234285Sdim    }
1000234285Sdim  }
1001234285Sdim  return false;
1002234285Sdim}
1003234285Sdim
1004249423Sdim/// \brief Returns true if the instruction is dead.  This was essentially
1005249423Sdim/// copied from DeadMachineInstructionElim::isDead, but with special cases
1006249423Sdim/// for inline asm, physical registers and instructions with side effects
1007249423Sdim/// removed.
1008249423Sdimbool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1009261991Sdim                              SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1010249423Sdim  // Examine each operand.
1011249423Sdim  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1012249423Sdim    const MachineOperand &MO = MI->getOperand(i);
1013249423Sdim    if (!MO.isReg() || !MO.isDef())
1014249423Sdim      continue;
1015249423Sdim
1016249423Sdim    unsigned Reg = MO.getReg();
1017249423Sdim    if (MRI->use_nodbg_empty(Reg))
1018249423Sdim      continue;
1019249423Sdim
1020249423Sdim    typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
1021249423Sdim
1022249423Sdim    // This instruction has users, but if the only user is the phi node for the
1023249423Sdim    // parent block, and the only use of that phi node is this instruction, then
1024249423Sdim    // this instruction is dead: both it (and the phi node) can be removed.
1025249423Sdim    use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1026249423Sdim    use_nodbg_iterator End = MRI->use_nodbg_end();
1027276479Sdim    if (std::next(I) != End || !I->getParent()->isPHI())
1028249423Sdim      return false;
1029249423Sdim
1030276479Sdim    MachineInstr *OnePhi = I->getParent();
1031249423Sdim    for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1032249423Sdim      const MachineOperand &OPO = OnePhi->getOperand(j);
1033249423Sdim      if (!OPO.isReg() || !OPO.isDef())
1034249423Sdim        continue;
1035249423Sdim
1036249423Sdim      unsigned OPReg = OPO.getReg();
1037249423Sdim      use_nodbg_iterator nextJ;
1038249423Sdim      for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1039249423Sdim           J != End; J = nextJ) {
1040276479Sdim        nextJ = std::next(J);
1041276479Sdim        MachineOperand &Use = *J;
1042249423Sdim        MachineInstr *UseMI = Use.getParent();
1043249423Sdim
1044288943Sdim        // If the phi node has a user that is not MI, bail.
1045249423Sdim        if (MI != UseMI)
1046249423Sdim          return false;
1047249423Sdim      }
1048249423Sdim    }
1049249423Sdim    DeadPhis.push_back(OnePhi);
1050249423Sdim  }
1051249423Sdim
1052249423Sdim  // If there are no defs with uses, the instruction is dead.
1053249423Sdim  return true;
1054249423Sdim}
1055249423Sdim
1056249423Sdimvoid HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1057249423Sdim  // This procedure was essentially copied from DeadMachineInstructionElim.
1058249423Sdim
1059249423Sdim  SmallVector<MachineInstr*, 1> DeadPhis;
1060249423Sdim  if (isDead(MI, DeadPhis)) {
1061249423Sdim    DEBUG(dbgs() << "HW looping will remove: " << *MI);
1062249423Sdim
1063249423Sdim    // It is possible that some DBG_VALUE instructions refer to this
1064249423Sdim    // instruction.  Examine each def operand for such references;
1065249423Sdim    // if found, mark the DBG_VALUE as undef (but don't delete it).
1066249423Sdim    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1067249423Sdim      const MachineOperand &MO = MI->getOperand(i);
1068249423Sdim      if (!MO.isReg() || !MO.isDef())
1069249423Sdim        continue;
1070249423Sdim      unsigned Reg = MO.getReg();
1071249423Sdim      MachineRegisterInfo::use_iterator nextI;
1072249423Sdim      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1073249423Sdim           E = MRI->use_end(); I != E; I = nextI) {
1074276479Sdim        nextI = std::next(I);  // I is invalidated by the setReg
1075276479Sdim        MachineOperand &Use = *I;
1076276479Sdim        MachineInstr *UseMI = I->getParent();
1077249423Sdim        if (UseMI == MI)
1078249423Sdim          continue;
1079249423Sdim        if (Use.isDebug())
1080249423Sdim          UseMI->getOperand(0).setReg(0U);
1081249423Sdim      }
1082249423Sdim    }
1083249423Sdim
1084249423Sdim    MI->eraseFromParent();
1085249423Sdim    for (unsigned i = 0; i < DeadPhis.size(); ++i)
1086249423Sdim      DeadPhis[i]->eraseFromParent();
1087249423Sdim  }
1088249423Sdim}
1089249423Sdim
1090249423Sdim/// \brief Check if the loop is a candidate for converting to a hardware
1091249423Sdim/// loop.  If so, then perform the transformation.
1092234285Sdim///
1093249423Sdim/// This function works on innermost loops first.  A loop can be converted
1094249423Sdim/// if it is a counting loop; either a register value or an immediate.
1095234285Sdim///
1096249423Sdim/// The code makes several assumptions about the representation of the loop
1097249423Sdim/// in llvm.
1098288943Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1099288943Sdim                                                 bool &RecL0used,
1100288943Sdim                                                 bool &RecL1used) {
1101249423Sdim  // This is just for sanity.
1102249423Sdim  assert(L->getHeader() && "Loop without a header?");
1103249423Sdim
1104234285Sdim  bool Changed = false;
1105288943Sdim  bool L0Used = false;
1106288943Sdim  bool L1Used = false;
1107288943Sdim
1108234285Sdim  // Process nested loops first.
1109288943Sdim  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1110288943Sdim    Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1111288943Sdim    L0Used |= RecL0used;
1112288943Sdim    L1Used |= RecL1used;
1113288943Sdim  }
1114249423Sdim
1115234285Sdim  // If a nested loop has been converted, then we can't convert this loop.
1116288943Sdim  if (Changed && L0Used && L1Used)
1117234285Sdim    return Changed;
1118249423Sdim
1119288943Sdim  unsigned LOOP_i;
1120288943Sdim  unsigned LOOP_r;
1121288943Sdim  unsigned ENDLOOP;
1122288943Sdim
1123288943Sdim  // Flag used to track loopN instruction:
1124288943Sdim  // 1 - Hardware loop is being generated for the inner most loop.
1125288943Sdim  // 0 - Hardware loop is being generated for the outer loop.
1126288943Sdim  unsigned IsInnerHWLoop = 1;
1127288943Sdim
1128288943Sdim  if (L0Used) {
1129288943Sdim    LOOP_i = Hexagon::J2_loop1i;
1130288943Sdim    LOOP_r = Hexagon::J2_loop1r;
1131288943Sdim    ENDLOOP = Hexagon::ENDLOOP1;
1132288943Sdim    IsInnerHWLoop = 0;
1133288943Sdim  } else {
1134288943Sdim    LOOP_i = Hexagon::J2_loop0i;
1135288943Sdim    LOOP_r = Hexagon::J2_loop0r;
1136288943Sdim    ENDLOOP = Hexagon::ENDLOOP0;
1137288943Sdim  }
1138288943Sdim
1139249423Sdim#ifndef NDEBUG
1140249423Sdim  // Stop trying after reaching the limit (if any).
1141249423Sdim  int Limit = HWLoopLimit;
1142249423Sdim  if (Limit >= 0) {
1143249423Sdim    if (Counter >= HWLoopLimit)
1144249423Sdim      return false;
1145249423Sdim    Counter++;
1146234285Sdim  }
1147249423Sdim#endif
1148249423Sdim
1149234285Sdim  // Does the loop contain any invalid instructions?
1150288943Sdim  if (containsInvalidInstruction(L, IsInnerHWLoop))
1151234285Sdim    return false;
1152249423Sdim
1153314564Sdim  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1154234285Sdim  // Don't generate hw loop if the loop has more than one exit.
1155276479Sdim  if (!LastMBB)
1156234285Sdim    return false;
1157249423Sdim
1158234285Sdim  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1159249423Sdim  if (LastI == LastMBB->end())
1160249423Sdim    return false;
1161234285Sdim
1162288943Sdim  // Is the induction variable bump feeding the latch condition?
1163288943Sdim  if (!fixupInductionVariable(L))
1164288943Sdim    return false;
1165288943Sdim
1166249423Sdim  // Ensure the loop has a preheader: the loop instruction will be
1167249423Sdim  // placed there.
1168314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1169249423Sdim  if (!Preheader) {
1170249423Sdim    Preheader = createPreheaderForLoop(L);
1171249423Sdim    if (!Preheader)
1172249423Sdim      return false;
1173249423Sdim  }
1174288943Sdim
1175249423Sdim  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1176249423Sdim
1177249423Sdim  SmallVector<MachineInstr*, 2> OldInsts;
1178249423Sdim  // Are we able to determine the trip count for the loop?
1179249423Sdim  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1180276479Sdim  if (!TripCount)
1181249423Sdim    return false;
1182249423Sdim
1183249423Sdim  // Is the trip count available in the preheader?
1184249423Sdim  if (TripCount->isReg()) {
1185249423Sdim    // There will be a use of the register inserted into the preheader,
1186249423Sdim    // so make sure that the register is actually defined at that point.
1187249423Sdim    MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1188249423Sdim    MachineBasicBlock *BBDef = TCDef->getParent();
1189288943Sdim    if (!MDT->dominates(BBDef, Preheader))
1190288943Sdim      return false;
1191249423Sdim  }
1192249423Sdim
1193234285Sdim  // Determine the loop start.
1194288943Sdim  MachineBasicBlock *TopBlock = L->getTopBlock();
1195314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1196314564Sdim  MachineBasicBlock *LoopStart = nullptr;
1197288943Sdim  if (ExitingBlock !=  L->getLoopLatch()) {
1198314564Sdim    MachineBasicBlock *TB = nullptr, *FB = nullptr;
1199288943Sdim    SmallVector<MachineOperand, 2> Cond;
1200288943Sdim
1201309124Sdim    if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1202234285Sdim      return false;
1203288943Sdim
1204288943Sdim    if (L->contains(TB))
1205288943Sdim      LoopStart = TB;
1206288943Sdim    else if (L->contains(FB))
1207288943Sdim      LoopStart = FB;
1208288943Sdim    else
1209288943Sdim      return false;
1210234285Sdim  }
1211288943Sdim  else
1212288943Sdim    LoopStart = TopBlock;
1213234285Sdim
1214249423Sdim  // Convert the loop to a hardware loop.
1215234285Sdim  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1216249423Sdim  DebugLoc DL;
1217249423Sdim  if (InsertPos != Preheader->end())
1218249423Sdim    DL = InsertPos->getDebugLoc();
1219234285Sdim
1220234285Sdim  if (TripCount->isReg()) {
1221234285Sdim    // Create a copy of the loop count register.
1222249423Sdim    unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1223249423Sdim    BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1224249423Sdim      .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1225239462Sdim    // Add the Loop instruction to the beginning of the loop.
1226288943Sdim    BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1227249423Sdim      .addReg(CountReg);
1228234285Sdim  } else {
1229249423Sdim    assert(TripCount->isImm() && "Expecting immediate value for trip count");
1230249423Sdim    // Add the Loop immediate instruction to the beginning of the loop,
1231249423Sdim    // if the immediate fits in the instructions.  Otherwise, we need to
1232249423Sdim    // create a new virtual register.
1233234285Sdim    int64_t CountImm = TripCount->getImm();
1234288943Sdim    if (!TII->isValidOffset(LOOP_i, CountImm)) {
1235249423Sdim      unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1236280031Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1237249423Sdim        .addImm(CountImm);
1238288943Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1239249423Sdim        .addMBB(LoopStart).addReg(CountReg);
1240249423Sdim    } else
1241288943Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1242249423Sdim        .addMBB(LoopStart).addImm(CountImm);
1243234285Sdim  }
1244234285Sdim
1245249423Sdim  // Make sure the loop start always has a reference in the CFG.  We need
1246249423Sdim  // to create a BlockAddress operand to get this mechanism to work both the
1247234285Sdim  // MachineBasicBlock and BasicBlock objects need the flag set.
1248234285Sdim  LoopStart->setHasAddressTaken();
1249234285Sdim  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1250249423Sdim  // object.
1251234285Sdim  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1252234285Sdim
1253234285Sdim  // Replace the loop branch with an endloop instruction.
1254249423Sdim  DebugLoc LastIDL = LastI->getDebugLoc();
1255288943Sdim  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1256234285Sdim
1257234285Sdim  // The loop ends with either:
1258234285Sdim  //  - a conditional branch followed by an unconditional branch, or
1259234285Sdim  //  - a conditional branch to the loop start.
1260280031Sdim  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1261280031Sdim      LastI->getOpcode() == Hexagon::J2_jumpf) {
1262249423Sdim    // Delete one and change/add an uncond. branch to out of the loop.
1263234285Sdim    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1264234285Sdim    LastI = LastMBB->erase(LastI);
1265234285Sdim    if (!L->contains(BranchTarget)) {
1266249423Sdim      if (LastI != LastMBB->end())
1267249423Sdim        LastI = LastMBB->erase(LastI);
1268234285Sdim      SmallVector<MachineOperand, 0> Cond;
1269314564Sdim      TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1270234285Sdim    }
1271234285Sdim  } else {
1272234285Sdim    // Conditional branch to loop start; just delete it.
1273234285Sdim    LastMBB->erase(LastI);
1274234285Sdim  }
1275234285Sdim  delete TripCount;
1276234285Sdim
1277249423Sdim  // The induction operation and the comparison may now be
1278249423Sdim  // unneeded. If these are unneeded, then remove them.
1279249423Sdim  for (unsigned i = 0; i < OldInsts.size(); ++i)
1280249423Sdim    removeIfDead(OldInsts[i]);
1281249423Sdim
1282234285Sdim  ++NumHWLoops;
1283288943Sdim
1284288943Sdim  // Set RecL1used and RecL0used only after hardware loop has been
1285288943Sdim  // successfully generated. Doing it earlier can cause wrong loop instruction
1286288943Sdim  // to be used.
1287288943Sdim  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1288288943Sdim    RecL1used = true;
1289288943Sdim  else
1290288943Sdim    RecL0used = true;
1291288943Sdim
1292234285Sdim  return true;
1293234285Sdim}
1294234285Sdim
1295249423Sdimbool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1296249423Sdim                                            MachineInstr *CmpI) {
1297249423Sdim  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1298249423Sdim
1299249423Sdim  MachineBasicBlock *BB = BumpI->getParent();
1300249423Sdim  if (CmpI->getParent() != BB)
1301249423Sdim    return false;
1302249423Sdim
1303249423Sdim  typedef MachineBasicBlock::instr_iterator instr_iterator;
1304249423Sdim  // Check if things are in order to begin with.
1305296417Sdim  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1306249423Sdim    if (&*I == CmpI)
1307249423Sdim      return true;
1308249423Sdim
1309249423Sdim  // Out of order.
1310249423Sdim  unsigned PredR = CmpI->getOperand(0).getReg();
1311249423Sdim  bool FoundBump = false;
1312296417Sdim  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1313249423Sdim  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1314249423Sdim    MachineInstr *In = &*I;
1315249423Sdim    for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1316249423Sdim      MachineOperand &MO = In->getOperand(i);
1317249423Sdim      if (MO.isReg() && MO.isUse()) {
1318249423Sdim        if (MO.getReg() == PredR)  // Found an intervening use of PredR.
1319249423Sdim          return false;
1320249423Sdim      }
1321249423Sdim    }
1322249423Sdim
1323249423Sdim    if (In == BumpI) {
1324296417Sdim      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1325249423Sdim      FoundBump = true;
1326249423Sdim      break;
1327249423Sdim    }
1328249423Sdim  }
1329249423Sdim  assert (FoundBump && "Cannot determine instruction order");
1330249423Sdim  return FoundBump;
1331234285Sdim}
1332234285Sdim
1333288943Sdim/// This function is required to break recursion. Visiting phis in a loop may
1334288943Sdim/// result in recursion during compilation. We break the recursion by making
1335288943Sdim/// sure that we visit a MachineOperand and its definition in a
1336288943Sdim/// MachineInstruction only once. If we attempt to visit more than once, then
1337288943Sdim/// there is recursion, and will return false.
1338288943Sdimbool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1339288943Sdim                                        MachineInstr *MI,
1340288943Sdim                                        const MachineOperand *MO,
1341288943Sdim                                        LoopFeederMap &LoopFeederPhi) const {
1342288943Sdim  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1343288943Sdim    const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1344288943Sdim    DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1345288943Sdim    // Ignore all BBs that form Loop.
1346288943Sdim    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1347288943Sdim      MachineBasicBlock *MBB = Blocks[i];
1348288943Sdim      if (A == MBB)
1349288943Sdim        return false;
1350288943Sdim    }
1351288943Sdim    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1352288943Sdim    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1353288943Sdim    return true;
1354288943Sdim  } else
1355288943Sdim    // Already visited node.
1356288943Sdim    return false;
1357288943Sdim}
1358234285Sdim
1359288943Sdim/// Return true if a Phi may generate a value that can underflow.
1360288943Sdim/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1361288943Sdimbool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1362288943Sdim    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1363288943Sdim    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1364288943Sdim  assert(Phi->isPHI() && "Expecting a Phi.");
1365288943Sdim  // Walk through each Phi, and its used operands. Make sure that
1366288943Sdim  // if there is recursion in Phi, we won't generate hardware loops.
1367288943Sdim  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1368288943Sdim    if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1369288943Sdim      if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1370288943Sdim                                      Phi->getParent(), L, LoopFeederPhi))
1371288943Sdim        return true;
1372288943Sdim  return false;
1373288943Sdim}
1374288943Sdim
1375288943Sdim/// Return true if the induction variable can underflow in the first iteration.
1376288943Sdim/// An example, is an initial unsigned value that is 0 and is decrement in the
1377288943Sdim/// first itertion of a do-while loop.  In this case, we cannot generate a
1378288943Sdim/// hardware loop because the endloop instruction does not decrement the loop
1379288943Sdim/// counter if it is <= 1. We only need to perform this analysis if the
1380288943Sdim/// initial value is a register.
1381288943Sdim///
1382288943Sdim/// This function assumes the initial value may underfow unless proven
1383288943Sdim/// otherwise. If the type is signed, then we don't care because signed
1384288943Sdim/// underflow is undefined. We attempt to prove the initial value is not
1385288943Sdim/// zero by perfoming a crude analysis of the loop counter. This function
1386288943Sdim/// checks if the initial value is used in any comparison prior to the loop
1387288943Sdim/// and, if so, assumes the comparison is a range check. This is inexact,
1388288943Sdim/// but will catch the simple cases.
1389288943Sdimbool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1390288943Sdim    const MachineOperand *InitVal, const MachineOperand *EndVal,
1391288943Sdim    MachineBasicBlock *MBB, MachineLoop *L,
1392288943Sdim    LoopFeederMap &LoopFeederPhi) const {
1393288943Sdim  // Only check register values since they are unknown.
1394288943Sdim  if (!InitVal->isReg())
1395288943Sdim    return false;
1396288943Sdim
1397288943Sdim  if (!EndVal->isImm())
1398288943Sdim    return false;
1399288943Sdim
1400288943Sdim  // A register value that is assigned an immediate is a known value, and it
1401288943Sdim  // won't underflow in the first iteration.
1402288943Sdim  int64_t Imm;
1403288943Sdim  if (checkForImmediate(*InitVal, Imm))
1404288943Sdim    return (EndVal->getImm() == Imm);
1405288943Sdim
1406288943Sdim  unsigned Reg = InitVal->getReg();
1407288943Sdim
1408288943Sdim  // We don't know the value of a physical register.
1409288943Sdim  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1410288943Sdim    return true;
1411288943Sdim
1412288943Sdim  MachineInstr *Def = MRI->getVRegDef(Reg);
1413288943Sdim  if (!Def)
1414288943Sdim    return true;
1415288943Sdim
1416288943Sdim  // If the initial value is a Phi or copy and the operands may not underflow,
1417288943Sdim  // then the definition cannot be underflow either.
1418288943Sdim  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1419288943Sdim                                             L, LoopFeederPhi))
1420288943Sdim    return false;
1421288943Sdim  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1422288943Sdim                                                    EndVal, Def->getParent(),
1423288943Sdim                                                    L, LoopFeederPhi))
1424288943Sdim    return false;
1425288943Sdim
1426288943Sdim  // Iterate over the uses of the initial value. If the initial value is used
1427288943Sdim  // in a compare, then we assume this is a range check that ensures the loop
1428288943Sdim  // doesn't underflow. This is not an exact test and should be improved.
1429288943Sdim  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1430288943Sdim         E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1431288943Sdim    MachineInstr *MI = &*I;
1432288943Sdim    unsigned CmpReg1 = 0, CmpReg2 = 0;
1433288943Sdim    int CmpMask = 0, CmpValue = 0;
1434288943Sdim
1435309124Sdim    if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1436288943Sdim      continue;
1437288943Sdim
1438314564Sdim    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1439288943Sdim    SmallVector<MachineOperand, 2> Cond;
1440309124Sdim    if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1441288943Sdim      continue;
1442288943Sdim
1443314564Sdim    Comparison::Kind Cmp =
1444314564Sdim        getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1445288943Sdim    if (Cmp == 0)
1446288943Sdim      continue;
1447288943Sdim    if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1448288943Sdim      Cmp = Comparison::getNegatedComparison(Cmp);
1449288943Sdim    if (CmpReg2 != 0 && CmpReg2 == Reg)
1450288943Sdim      Cmp = Comparison::getSwappedComparison(Cmp);
1451288943Sdim
1452288943Sdim    // Signed underflow is undefined.
1453288943Sdim    if (Comparison::isSigned(Cmp))
1454288943Sdim      return false;
1455288943Sdim
1456296417Sdim    // Check if there is a comparison of the initial value. If the initial value
1457288943Sdim    // is greater than or not equal to another value, then assume this is a
1458288943Sdim    // range check.
1459288943Sdim    if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1460288943Sdim      return false;
1461288943Sdim  }
1462288943Sdim
1463288943Sdim  // OK - this is a hack that needs to be improved. We really need to analyze
1464288943Sdim  // the instructions performed on the initial value. This works on the simplest
1465288943Sdim  // cases only.
1466288943Sdim  if (!Def->isCopy() && !Def->isPHI())
1467288943Sdim    return false;
1468288943Sdim
1469288943Sdim  return true;
1470288943Sdim}
1471288943Sdim
1472288943Sdimbool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1473288943Sdim                                             int64_t &Val) const {
1474288943Sdim  if (MO.isImm()) {
1475288943Sdim    Val = MO.getImm();
1476288943Sdim    return true;
1477288943Sdim  }
1478288943Sdim  if (!MO.isReg())
1479288943Sdim    return false;
1480288943Sdim
1481288943Sdim  // MO is a register. Check whether it is defined as an immediate value,
1482288943Sdim  // and if so, get the value of it in TV. That value will then need to be
1483288943Sdim  // processed to handle potential subregisters in MO.
1484288943Sdim  int64_t TV;
1485288943Sdim
1486288943Sdim  unsigned R = MO.getReg();
1487288943Sdim  if (!TargetRegisterInfo::isVirtualRegister(R))
1488288943Sdim    return false;
1489249423Sdim  MachineInstr *DI = MRI->getVRegDef(R);
1490249423Sdim  unsigned DOpc = DI->getOpcode();
1491249423Sdim  switch (DOpc) {
1492288943Sdim    case TargetOpcode::COPY:
1493280031Sdim    case Hexagon::A2_tfrsi:
1494280031Sdim    case Hexagon::A2_tfrpi:
1495314564Sdim    case Hexagon::CONST32:
1496314564Sdim    case Hexagon::CONST64: {
1497288943Sdim      // Call recursively to avoid an extra check whether operand(1) is
1498288943Sdim      // indeed an immediate (it could be a global address, for example),
1499288943Sdim      // plus we can handle COPY at the same time.
1500288943Sdim      if (!checkForImmediate(DI->getOperand(1), TV))
1501288943Sdim        return false;
1502288943Sdim      break;
1503288943Sdim    }
1504288943Sdim    case Hexagon::A2_combineii:
1505288943Sdim    case Hexagon::A4_combineir:
1506288943Sdim    case Hexagon::A4_combineii:
1507288943Sdim    case Hexagon::A4_combineri:
1508288943Sdim    case Hexagon::A2_combinew: {
1509288943Sdim      const MachineOperand &S1 = DI->getOperand(1);
1510288943Sdim      const MachineOperand &S2 = DI->getOperand(2);
1511288943Sdim      int64_t V1, V2;
1512288943Sdim      if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1513288943Sdim        return false;
1514321369Sdim      TV = V2 | (static_cast<uint64_t>(V1) << 32);
1515288943Sdim      break;
1516288943Sdim    }
1517288943Sdim    case TargetOpcode::REG_SEQUENCE: {
1518288943Sdim      const MachineOperand &S1 = DI->getOperand(1);
1519288943Sdim      const MachineOperand &S3 = DI->getOperand(3);
1520288943Sdim      int64_t V1, V3;
1521288943Sdim      if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1522288943Sdim        return false;
1523288943Sdim      unsigned Sub2 = DI->getOperand(2).getImm();
1524288943Sdim      unsigned Sub4 = DI->getOperand(4).getImm();
1525314564Sdim      if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1526288943Sdim        TV = V1 | (V3 << 32);
1527314564Sdim      else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1528288943Sdim        TV = V3 | (V1 << 32);
1529288943Sdim      else
1530288943Sdim        llvm_unreachable("Unexpected form of REG_SEQUENCE");
1531288943Sdim      break;
1532288943Sdim    }
1533288943Sdim
1534288943Sdim    default:
1535288943Sdim      return false;
1536249423Sdim  }
1537234285Sdim
1538314564Sdim  // By now, we should have successfully obtained the immediate value defining
1539288943Sdim  // the register referenced in MO. Handle a potential use of a subregister.
1540288943Sdim  switch (MO.getSubReg()) {
1541314564Sdim    case Hexagon::isub_lo:
1542288943Sdim      Val = TV & 0xFFFFFFFFULL;
1543288943Sdim      break;
1544314564Sdim    case Hexagon::isub_hi:
1545288943Sdim      Val = (TV >> 32) & 0xFFFFFFFFULL;
1546288943Sdim      break;
1547288943Sdim    default:
1548288943Sdim      Val = TV;
1549288943Sdim      break;
1550288943Sdim  }
1551288943Sdim  return true;
1552249423Sdim}
1553234285Sdim
1554249423Sdimvoid HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1555249423Sdim  if (MO.isImm()) {
1556249423Sdim    MO.setImm(Val);
1557249423Sdim    return;
1558234285Sdim  }
1559234285Sdim
1560249423Sdim  assert(MO.isReg());
1561249423Sdim  unsigned R = MO.getReg();
1562288943Sdim  MachineInstr *DI = MRI->getVRegDef(R);
1563234285Sdim
1564249423Sdim  const TargetRegisterClass *RC = MRI->getRegClass(R);
1565249423Sdim  unsigned NewR = MRI->createVirtualRegister(RC);
1566249423Sdim  MachineBasicBlock &B = *DI->getParent();
1567249423Sdim  DebugLoc DL = DI->getDebugLoc();
1568288943Sdim  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1569249423Sdim  MO.setReg(NewR);
1570249423Sdim}
1571234285Sdim
1572288943Sdimstatic bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1573288943Sdim  // These two instructions are not extendable.
1574288943Sdim  if (CmpOpc == Hexagon::A4_cmpbeqi)
1575288943Sdim    return isUInt<8>(Imm);
1576288943Sdim  if (CmpOpc == Hexagon::A4_cmpbgti)
1577288943Sdim    return isInt<8>(Imm);
1578288943Sdim  // The rest of the comparison-with-immediate instructions are extendable.
1579288943Sdim  return true;
1580288943Sdim}
1581249423Sdim
1582249423Sdimbool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1583249423Sdim  MachineBasicBlock *Header = L->getHeader();
1584249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
1585314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1586249423Sdim
1587288943Sdim  if (!(Header && Latch && ExitingBlock))
1588249423Sdim    return false;
1589249423Sdim
1590249423Sdim  // These data structures follow the same concept as the corresponding
1591249423Sdim  // ones in findInductionRegister (where some comments are).
1592249423Sdim  typedef std::pair<unsigned,int64_t> RegisterBump;
1593249423Sdim  typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1594249423Sdim  typedef std::set<RegisterInduction> RegisterInductionSet;
1595249423Sdim
1596249423Sdim  // Register candidates for induction variables, with their associated bumps.
1597249423Sdim  RegisterInductionSet IndRegs;
1598249423Sdim
1599249423Sdim  // Look for induction patterns:
1600249423Sdim  //   vreg1 = PHI ..., [ latch, vreg2 ]
1601249423Sdim  //   vreg2 = ADD vreg1, imm
1602249423Sdim  typedef MachineBasicBlock::instr_iterator instr_iterator;
1603249423Sdim  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1604249423Sdim       I != E && I->isPHI(); ++I) {
1605249423Sdim    MachineInstr *Phi = &*I;
1606249423Sdim
1607249423Sdim    // Have a PHI instruction.
1608249423Sdim    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1609249423Sdim      if (Phi->getOperand(i+1).getMBB() != Latch)
1610249423Sdim        continue;
1611249423Sdim
1612249423Sdim      unsigned PhiReg = Phi->getOperand(i).getReg();
1613249423Sdim      MachineInstr *DI = MRI->getVRegDef(PhiReg);
1614249423Sdim
1615314564Sdim      if (DI->getDesc().isAdd()) {
1616249423Sdim        // If the register operand to the add/sub is the PHI we are looking
1617249423Sdim        // at, this meets the induction pattern.
1618249423Sdim        unsigned IndReg = DI->getOperand(1).getReg();
1619288943Sdim        MachineOperand &Opnd2 = DI->getOperand(2);
1620288943Sdim        int64_t V;
1621288943Sdim        if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1622249423Sdim          unsigned UpdReg = DI->getOperand(0).getReg();
1623249423Sdim          IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1624234285Sdim        }
1625234285Sdim      }
1626249423Sdim    }  // for (i)
1627249423Sdim  }  // for (instr)
1628249423Sdim
1629249423Sdim  if (IndRegs.empty())
1630249423Sdim    return false;
1631249423Sdim
1632276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1633249423Sdim  SmallVector<MachineOperand,2> Cond;
1634249423Sdim  // AnalyzeBranch returns true if it fails to analyze branch.
1635309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1636288943Sdim  if (NotAnalyzed || Cond.empty())
1637249423Sdim    return false;
1638249423Sdim
1639288943Sdim  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1640314564Sdim    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1641288943Sdim    SmallVector<MachineOperand,2> LCond;
1642309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1643288943Sdim    if (NotAnalyzed)
1644288943Sdim      return false;
1645249423Sdim
1646288943Sdim    // Since latch is not the exiting block, the latch branch should be an
1647288943Sdim    // unconditional branch to the loop header.
1648288943Sdim    if (TB == Latch)
1649288943Sdim      TB = (LTB == Header) ? LTB : LFB;
1650288943Sdim    else
1651288943Sdim      FB = (LTB == Header) ? LTB : LFB;
1652288943Sdim  }
1653288943Sdim  if (TB != Header) {
1654288943Sdim    if (FB != Header) {
1655288943Sdim      // The latch/exit block does not go back to the header.
1656288943Sdim      return false;
1657288943Sdim    }
1658288943Sdim    // FB is the header (i.e., uncond. jump to branch header)
1659288943Sdim    // In this case, the LoopBody -> TB should not be a back edge otherwise
1660288943Sdim    // it could result in an infinite loop after conversion to hw_loop.
1661288943Sdim    // This case can happen when the Latch has two jumps like this:
1662288943Sdim    // Jmp_c OuterLoopHeader <-- TB
1663288943Sdim    // Jmp   InnerLoopHeader <-- FB
1664288943Sdim    if (MDT->dominates(TB, FB))
1665288943Sdim      return false;
1666288943Sdim  }
1667249423Sdim
1668249423Sdim  // Expecting a predicate register as a condition.  It won't be a hardware
1669249423Sdim  // predicate register at this point yet, just a vreg.
1670249423Sdim  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1671249423Sdim  // into Cond, followed by the predicate register.  For non-negated branches
1672249423Sdim  // it's just the register.
1673249423Sdim  unsigned CSz = Cond.size();
1674249423Sdim  if (CSz != 1 && CSz != 2)
1675249423Sdim    return false;
1676249423Sdim
1677288943Sdim  if (!Cond[CSz-1].isReg())
1678288943Sdim    return false;
1679288943Sdim
1680249423Sdim  unsigned P = Cond[CSz-1].getReg();
1681249423Sdim  MachineInstr *PredDef = MRI->getVRegDef(P);
1682249423Sdim
1683249423Sdim  if (!PredDef->isCompare())
1684249423Sdim    return false;
1685249423Sdim
1686249423Sdim  SmallSet<unsigned,2> CmpRegs;
1687276479Sdim  MachineOperand *CmpImmOp = nullptr;
1688249423Sdim
1689249423Sdim  // Go over all operands to the compare and look for immediate and register
1690249423Sdim  // operands.  Assume that if the compare has a single register use and a
1691249423Sdim  // single immediate operand, then the register is being compared with the
1692249423Sdim  // immediate value.
1693249423Sdim  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1694249423Sdim    MachineOperand &MO = PredDef->getOperand(i);
1695249423Sdim    if (MO.isReg()) {
1696249423Sdim      // Skip all implicit references.  In one case there was:
1697249423Sdim      //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1698249423Sdim      if (MO.isImplicit())
1699249423Sdim        continue;
1700249423Sdim      if (MO.isUse()) {
1701288943Sdim        if (!isImmediate(MO)) {
1702249423Sdim          CmpRegs.insert(MO.getReg());
1703249423Sdim          continue;
1704249423Sdim        }
1705249423Sdim        // Consider the register to be the "immediate" operand.
1706249423Sdim        if (CmpImmOp)
1707249423Sdim          return false;
1708249423Sdim        CmpImmOp = &MO;
1709249423Sdim      }
1710249423Sdim    } else if (MO.isImm()) {
1711249423Sdim      if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
1712249423Sdim        return false;
1713249423Sdim      CmpImmOp = &MO;
1714234285Sdim    }
1715234285Sdim  }
1716234285Sdim
1717249423Sdim  if (CmpRegs.empty())
1718249423Sdim    return false;
1719234285Sdim
1720249423Sdim  // Check if the compared register follows the order we want.  Fix if needed.
1721249423Sdim  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1722249423Sdim       I != E; ++I) {
1723249423Sdim    // This is a success.  If the register used in the comparison is one that
1724249423Sdim    // we have identified as a bumped (updated) induction register, there is
1725249423Sdim    // nothing to do.
1726249423Sdim    if (CmpRegs.count(I->first))
1727249423Sdim      return true;
1728249423Sdim
1729249423Sdim    // Otherwise, if the register being compared comes out of a PHI node,
1730249423Sdim    // and has been recognized as following the induction pattern, and is
1731249423Sdim    // compared against an immediate, we can fix it.
1732249423Sdim    const RegisterBump &RB = I->second;
1733249423Sdim    if (CmpRegs.count(RB.first)) {
1734288943Sdim      if (!CmpImmOp) {
1735288943Sdim        // If both operands to the compare instruction are registers, see if
1736288943Sdim        // it can be changed to use induction register as one of the operands.
1737288943Sdim        MachineInstr *IndI = nullptr;
1738288943Sdim        MachineInstr *nonIndI = nullptr;
1739288943Sdim        MachineOperand *IndMO = nullptr;
1740288943Sdim        MachineOperand *nonIndMO = nullptr;
1741288943Sdim
1742288943Sdim        for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1743288943Sdim          MachineOperand &MO = PredDef->getOperand(i);
1744288943Sdim          if (MO.isReg() && MO.getReg() == RB.first) {
1745288943Sdim            DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1746288943Sdim                         << *(MRI->getVRegDef(I->first)));
1747288943Sdim            if (IndI)
1748288943Sdim              return false;
1749288943Sdim
1750288943Sdim            IndI = MRI->getVRegDef(I->first);
1751288943Sdim            IndMO = &MO;
1752288943Sdim          } else if (MO.isReg()) {
1753288943Sdim            DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1754288943Sdim                         << *(MRI->getVRegDef(MO.getReg())));
1755288943Sdim            if (nonIndI)
1756288943Sdim              return false;
1757288943Sdim
1758288943Sdim            nonIndI = MRI->getVRegDef(MO.getReg());
1759288943Sdim            nonIndMO = &MO;
1760288943Sdim          }
1761288943Sdim        }
1762288943Sdim        if (IndI && nonIndI &&
1763288943Sdim            nonIndI->getOpcode() == Hexagon::A2_addi &&
1764288943Sdim            nonIndI->getOperand(2).isImm() &&
1765288943Sdim            nonIndI->getOperand(2).getImm() == - RB.second) {
1766288943Sdim          bool Order = orderBumpCompare(IndI, PredDef);
1767288943Sdim          if (Order) {
1768288943Sdim            IndMO->setReg(I->first);
1769288943Sdim            nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1770288943Sdim            return true;
1771288943Sdim          }
1772288943Sdim        }
1773249423Sdim        return false;
1774288943Sdim      }
1775249423Sdim
1776288943Sdim      // It is not valid to do this transformation on an unsigned comparison
1777288943Sdim      // because it may underflow.
1778314564Sdim      Comparison::Kind Cmp =
1779314564Sdim          getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1780288943Sdim      if (!Cmp || Comparison::isUnsigned(Cmp))
1781288943Sdim        return false;
1782288943Sdim
1783288943Sdim      // If the register is being compared against an immediate, try changing
1784288943Sdim      // the compare instruction to use induction register and adjust the
1785288943Sdim      // immediate operand.
1786249423Sdim      int64_t CmpImm = getImmediate(*CmpImmOp);
1787249423Sdim      int64_t V = RB.second;
1788288943Sdim      // Handle Overflow (64-bit).
1789288943Sdim      if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1790288943Sdim          ((V < 0) && (CmpImm < INT64_MIN - V)))
1791249423Sdim        return false;
1792249423Sdim      CmpImm += V;
1793288943Sdim      // Most comparisons of register against an immediate value allow
1794288943Sdim      // the immediate to be constant-extended. There are some exceptions
1795288943Sdim      // though. Make sure the new combination will work.
1796288943Sdim      if (CmpImmOp->isImm())
1797288943Sdim        if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1798288943Sdim          return false;
1799249423Sdim
1800249423Sdim      // Make sure that the compare happens after the bump.  Otherwise,
1801249423Sdim      // after the fixup, the compare would use a yet-undefined register.
1802249423Sdim      MachineInstr *BumpI = MRI->getVRegDef(I->first);
1803249423Sdim      bool Order = orderBumpCompare(BumpI, PredDef);
1804249423Sdim      if (!Order)
1805249423Sdim        return false;
1806249423Sdim
1807249423Sdim      // Finally, fix the compare instruction.
1808249423Sdim      setImmediate(*CmpImmOp, CmpImm);
1809249423Sdim      for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1810249423Sdim        MachineOperand &MO = PredDef->getOperand(i);
1811249423Sdim        if (MO.isReg() && MO.getReg() == RB.first) {
1812249423Sdim          MO.setReg(I->first);
1813249423Sdim          return true;
1814249423Sdim        }
1815249423Sdim      }
1816249423Sdim    }
1817249423Sdim  }
1818249423Sdim
1819249423Sdim  return false;
1820234285Sdim}
1821234285Sdim
1822314564Sdim/// createPreheaderForLoop - Create a preheader for a given loop.
1823249423SdimMachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1824249423Sdim      MachineLoop *L) {
1825314564Sdim  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1826249423Sdim    return TmpPH;
1827288943Sdim  if (!HWCreatePreheader)
1828288943Sdim    return nullptr;
1829288943Sdim
1830249423Sdim  MachineBasicBlock *Header = L->getHeader();
1831249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
1832314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1833249423Sdim  MachineFunction *MF = Header->getParent();
1834249423Sdim  DebugLoc DL;
1835249423Sdim
1836288943Sdim#ifndef NDEBUG
1837288943Sdim  if ((PHFn != "") && (PHFn != MF->getName()))
1838276479Sdim    return nullptr;
1839288943Sdim#endif
1840249423Sdim
1841288943Sdim  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1842288943Sdim    return nullptr;
1843288943Sdim
1844249423Sdim  typedef MachineBasicBlock::instr_iterator instr_iterator;
1845249423Sdim
1846249423Sdim  // Verify that all existing predecessors have analyzable branches
1847249423Sdim  // (or no branches at all).
1848249423Sdim  typedef std::vector<MachineBasicBlock*> MBBVector;
1849249423Sdim  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1850249423Sdim  SmallVector<MachineOperand,2> Tmp1;
1851276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1852249423Sdim
1853309124Sdim  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1854276479Sdim    return nullptr;
1855249423Sdim
1856249423Sdim  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1857249423Sdim    MachineBasicBlock *PB = *I;
1858309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1859288943Sdim    if (NotAnalyzed)
1860288943Sdim      return nullptr;
1861249423Sdim  }
1862249423Sdim
1863249423Sdim  MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1864296417Sdim  MF->insert(Header->getIterator(), NewPH);
1865249423Sdim
1866249423Sdim  if (Header->pred_size() > 2) {
1867249423Sdim    // Ensure that the header has only two predecessors: the preheader and
1868249423Sdim    // the loop latch.  Any additional predecessors of the header should
1869288943Sdim    // join at the newly created preheader. Inspect all PHI nodes from the
1870249423Sdim    // header and create appropriate corresponding PHI nodes in the preheader.
1871249423Sdim
1872249423Sdim    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1873249423Sdim         I != E && I->isPHI(); ++I) {
1874249423Sdim      MachineInstr *PN = &*I;
1875249423Sdim
1876249423Sdim      const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1877249423Sdim      MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1878249423Sdim      NewPH->insert(NewPH->end(), NewPN);
1879249423Sdim
1880249423Sdim      unsigned PR = PN->getOperand(0).getReg();
1881249423Sdim      const TargetRegisterClass *RC = MRI->getRegClass(PR);
1882249423Sdim      unsigned NewPR = MRI->createVirtualRegister(RC);
1883249423Sdim      NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1884249423Sdim
1885249423Sdim      // Copy all non-latch operands of a header's PHI node to the newly
1886249423Sdim      // created PHI node in the preheader.
1887249423Sdim      for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1888249423Sdim        unsigned PredR = PN->getOperand(i).getReg();
1889288943Sdim        unsigned PredRSub = PN->getOperand(i).getSubReg();
1890249423Sdim        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1891249423Sdim        if (PredB == Latch)
1892249423Sdim          continue;
1893249423Sdim
1894288943Sdim        MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1895288943Sdim        MO.setSubReg(PredRSub);
1896288943Sdim        NewPN->addOperand(MO);
1897249423Sdim        NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1898249423Sdim      }
1899249423Sdim
1900249423Sdim      // Remove copied operands from the old PHI node and add the value
1901249423Sdim      // coming from the preheader's PHI.
1902249423Sdim      for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1903249423Sdim        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1904249423Sdim        if (PredB != Latch) {
1905249423Sdim          PN->RemoveOperand(i+1);
1906249423Sdim          PN->RemoveOperand(i);
1907249423Sdim        }
1908249423Sdim      }
1909249423Sdim      PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1910249423Sdim      PN->addOperand(MachineOperand::CreateMBB(NewPH));
1911249423Sdim    }
1912234285Sdim  } else {
1913249423Sdim    assert(Header->pred_size() == 2);
1914249423Sdim
1915249423Sdim    // The header has only two predecessors, but the non-latch predecessor
1916249423Sdim    // is not a preheader (e.g. it has other successors, etc.)
1917249423Sdim    // In such a case we don't need any extra PHI nodes in the new preheader,
1918249423Sdim    // all we need is to adjust existing PHIs in the header to now refer to
1919249423Sdim    // the new preheader.
1920249423Sdim    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1921249423Sdim         I != E && I->isPHI(); ++I) {
1922249423Sdim      MachineInstr *PN = &*I;
1923249423Sdim      for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1924249423Sdim        MachineOperand &MO = PN->getOperand(i+1);
1925249423Sdim        if (MO.getMBB() != Latch)
1926249423Sdim          MO.setMBB(NewPH);
1927249423Sdim      }
1928249423Sdim    }
1929234285Sdim  }
1930249423Sdim
1931249423Sdim  // "Reroute" the CFG edges to link in the new preheader.
1932249423Sdim  // If any of the predecessors falls through to the header, insert a branch
1933249423Sdim  // to the new preheader in that place.
1934249423Sdim  SmallVector<MachineOperand,1> Tmp2;
1935249423Sdim  SmallVector<MachineOperand,1> EmptyCond;
1936249423Sdim
1937276479Sdim  TB = FB = nullptr;
1938249423Sdim
1939249423Sdim  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1940249423Sdim    MachineBasicBlock *PB = *I;
1941249423Sdim    if (PB != Latch) {
1942249423Sdim      Tmp2.clear();
1943309124Sdim      bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1944276479Sdim      (void)NotAnalyzed; // suppress compiler warning
1945249423Sdim      assert (!NotAnalyzed && "Should be analyzable!");
1946249423Sdim      if (TB != Header && (Tmp2.empty() || FB != Header))
1947314564Sdim        TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1948249423Sdim      PB->ReplaceUsesOfBlockWith(Header, NewPH);
1949249423Sdim    }
1950249423Sdim  }
1951249423Sdim
1952249423Sdim  // It can happen that the latch block will fall through into the header.
1953249423Sdim  // Insert an unconditional branch to the header.
1954276479Sdim  TB = FB = nullptr;
1955309124Sdim  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1956276479Sdim  (void)LatchNotAnalyzed; // suppress compiler warning
1957249423Sdim  assert (!LatchNotAnalyzed && "Should be analyzable!");
1958249423Sdim  if (!TB && !FB)
1959314564Sdim    TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1960249423Sdim
1961249423Sdim  // Finally, the branch from the preheader to the header.
1962314564Sdim  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1963249423Sdim  NewPH->addSuccessor(Header);
1964249423Sdim
1965288943Sdim  MachineLoop *ParentLoop = L->getParentLoop();
1966288943Sdim  if (ParentLoop)
1967288943Sdim    ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1968288943Sdim
1969288943Sdim  // Update the dominator information with the new preheader.
1970288943Sdim  if (MDT) {
1971314564Sdim    if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1972314564Sdim      if (MachineDomTreeNode *DHN = HN->getIDom()) {
1973314564Sdim        MDT->addNewBlock(NewPH, DHN->getBlock());
1974314564Sdim        MDT->changeImmediateDominator(Header, NewPH);
1975314564Sdim      }
1976314564Sdim    }
1977288943Sdim  }
1978288943Sdim
1979249423Sdim  return NewPH;
1980234285Sdim}
1981