1327952Sdim//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2234285Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6234285Sdim//
7234285Sdim//===----------------------------------------------------------------------===//
8234285Sdim//
9234285Sdim// This pass identifies loops where we can generate the Hexagon hardware
10234285Sdim// loop instruction.  The hardware loop can perform loop branches with a
11234285Sdim// zero-cycle overhead.
12234285Sdim//
13234285Sdim// The pattern that defines the induction variable can changed depending on
14234285Sdim// prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
15234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass
16234285Sdim// run by 'llc' may also make changes to the induction variable.
17234285Sdim// The pattern detected by this phase is due to running Strength Reduction.
18234285Sdim//
19234285Sdim// Criteria for hardware loops:
20234285Sdim//  - Countable loops (w/ ind. var for a trip count)
21234285Sdim//  - Assumes loops are normalized by IndVarSimplify
22234285Sdim//  - Try inner-most loops first
23234285Sdim//  - No function calls in loops.
24234285Sdim//
25234285Sdim//===----------------------------------------------------------------------===//
26234285Sdim
27314564Sdim#include "HexagonInstrInfo.h"
28314564Sdim#include "HexagonSubtarget.h"
29327952Sdim#include "llvm/ADT/ArrayRef.h"
30327952Sdim#include "llvm/ADT/STLExtras.h"
31249423Sdim#include "llvm/ADT/SmallSet.h"
32314564Sdim#include "llvm/ADT/SmallVector.h"
33234285Sdim#include "llvm/ADT/Statistic.h"
34314564Sdim#include "llvm/ADT/StringRef.h"
35314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
36234285Sdim#include "llvm/CodeGen/MachineDominators.h"
37234285Sdim#include "llvm/CodeGen/MachineFunction.h"
38234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
39314564Sdim#include "llvm/CodeGen/MachineInstr.h"
40234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
41234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
42314564Sdim#include "llvm/CodeGen/MachineOperand.h"
43234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
44327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h"
45314564Sdim#include "llvm/IR/Constants.h"
46314564Sdim#include "llvm/IR/DebugLoc.h"
47360784Sdim#include "llvm/InitializePasses.h"
48314564Sdim#include "llvm/Pass.h"
49249423Sdim#include "llvm/Support/CommandLine.h"
50234285Sdim#include "llvm/Support/Debug.h"
51314564Sdim#include "llvm/Support/ErrorHandling.h"
52314564Sdim#include "llvm/Support/MathExtras.h"
53234285Sdim#include "llvm/Support/raw_ostream.h"
54314564Sdim#include <cassert>
55314564Sdim#include <cstdint>
56314564Sdim#include <cstdlib>
57314564Sdim#include <iterator>
58314564Sdim#include <map>
59314564Sdim#include <set>
60327952Sdim#include <string>
61314564Sdim#include <utility>
62249423Sdim#include <vector>
63234285Sdim
64234285Sdimusing namespace llvm;
65234285Sdim
66276479Sdim#define DEBUG_TYPE "hwloops"
67276479Sdim
68249423Sdim#ifndef NDEBUG
69288943Sdimstatic cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70288943Sdim
71288943Sdim// Option to create preheader only for a specific function.
72288943Sdimstatic cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73288943Sdim                                 cl::init(""));
74249423Sdim#endif
75249423Sdim
76288943Sdim// Option to create a preheader if one doesn't exist.
77288943Sdimstatic cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78288943Sdim    cl::Hidden, cl::init(true),
79288943Sdim    cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80288943Sdim
81314564Sdim// Turn it off by default. If a preheader block is not created here, the
82314564Sdim// software pipeliner may be unable to find a block suitable to serve as
83314564Sdim// a preheader. In that case SWP will not run.
84314564Sdimstatic cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
85314564Sdim  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
86314564Sdim  "instructions"));
87314564Sdim
88234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89234285Sdim
90249423Sdimnamespace llvm {
91314564Sdim
92288943Sdim  FunctionPass *createHexagonHardwareLoops();
93249423Sdim  void initializeHexagonHardwareLoopsPass(PassRegistry&);
94249423Sdim
95314564Sdim} // end namespace llvm
96314564Sdim
97234285Sdimnamespace {
98314564Sdim
99234285Sdim  class CountValue;
100314564Sdim
101234285Sdim  struct HexagonHardwareLoops : public MachineFunctionPass {
102249423Sdim    MachineLoopInfo            *MLI;
103249423Sdim    MachineRegisterInfo        *MRI;
104249423Sdim    MachineDominatorTree       *MDT;
105249423Sdim    const HexagonInstrInfo     *TII;
106321369Sdim    const HexagonRegisterInfo  *TRI;
107249423Sdim#ifndef NDEBUG
108249423Sdim    static int Counter;
109249423Sdim#endif
110234285Sdim
111234285Sdim  public:
112249423Sdim    static char ID;
113234285Sdim
114327952Sdim    HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115234285Sdim
116276479Sdim    bool runOnMachineFunction(MachineFunction &MF) override;
117234285Sdim
118314564Sdim    StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119234285Sdim
120276479Sdim    void getAnalysisUsage(AnalysisUsage &AU) const override {
121234285Sdim      AU.addRequired<MachineDominatorTree>();
122234285Sdim      AU.addRequired<MachineLoopInfo>();
123234285Sdim      MachineFunctionPass::getAnalysisUsage(AU);
124234285Sdim    }
125234285Sdim
126234285Sdim  private:
127327952Sdim    using LoopFeederMap = std::map<unsigned, MachineInstr *>;
128288943Sdim
129249423Sdim    /// Kinds of comparisons in the compare instructions.
130249423Sdim    struct Comparison {
131249423Sdim      enum Kind {
132249423Sdim        EQ  = 0x01,
133249423Sdim        NE  = 0x02,
134288943Sdim        L   = 0x04,
135288943Sdim        G   = 0x08,
136288943Sdim        U   = 0x40,
137249423Sdim        LTs = L,
138249423Sdim        LEs = L | EQ,
139249423Sdim        GTs = G,
140249423Sdim        GEs = G | EQ,
141249423Sdim        LTu = L      | U,
142249423Sdim        LEu = L | EQ | U,
143249423Sdim        GTu = G      | U,
144249423Sdim        GEu = G | EQ | U
145249423Sdim      };
146249423Sdim
147249423Sdim      static Kind getSwappedComparison(Kind Cmp) {
148249423Sdim        assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
149249423Sdim        if ((Cmp & L) || (Cmp & G))
150249423Sdim          return (Kind)(Cmp ^ (L|G));
151249423Sdim        return Cmp;
152249423Sdim      }
153288943Sdim
154288943Sdim      static Kind getNegatedComparison(Kind Cmp) {
155288943Sdim        if ((Cmp & L) || (Cmp & G))
156288943Sdim          return (Kind)((Cmp ^ (L | G)) ^ EQ);
157288943Sdim        if ((Cmp & NE) || (Cmp & EQ))
158288943Sdim          return (Kind)(Cmp ^ (EQ | NE));
159288943Sdim        return (Kind)0;
160288943Sdim      }
161288943Sdim
162288943Sdim      static bool isSigned(Kind Cmp) {
163288943Sdim        return (Cmp & (L | G) && !(Cmp & U));
164288943Sdim      }
165288943Sdim
166288943Sdim      static bool isUnsigned(Kind Cmp) {
167288943Sdim        return (Cmp & U);
168288943Sdim      }
169249423Sdim    };
170249423Sdim
171341825Sdim    /// Find the register that contains the loop controlling
172234285Sdim    /// induction variable.
173249423Sdim    /// If successful, it will return true and set the \p Reg, \p IVBump
174249423Sdim    /// and \p IVOp arguments.  Otherwise it will return false.
175249423Sdim    /// The returned induction register is the register R that follows the
176249423Sdim    /// following induction pattern:
177249423Sdim    /// loop:
178249423Sdim    ///   R = phi ..., [ R.next, LatchBlock ]
179249423Sdim    ///   R.next = R + #bump
180249423Sdim    ///   if (R.next < #N) goto loop
181249423Sdim    /// IVBump is the immediate value added to R, and IVOp is the instruction
182249423Sdim    /// "R.next = R + #bump".
183249423Sdim    bool findInductionRegister(MachineLoop *L, unsigned &Reg,
184249423Sdim                               int64_t &IVBump, MachineInstr *&IVOp) const;
185234285Sdim
186341825Sdim    /// Return the comparison kind for the specified opcode.
187288943Sdim    Comparison::Kind getComparisonKind(unsigned CondOpc,
188288943Sdim                                       MachineOperand *InitialValue,
189288943Sdim                                       const MachineOperand *Endvalue,
190288943Sdim                                       int64_t IVBump) const;
191288943Sdim
192341825Sdim    /// Analyze the statements in a loop to determine if the loop
193249423Sdim    /// has a computable trip count and, if so, return a value that represents
194249423Sdim    /// the trip count expression.
195249423Sdim    CountValue *getLoopTripCount(MachineLoop *L,
196261991Sdim                                 SmallVectorImpl<MachineInstr *> &OldInsts);
197234285Sdim
198341825Sdim    /// Return the expression that represents the number of times
199249423Sdim    /// a loop iterates.  The function takes the operands that represent the
200249423Sdim    /// loop start value, loop end value, and induction value.  Based upon
201249423Sdim    /// these operands, the function attempts to compute the trip count.
202249423Sdim    /// If the trip count is not directly available (as an immediate value,
203249423Sdim    /// or a register), the function will attempt to insert computation of it
204249423Sdim    /// to the loop's preheader.
205288943Sdim    CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
206288943Sdim                             const MachineOperand *End, unsigned IVReg,
207288943Sdim                             int64_t IVBump, Comparison::Kind Cmp) const;
208234285Sdim
209341825Sdim    /// Return true if the instruction is not valid within a hardware
210249423Sdim    /// loop.
211288943Sdim    bool isInvalidLoopOperation(const MachineInstr *MI,
212288943Sdim                                bool IsInnerHWLoop) const;
213234285Sdim
214341825Sdim    /// Return true if the loop contains an instruction that inhibits
215249423Sdim    /// using the hardware loop.
216288943Sdim    bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217234285Sdim
218341825Sdim    /// Given a loop, check if we can convert it to a hardware loop.
219249423Sdim    /// If so, then perform the conversion and return true.
220288943Sdim    bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221234285Sdim
222341825Sdim    /// Return true if the instruction is now dead.
223249423Sdim    bool isDead(const MachineInstr *MI,
224261991Sdim                SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225249423Sdim
226341825Sdim    /// Remove the instruction if it is now dead.
227249423Sdim    void removeIfDead(MachineInstr *MI);
228249423Sdim
229341825Sdim    /// Make sure that the "bump" instruction executes before the
230249423Sdim    /// compare.  We need that for the IV fixup, so that the compare
231249423Sdim    /// instruction would not use a bumped value that has not yet been
232249423Sdim    /// defined.  If the instructions are out of order, try to reorder them.
233249423Sdim    bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234249423Sdim
235341825Sdim    /// Return true if MO and MI pair is visited only once. If visited
236288943Sdim    /// more than once, this indicates there is recursion. In such a case,
237288943Sdim    /// return false.
238288943Sdim    bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
239288943Sdim                      const MachineOperand *MO,
240288943Sdim                      LoopFeederMap &LoopFeederPhi) const;
241249423Sdim
242341825Sdim    /// Return true if the Phi may generate a value that may underflow,
243288943Sdim    /// or may wrap.
244288943Sdim    bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
245288943Sdim                               MachineBasicBlock *MBB, MachineLoop *L,
246288943Sdim                               LoopFeederMap &LoopFeederPhi) const;
247249423Sdim
248341825Sdim    /// Return true if the induction variable may underflow an unsigned
249288943Sdim    /// value in the first iteration.
250288943Sdim    bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
251288943Sdim                                     const MachineOperand *EndVal,
252288943Sdim                                     MachineBasicBlock *MBB, MachineLoop *L,
253288943Sdim                                     LoopFeederMap &LoopFeederPhi) const;
254288943Sdim
255341825Sdim    /// Check if the given operand has a compile-time known constant
256288943Sdim    /// value. Return true if yes, and false otherwise. When returning true, set
257288943Sdim    /// Val to the corresponding constant value.
258288943Sdim    bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259288943Sdim
260341825Sdim    /// Check if the operand has a compile-time known constant value.
261288943Sdim    bool isImmediate(const MachineOperand &MO) const {
262288943Sdim      int64_t V;
263288943Sdim      return checkForImmediate(MO, V);
264288943Sdim    }
265288943Sdim
266341825Sdim    /// Return the immediate for the specified operand.
267288943Sdim    int64_t getImmediate(const MachineOperand &MO) const {
268288943Sdim      int64_t V;
269288943Sdim      if (!checkForImmediate(MO, V))
270288943Sdim        llvm_unreachable("Invalid operand");
271288943Sdim      return V;
272288943Sdim    }
273288943Sdim
274341825Sdim    /// Reset the given machine operand to now refer to a new immediate
275249423Sdim    /// value.  Assumes that the operand was already referencing an immediate
276249423Sdim    /// value, either directly, or via a register.
277249423Sdim    void setImmediate(MachineOperand &MO, int64_t Val);
278249423Sdim
279341825Sdim    /// Fix the data flow of the induction variable.
280249423Sdim    /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
281249423Sdim    ///                                     |
282249423Sdim    ///                                     +-> back to phi
283249423Sdim    /// where "bump" is the increment of the induction variable:
284249423Sdim    ///   iv = iv + #const.
285249423Sdim    /// Due to some prior code transformations, the actual flow may look
286249423Sdim    /// like this:
287249423Sdim    ///   phi -+-> bump ---> back to phi
288249423Sdim    ///        |
289249423Sdim    ///        +-> comparison-in-latch (against upper_bound-bump),
290249423Sdim    /// i.e. the comparison that controls the loop execution may be using
291249423Sdim    /// the value of the induction variable from before the increment.
292249423Sdim    ///
293249423Sdim    /// Return true if the loop's flow is the desired one (i.e. it's
294249423Sdim    /// either been fixed, or no fixing was necessary).
295249423Sdim    /// Otherwise, return false.  This can happen if the induction variable
296249423Sdim    /// couldn't be identified, or if the value in the latch's comparison
297249423Sdim    /// cannot be adjusted to reflect the post-bump value.
298249423Sdim    bool fixupInductionVariable(MachineLoop *L);
299249423Sdim
300341825Sdim    /// Given a loop, if it does not have a preheader, create one.
301249423Sdim    /// Return the block that is the preheader.
302249423Sdim    MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
303234285Sdim  };
304234285Sdim
305234285Sdim  char HexagonHardwareLoops::ID = 0;
306249423Sdim#ifndef NDEBUG
307249423Sdim  int HexagonHardwareLoops::Counter = 0;
308249423Sdim#endif
309234285Sdim
310341825Sdim  /// Abstraction for a trip count of a loop. A smaller version
311249423Sdim  /// of the MachineOperand class without the concerns of changing the
312249423Sdim  /// operand representation.
313234285Sdim  class CountValue {
314234285Sdim  public:
315234285Sdim    enum CountValueType {
316234285Sdim      CV_Register,
317234285Sdim      CV_Immediate
318234285Sdim    };
319314564Sdim
320234285Sdim  private:
321234285Sdim    CountValueType Kind;
322234285Sdim    union Values {
323249423Sdim      struct {
324249423Sdim        unsigned Reg;
325249423Sdim        unsigned Sub;
326249423Sdim      } R;
327249423Sdim      unsigned ImmVal;
328234285Sdim    } Contents;
329234285Sdim
330234285Sdim  public:
331249423Sdim    explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
332249423Sdim      Kind = t;
333249423Sdim      if (Kind == CV_Register) {
334249423Sdim        Contents.R.Reg = v;
335249423Sdim        Contents.R.Sub = u;
336249423Sdim      } else {
337249423Sdim        Contents.ImmVal = v;
338249423Sdim      }
339249423Sdim    }
340314564Sdim
341234285Sdim    bool isReg() const { return Kind == CV_Register; }
342234285Sdim    bool isImm() const { return Kind == CV_Immediate; }
343234285Sdim
344234285Sdim    unsigned getReg() const {
345234285Sdim      assert(isReg() && "Wrong CountValue accessor");
346249423Sdim      return Contents.R.Reg;
347234285Sdim    }
348327952Sdim
349249423Sdim    unsigned getSubReg() const {
350249423Sdim      assert(isReg() && "Wrong CountValue accessor");
351249423Sdim      return Contents.R.Sub;
352234285Sdim    }
353327952Sdim
354249423Sdim    unsigned getImm() const {
355234285Sdim      assert(isImm() && "Wrong CountValue accessor");
356234285Sdim      return Contents.ImmVal;
357234285Sdim    }
358234285Sdim
359288943Sdim    void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
360327952Sdim      if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
361249423Sdim      if (isImm()) { OS << Contents.ImmVal; }
362234285Sdim    }
363234285Sdim  };
364314564Sdim
365249423Sdim} // end anonymous namespace
366234285Sdim
367249423SdimINITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
368249423Sdim                      "Hexagon Hardware Loops", false, false)
369249423SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
370249423SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
371249423SdimINITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
372249423Sdim                    "Hexagon Hardware Loops", false, false)
373234285Sdim
374234285SdimFunctionPass *llvm::createHexagonHardwareLoops() {
375234285Sdim  return new HexagonHardwareLoops();
376234285Sdim}
377234285Sdim
378234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
379341825Sdim  LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
380327952Sdim  if (skipFunction(MF.getFunction()))
381309124Sdim    return false;
382234285Sdim
383234285Sdim  bool Changed = false;
384234285Sdim
385234285Sdim  MLI = &getAnalysis<MachineLoopInfo>();
386234285Sdim  MRI = &MF.getRegInfo();
387249423Sdim  MDT = &getAnalysis<MachineDominatorTree>();
388321369Sdim  const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
389321369Sdim  TII = HST.getInstrInfo();
390321369Sdim  TRI = HST.getRegisterInfo();
391234285Sdim
392288943Sdim  for (auto &L : *MLI)
393288943Sdim    if (!L->getParentLoop()) {
394288943Sdim      bool L0Used = false;
395288943Sdim      bool L1Used = false;
396288943Sdim      Changed |= convertToHardwareLoop(L, L0Used, L1Used);
397288943Sdim    }
398234285Sdim
399234285Sdim  return Changed;
400234285Sdim}
401234285Sdim
402249423Sdimbool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
403249423Sdim                                                 unsigned &Reg,
404249423Sdim                                                 int64_t &IVBump,
405249423Sdim                                                 MachineInstr *&IVOp
406249423Sdim                                                 ) const {
407249423Sdim  MachineBasicBlock *Header = L->getHeader();
408314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
409249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
410314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
411288943Sdim  if (!Header || !Preheader || !Latch || !ExitingBlock)
412249423Sdim    return false;
413249423Sdim
414249423Sdim  // This pair represents an induction register together with an immediate
415249423Sdim  // value that will be added to it in each loop iteration.
416327952Sdim  using RegisterBump = std::pair<unsigned, int64_t>;
417249423Sdim
418249423Sdim  // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
419249423Sdim  // from an induction operation
420249423Sdim  //   R.next = R + bump
421249423Sdim  // where bump is an immediate value.
422327952Sdim  using InductionMap = std::map<unsigned, RegisterBump>;
423249423Sdim
424249423Sdim  InductionMap IndMap;
425249423Sdim
426327952Sdim  using instr_iterator = MachineBasicBlock::instr_iterator;
427327952Sdim
428249423Sdim  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
429249423Sdim       I != E && I->isPHI(); ++I) {
430249423Sdim    MachineInstr *Phi = &*I;
431249423Sdim
432249423Sdim    // Have a PHI instruction.  Get the operand that corresponds to the
433249423Sdim    // latch block, and see if is a result of an addition of form "reg+imm",
434249423Sdim    // where the "reg" is defined by the PHI node we are looking at.
435249423Sdim    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
436249423Sdim      if (Phi->getOperand(i+1).getMBB() != Latch)
437249423Sdim        continue;
438249423Sdim
439360784Sdim      Register PhiOpReg = Phi->getOperand(i).getReg();
440249423Sdim      MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
441249423Sdim
442314564Sdim      if (DI->getDesc().isAdd()) {
443288943Sdim        // If the register operand to the add is the PHI we're looking at, this
444288943Sdim        // meets the induction pattern.
445360784Sdim        Register IndReg = DI->getOperand(1).getReg();
446288943Sdim        MachineOperand &Opnd2 = DI->getOperand(2);
447288943Sdim        int64_t V;
448288943Sdim        if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
449360784Sdim          Register UpdReg = DI->getOperand(0).getReg();
450249423Sdim          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
451249423Sdim        }
452249423Sdim      }
453249423Sdim    }  // for (i)
454249423Sdim  }  // for (instr)
455249423Sdim
456249423Sdim  SmallVector<MachineOperand,2> Cond;
457276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
458309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
459249423Sdim  if (NotAnalyzed)
460249423Sdim    return false;
461249423Sdim
462288943Sdim  unsigned PredR, PredPos, PredRegFlags;
463288943Sdim  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
464288943Sdim    return false;
465249423Sdim
466249423Sdim  MachineInstr *PredI = MRI->getVRegDef(PredR);
467249423Sdim  if (!PredI->isCompare())
468249423Sdim    return false;
469249423Sdim
470249423Sdim  unsigned CmpReg1 = 0, CmpReg2 = 0;
471249423Sdim  int CmpImm = 0, CmpMask = 0;
472309124Sdim  bool CmpAnalyzed =
473309124Sdim      TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
474249423Sdim  // Fail if the compare was not analyzed, or it's not comparing a register
475249423Sdim  // with an immediate value.  Not checking the mask here, since we handle
476288943Sdim  // the individual compare opcodes (including A4_cmpb*) later on.
477249423Sdim  if (!CmpAnalyzed)
478249423Sdim    return false;
479249423Sdim
480249423Sdim  // Exactly one of the input registers to the comparison should be among
481249423Sdim  // the induction registers.
482249423Sdim  InductionMap::iterator IndMapEnd = IndMap.end();
483249423Sdim  InductionMap::iterator F = IndMapEnd;
484249423Sdim  if (CmpReg1 != 0) {
485249423Sdim    InductionMap::iterator F1 = IndMap.find(CmpReg1);
486249423Sdim    if (F1 != IndMapEnd)
487249423Sdim      F = F1;
488249423Sdim  }
489249423Sdim  if (CmpReg2 != 0) {
490249423Sdim    InductionMap::iterator F2 = IndMap.find(CmpReg2);
491249423Sdim    if (F2 != IndMapEnd) {
492249423Sdim      if (F != IndMapEnd)
493249423Sdim        return false;
494249423Sdim      F = F2;
495249423Sdim    }
496249423Sdim  }
497249423Sdim  if (F == IndMapEnd)
498249423Sdim    return false;
499249423Sdim
500249423Sdim  Reg = F->second.first;
501249423Sdim  IVBump = F->second.second;
502249423Sdim  IVOp = MRI->getVRegDef(F->first);
503249423Sdim  return true;
504249423Sdim}
505249423Sdim
506288943Sdim// Return the comparison kind for the specified opcode.
507288943SdimHexagonHardwareLoops::Comparison::Kind
508288943SdimHexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
509288943Sdim                                        MachineOperand *InitialValue,
510288943Sdim                                        const MachineOperand *EndValue,
511288943Sdim                                        int64_t IVBump) const {
512288943Sdim  Comparison::Kind Cmp = (Comparison::Kind)0;
513288943Sdim  switch (CondOpc) {
514327952Sdim  case Hexagon::C2_cmpeq:
515288943Sdim  case Hexagon::C2_cmpeqi:
516288943Sdim  case Hexagon::C2_cmpeqp:
517288943Sdim    Cmp = Comparison::EQ;
518288943Sdim    break;
519288943Sdim  case Hexagon::C4_cmpneq:
520288943Sdim  case Hexagon::C4_cmpneqi:
521288943Sdim    Cmp = Comparison::NE;
522288943Sdim    break;
523327952Sdim  case Hexagon::C2_cmplt:
524327952Sdim    Cmp = Comparison::LTs;
525327952Sdim    break;
526327952Sdim  case Hexagon::C2_cmpltu:
527327952Sdim    Cmp = Comparison::LTu;
528327952Sdim    break;
529288943Sdim  case Hexagon::C4_cmplte:
530327952Sdim  case Hexagon::C4_cmpltei:
531288943Sdim    Cmp = Comparison::LEs;
532288943Sdim    break;
533288943Sdim  case Hexagon::C4_cmplteu:
534327952Sdim  case Hexagon::C4_cmplteui:
535288943Sdim    Cmp = Comparison::LEu;
536288943Sdim    break;
537327952Sdim  case Hexagon::C2_cmpgt:
538327952Sdim  case Hexagon::C2_cmpgti:
539327952Sdim  case Hexagon::C2_cmpgtp:
540327952Sdim    Cmp = Comparison::GTs;
541327952Sdim    break;
542327952Sdim  case Hexagon::C2_cmpgtu:
543288943Sdim  case Hexagon::C2_cmpgtui:
544288943Sdim  case Hexagon::C2_cmpgtup:
545288943Sdim    Cmp = Comparison::GTu;
546288943Sdim    break;
547327952Sdim  case Hexagon::C2_cmpgei:
548327952Sdim    Cmp = Comparison::GEs;
549288943Sdim    break;
550327952Sdim  case Hexagon::C2_cmpgeui:
551327952Sdim    Cmp = Comparison::GEs;
552327952Sdim    break;
553288943Sdim  default:
554288943Sdim    return (Comparison::Kind)0;
555288943Sdim  }
556288943Sdim  return Cmp;
557288943Sdim}
558249423Sdim
559341825Sdim/// Analyze the statements in a loop to determine if the loop has
560249423Sdim/// a computable trip count and, if so, return a value that represents
561249423Sdim/// the trip count expression.
562234285Sdim///
563249423Sdim/// This function iterates over the phi nodes in the loop to check for
564249423Sdim/// induction variable patterns that are used in the calculation for
565249423Sdim/// the number of time the loop is executed.
566249423SdimCountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
567288943Sdim    SmallVectorImpl<MachineInstr *> &OldInsts) {
568234285Sdim  MachineBasicBlock *TopMBB = L->getTopBlock();
569234285Sdim  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
570234285Sdim  assert(PI != TopMBB->pred_end() &&
571234285Sdim         "Loop must have more than one incoming edge!");
572234285Sdim  MachineBasicBlock *Backedge = *PI++;
573249423Sdim  if (PI == TopMBB->pred_end())  // dead loop?
574276479Sdim    return nullptr;
575234285Sdim  MachineBasicBlock *Incoming = *PI++;
576249423Sdim  if (PI != TopMBB->pred_end())  // multiple backedges?
577276479Sdim    return nullptr;
578234285Sdim
579249423Sdim  // Make sure there is one incoming and one backedge and determine which
580234285Sdim  // is which.
581234285Sdim  if (L->contains(Incoming)) {
582234285Sdim    if (L->contains(Backedge))
583276479Sdim      return nullptr;
584234285Sdim    std::swap(Incoming, Backedge);
585234285Sdim  } else if (!L->contains(Backedge))
586276479Sdim    return nullptr;
587234285Sdim
588249423Sdim  // Look for the cmp instruction to determine if we can get a useful trip
589249423Sdim  // count.  The trip count can be either a register or an immediate.  The
590249423Sdim  // location of the value depends upon the type (reg or imm).
591314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
592288943Sdim  if (!ExitingBlock)
593276479Sdim    return nullptr;
594249423Sdim
595249423Sdim  unsigned IVReg = 0;
596249423Sdim  int64_t IVBump = 0;
597249423Sdim  MachineInstr *IVOp;
598249423Sdim  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
599249423Sdim  if (!FoundIV)
600276479Sdim    return nullptr;
601249423Sdim
602314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
603249423Sdim
604276479Sdim  MachineOperand *InitialValue = nullptr;
605249423Sdim  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
606288943Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
607249423Sdim  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
608249423Sdim    MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
609249423Sdim    if (MBB == Preheader)
610249423Sdim      InitialValue = &IV_Phi->getOperand(i);
611249423Sdim    else if (MBB == Latch)
612249423Sdim      IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
613234285Sdim  }
614249423Sdim  if (!InitialValue)
615276479Sdim    return nullptr;
616234285Sdim
617249423Sdim  SmallVector<MachineOperand,2> Cond;
618276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
619309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
620249423Sdim  if (NotAnalyzed)
621276479Sdim    return nullptr;
622234285Sdim
623249423Sdim  MachineBasicBlock *Header = L->getHeader();
624249423Sdim  // TB must be non-null.  If FB is also non-null, one of them must be
625249423Sdim  // the header.  Otherwise, branch to TB could be exiting the loop, and
626249423Sdim  // the fall through can go to the header.
627288943Sdim  assert (TB && "Exit block without a branch?");
628288943Sdim  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
629314564Sdim    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
630288943Sdim    SmallVector<MachineOperand,2> LCond;
631309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
632288943Sdim    if (NotAnalyzed)
633288943Sdim      return nullptr;
634288943Sdim    if (TB == Latch)
635288943Sdim      TB = (LTB == Header) ? LTB : LFB;
636288943Sdim    else
637288943Sdim      FB = (LTB == Header) ? LTB: LFB;
638288943Sdim  }
639249423Sdim  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
640249423Sdim  if (!TB || (FB && TB != Header && FB != Header))
641276479Sdim    return nullptr;
642249423Sdim
643249423Sdim  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
644249423Sdim  // to put imm(0), followed by P in the vector Cond.
645249423Sdim  // If TB is not the header, it means that the "not-taken" path must lead
646249423Sdim  // to the header.
647288943Sdim  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
648288943Sdim  unsigned PredReg, PredPos, PredRegFlags;
649288943Sdim  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
650288943Sdim    return nullptr;
651249423Sdim  MachineInstr *CondI = MRI->getVRegDef(PredReg);
652249423Sdim  unsigned CondOpc = CondI->getOpcode();
653249423Sdim
654249423Sdim  unsigned CmpReg1 = 0, CmpReg2 = 0;
655249423Sdim  int Mask = 0, ImmValue = 0;
656309124Sdim  bool AnalyzedCmp =
657309124Sdim      TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
658249423Sdim  if (!AnalyzedCmp)
659276479Sdim    return nullptr;
660249423Sdim
661249423Sdim  // The comparison operator type determines how we compute the loop
662249423Sdim  // trip count.
663249423Sdim  OldInsts.push_back(CondI);
664249423Sdim  OldInsts.push_back(IVOp);
665249423Sdim
666249423Sdim  // Sadly, the following code gets information based on the position
667249423Sdim  // of the operands in the compare instruction.  This has to be done
668249423Sdim  // this way, because the comparisons check for a specific relationship
669249423Sdim  // between the operands (e.g. is-less-than), rather than to find out
670249423Sdim  // what relationship the operands are in (as on PPC).
671249423Sdim  Comparison::Kind Cmp;
672249423Sdim  bool isSwapped = false;
673249423Sdim  const MachineOperand &Op1 = CondI->getOperand(1);
674249423Sdim  const MachineOperand &Op2 = CondI->getOperand(2);
675276479Sdim  const MachineOperand *EndValue = nullptr;
676249423Sdim
677249423Sdim  if (Op1.isReg()) {
678249423Sdim    if (Op2.isImm() || Op1.getReg() == IVReg)
679249423Sdim      EndValue = &Op2;
680249423Sdim    else {
681249423Sdim      EndValue = &Op1;
682249423Sdim      isSwapped = true;
683249423Sdim    }
684234285Sdim  }
685234285Sdim
686249423Sdim  if (!EndValue)
687276479Sdim    return nullptr;
688234285Sdim
689288943Sdim  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
690288943Sdim  if (!Cmp)
691288943Sdim    return nullptr;
692288943Sdim  if (Negated)
693288943Sdim    Cmp = Comparison::getNegatedComparison(Cmp);
694249423Sdim  if (isSwapped)
695288943Sdim    Cmp = Comparison::getSwappedComparison(Cmp);
696249423Sdim
697249423Sdim  if (InitialValue->isReg()) {
698360784Sdim    Register R = InitialValue->getReg();
699249423Sdim    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
700327952Sdim    if (!MDT->properlyDominates(DefBB, Header)) {
701327952Sdim      int64_t V;
702327952Sdim      if (!checkForImmediate(*InitialValue, V))
703327952Sdim        return nullptr;
704327952Sdim    }
705249423Sdim    OldInsts.push_back(MRI->getVRegDef(R));
706249423Sdim  }
707249423Sdim  if (EndValue->isReg()) {
708360784Sdim    Register R = EndValue->getReg();
709249423Sdim    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
710327952Sdim    if (!MDT->properlyDominates(DefBB, Header)) {
711327952Sdim      int64_t V;
712327952Sdim      if (!checkForImmediate(*EndValue, V))
713327952Sdim        return nullptr;
714327952Sdim    }
715288943Sdim    OldInsts.push_back(MRI->getVRegDef(R));
716249423Sdim  }
717249423Sdim
718249423Sdim  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
719234285Sdim}
720234285Sdim
721341825Sdim/// Helper function that returns the expression that represents the
722249423Sdim/// number of times a loop iterates.  The function takes the operands that
723249423Sdim/// represent the loop start value, loop end value, and induction value.
724249423Sdim/// Based upon these operands, the function attempts to compute the trip count.
725249423SdimCountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
726249423Sdim                                               const MachineOperand *Start,
727249423Sdim                                               const MachineOperand *End,
728249423Sdim                                               unsigned IVReg,
729249423Sdim                                               int64_t IVBump,
730249423Sdim                                               Comparison::Kind Cmp) const {
731249423Sdim  // Cannot handle comparison EQ, i.e. while (A == B).
732249423Sdim  if (Cmp == Comparison::EQ)
733276479Sdim    return nullptr;
734249423Sdim
735249423Sdim  // Check if either the start or end values are an assignment of an immediate.
736249423Sdim  // If so, use the immediate value rather than the register.
737249423Sdim  if (Start->isReg()) {
738249423Sdim    const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
739288943Sdim    if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
740288943Sdim                          StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
741249423Sdim      Start = &StartValInstr->getOperand(1);
742249423Sdim  }
743249423Sdim  if (End->isReg()) {
744249423Sdim    const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
745288943Sdim    if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
746288943Sdim                        EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
747249423Sdim      End = &EndValInstr->getOperand(1);
748249423Sdim  }
749249423Sdim
750288943Sdim  if (!Start->isReg() && !Start->isImm())
751288943Sdim    return nullptr;
752288943Sdim  if (!End->isReg() && !End->isImm())
753288943Sdim    return nullptr;
754249423Sdim
755249423Sdim  bool CmpLess =     Cmp & Comparison::L;
756249423Sdim  bool CmpGreater =  Cmp & Comparison::G;
757249423Sdim  bool CmpHasEqual = Cmp & Comparison::EQ;
758249423Sdim
759249423Sdim  // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
760249423Sdim  if (CmpLess && IVBump < 0)
761288943Sdim    // Loop going while iv is "less" with the iv value going down.  Must wrap.
762276479Sdim    return nullptr;
763288943Sdim
764249423Sdim  if (CmpGreater && IVBump > 0)
765288943Sdim    // Loop going while iv is "greater" with the iv value going up.  Must wrap.
766276479Sdim    return nullptr;
767249423Sdim
768288943Sdim  // Phis that may feed into the loop.
769288943Sdim  LoopFeederMap LoopFeederPhi;
770288943Sdim
771296417Sdim  // Check if the initial value may be zero and can be decremented in the first
772288943Sdim  // iteration. If the value is zero, the endloop instruction will not decrement
773296417Sdim  // the loop counter, so we shouldn't generate a hardware loop in this case.
774288943Sdim  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
775288943Sdim                                  LoopFeederPhi))
776288943Sdim      return nullptr;
777288943Sdim
778249423Sdim  if (Start->isImm() && End->isImm()) {
779249423Sdim    // Both, start and end are immediates.
780249423Sdim    int64_t StartV = Start->getImm();
781249423Sdim    int64_t EndV = End->getImm();
782249423Sdim    int64_t Dist = EndV - StartV;
783249423Sdim    if (Dist == 0)
784276479Sdim      return nullptr;
785249423Sdim
786249423Sdim    bool Exact = (Dist % IVBump) == 0;
787249423Sdim
788249423Sdim    if (Cmp == Comparison::NE) {
789249423Sdim      if (!Exact)
790276479Sdim        return nullptr;
791249423Sdim      if ((Dist < 0) ^ (IVBump < 0))
792276479Sdim        return nullptr;
793249423Sdim    }
794249423Sdim
795249423Sdim    // For comparisons that include the final value (i.e. include equality
796249423Sdim    // with the final value), we need to increase the distance by 1.
797249423Sdim    if (CmpHasEqual)
798249423Sdim      Dist = Dist > 0 ? Dist+1 : Dist-1;
799249423Sdim
800288943Sdim    // For the loop to iterate, CmpLess should imply Dist > 0.  Similarly,
801288943Sdim    // CmpGreater should imply Dist < 0.  These conditions could actually
802288943Sdim    // fail, for example, in unreachable code (which may still appear to be
803288943Sdim    // reachable in the CFG).
804288943Sdim    if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
805288943Sdim      return nullptr;
806249423Sdim
807249423Sdim    // "Normalized" distance, i.e. with the bump set to +-1.
808288943Sdim    int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump - 1)) / IVBump
809288943Sdim                                 : (-Dist + (-IVBump - 1)) / (-IVBump);
810249423Sdim    assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
811249423Sdim
812249423Sdim    uint64_t Count = Dist1;
813249423Sdim
814249423Sdim    if (Count > 0xFFFFFFFFULL)
815276479Sdim      return nullptr;
816249423Sdim
817249423Sdim    return new CountValue(CountValue::CV_Immediate, Count);
818249423Sdim  }
819249423Sdim
820249423Sdim  // A general case: Start and End are some values, but the actual
821249423Sdim  // iteration count may not be available.  If it is not, insert
822249423Sdim  // a computation of it into the preheader.
823249423Sdim
824249423Sdim  // If the induction variable bump is not a power of 2, quit.
825249423Sdim  // Othwerise we'd need a general integer division.
826288943Sdim  if (!isPowerOf2_64(std::abs(IVBump)))
827276479Sdim    return nullptr;
828249423Sdim
829314564Sdim  MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
830249423Sdim  assert (PH && "Should have a preheader by now");
831249423Sdim  MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
832288943Sdim  DebugLoc DL;
833288943Sdim  if (InsertPos != PH->end())
834288943Sdim    DL = InsertPos->getDebugLoc();
835249423Sdim
836249423Sdim  // If Start is an immediate and End is a register, the trip count
837249423Sdim  // will be "reg - imm".  Hexagon's "subtract immediate" instruction
838249423Sdim  // is actually "reg + -imm".
839249423Sdim
840249423Sdim  // If the loop IV is going downwards, i.e. if the bump is negative,
841249423Sdim  // then the iteration count (computed as End-Start) will need to be
842249423Sdim  // negated.  To avoid the negation, just swap Start and End.
843249423Sdim  if (IVBump < 0) {
844249423Sdim    std::swap(Start, End);
845249423Sdim    IVBump = -IVBump;
846249423Sdim  }
847249423Sdim  // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
848249423Sdim  // Signedness, and "including equality" are preserved.
849249423Sdim
850249423Sdim  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
851249423Sdim  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
852249423Sdim
853249423Sdim  int64_t StartV = 0, EndV = 0;
854249423Sdim  if (Start->isImm())
855249423Sdim    StartV = Start->getImm();
856249423Sdim  if (End->isImm())
857249423Sdim    EndV = End->getImm();
858249423Sdim
859249423Sdim  int64_t AdjV = 0;
860249423Sdim  // To compute the iteration count, we would need this computation:
861249423Sdim  //   Count = (End - Start + (IVBump-1)) / IVBump
862249423Sdim  // or, when CmpHasEqual:
863249423Sdim  //   Count = (End - Start + (IVBump-1)+1) / IVBump
864249423Sdim  // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
865249423Sdim  // generating an instruction specifically to add it if we can adjust
866249423Sdim  // the immediate values for Start or End.
867249423Sdim
868249423Sdim  if (CmpHasEqual) {
869249423Sdim    // Need to add 1 to the total iteration count.
870249423Sdim    if (Start->isImm())
871249423Sdim      StartV--;
872249423Sdim    else if (End->isImm())
873249423Sdim      EndV++;
874249423Sdim    else
875249423Sdim      AdjV += 1;
876249423Sdim  }
877249423Sdim
878249423Sdim  if (Cmp != Comparison::NE) {
879249423Sdim    if (Start->isImm())
880249423Sdim      StartV -= (IVBump-1);
881249423Sdim    else if (End->isImm())
882249423Sdim      EndV += (IVBump-1);
883249423Sdim    else
884249423Sdim      AdjV += (IVBump-1);
885249423Sdim  }
886249423Sdim
887249423Sdim  unsigned R = 0, SR = 0;
888249423Sdim  if (Start->isReg()) {
889249423Sdim    R = Start->getReg();
890249423Sdim    SR = Start->getSubReg();
891249423Sdim  } else {
892249423Sdim    R = End->getReg();
893249423Sdim    SR = End->getSubReg();
894249423Sdim  }
895249423Sdim  const TargetRegisterClass *RC = MRI->getRegClass(R);
896249423Sdim  // Hardware loops cannot handle 64-bit registers.  If it's a double
897249423Sdim  // register, it has to have a subregister.
898249423Sdim  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
899276479Sdim    return nullptr;
900249423Sdim  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
901249423Sdim
902249423Sdim  // Compute DistR (register with the distance between Start and End).
903249423Sdim  unsigned DistR, DistSR;
904249423Sdim
905249423Sdim  // Avoid special case, where the start value is an imm(0).
906249423Sdim  if (Start->isImm() && StartV == 0) {
907249423Sdim    DistR = End->getReg();
908249423Sdim    DistSR = End->getSubReg();
909249423Sdim  } else {
910280031Sdim    const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
911288943Sdim                              (RegToImm ? TII->get(Hexagon::A2_subri) :
912288943Sdim                                          TII->get(Hexagon::A2_addi));
913288943Sdim    if (RegToReg || RegToImm) {
914360784Sdim      Register SubR = MRI->createVirtualRegister(IntRC);
915288943Sdim      MachineInstrBuilder SubIB =
916288943Sdim        BuildMI(*PH, InsertPos, DL, SubD, SubR);
917249423Sdim
918288943Sdim      if (RegToReg)
919288943Sdim        SubIB.addReg(End->getReg(), 0, End->getSubReg())
920288943Sdim          .addReg(Start->getReg(), 0, Start->getSubReg());
921288943Sdim      else
922288943Sdim        SubIB.addImm(EndV)
923288943Sdim          .addReg(Start->getReg(), 0, Start->getSubReg());
924288943Sdim      DistR = SubR;
925288943Sdim    } else {
926288943Sdim      // If the loop has been unrolled, we should use the original loop count
927288943Sdim      // instead of recalculating the value. This will avoid additional
928288943Sdim      // 'Add' instruction.
929288943Sdim      const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
930288943Sdim      if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
931341825Sdim          EndValInstr->getOperand(1).getSubReg() == 0 &&
932288943Sdim          EndValInstr->getOperand(2).getImm() == StartV) {
933288943Sdim        DistR = EndValInstr->getOperand(1).getReg();
934288943Sdim      } else {
935360784Sdim        Register SubR = MRI->createVirtualRegister(IntRC);
936288943Sdim        MachineInstrBuilder SubIB =
937288943Sdim          BuildMI(*PH, InsertPos, DL, SubD, SubR);
938288943Sdim        SubIB.addReg(End->getReg(), 0, End->getSubReg())
939288943Sdim             .addImm(-StartV);
940288943Sdim        DistR = SubR;
941288943Sdim      }
942249423Sdim    }
943249423Sdim    DistSR = 0;
944249423Sdim  }
945249423Sdim
946249423Sdim  // From DistR, compute AdjR (register with the adjusted distance).
947249423Sdim  unsigned AdjR, AdjSR;
948249423Sdim
949249423Sdim  if (AdjV == 0) {
950249423Sdim    AdjR = DistR;
951249423Sdim    AdjSR = DistSR;
952249423Sdim  } else {
953249423Sdim    // Generate CountR = ADD DistR, AdjVal
954360784Sdim    Register AddR = MRI->createVirtualRegister(IntRC);
955288943Sdim    MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
956249423Sdim    BuildMI(*PH, InsertPos, DL, AddD, AddR)
957249423Sdim      .addReg(DistR, 0, DistSR)
958249423Sdim      .addImm(AdjV);
959249423Sdim
960249423Sdim    AdjR = AddR;
961249423Sdim    AdjSR = 0;
962249423Sdim  }
963249423Sdim
964249423Sdim  // From AdjR, compute CountR (register with the final count).
965249423Sdim  unsigned CountR, CountSR;
966249423Sdim
967249423Sdim  if (IVBump == 1) {
968249423Sdim    CountR = AdjR;
969249423Sdim    CountSR = AdjSR;
970249423Sdim  } else {
971249423Sdim    // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
972249423Sdim    unsigned Shift = Log2_32(IVBump);
973249423Sdim
974249423Sdim    // Generate NormR = LSR DistR, Shift.
975360784Sdim    Register LsrR = MRI->createVirtualRegister(IntRC);
976280031Sdim    const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
977249423Sdim    BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
978249423Sdim      .addReg(AdjR, 0, AdjSR)
979249423Sdim      .addImm(Shift);
980249423Sdim
981249423Sdim    CountR = LsrR;
982249423Sdim    CountSR = 0;
983249423Sdim  }
984249423Sdim
985249423Sdim  return new CountValue(CountValue::CV_Register, CountR, CountSR);
986234285Sdim}
987234285Sdim
988341825Sdim/// Return true if the operation is invalid within hardware loop.
989288943Sdimbool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
990288943Sdim                                                  bool IsInnerHWLoop) const {
991288943Sdim  // Call is not allowed because the callee may use a hardware loop except for
992288943Sdim  // the case when the call never returns.
993314564Sdim  if (MI->getDesc().isCall())
994314564Sdim    return !TII->doesNotReturn(*MI);
995249423Sdim
996288943Sdim  // Check if the instruction defines a hardware loop register.
997321369Sdim  using namespace Hexagon;
998327952Sdim
999321369Sdim  static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
1000321369Sdim  static const unsigned Regs1[]  = { LC1, SA1 };
1001321369Sdim  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
1002321369Sdim                                 : makeArrayRef(Regs1, array_lengthof(Regs1));
1003321369Sdim  for (unsigned R : CheckRegs)
1004321369Sdim    if (MI->modifiesRegister(R, TRI))
1005234285Sdim      return true;
1006321369Sdim
1007234285Sdim  return false;
1008234285Sdim}
1009234285Sdim
1010341825Sdim/// Return true if the loop contains an instruction that inhibits
1011288943Sdim/// the use of the hardware loop instruction.
1012288943Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1013288943Sdim    bool IsInnerHWLoop) const {
1014344779Sdim  LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1015344779Sdim                    << printMBBReference(**L->block_begin()));
1016344779Sdim  for (MachineBasicBlock *MBB : L->getBlocks()) {
1017234285Sdim    for (MachineBasicBlock::iterator
1018234285Sdim           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
1019234285Sdim      const MachineInstr *MI = &*MII;
1020288943Sdim      if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
1021341825Sdim        LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1022341825Sdim                   MI->dump(););
1023234285Sdim        return true;
1024288943Sdim      }
1025234285Sdim    }
1026234285Sdim  }
1027234285Sdim  return false;
1028234285Sdim}
1029234285Sdim
1030341825Sdim/// Returns true if the instruction is dead.  This was essentially
1031249423Sdim/// copied from DeadMachineInstructionElim::isDead, but with special cases
1032249423Sdim/// for inline asm, physical registers and instructions with side effects
1033249423Sdim/// removed.
1034249423Sdimbool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1035261991Sdim                              SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1036249423Sdim  // Examine each operand.
1037249423Sdim  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1038249423Sdim    const MachineOperand &MO = MI->getOperand(i);
1039249423Sdim    if (!MO.isReg() || !MO.isDef())
1040249423Sdim      continue;
1041249423Sdim
1042360784Sdim    Register Reg = MO.getReg();
1043249423Sdim    if (MRI->use_nodbg_empty(Reg))
1044249423Sdim      continue;
1045249423Sdim
1046327952Sdim    using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1047249423Sdim
1048249423Sdim    // This instruction has users, but if the only user is the phi node for the
1049249423Sdim    // parent block, and the only use of that phi node is this instruction, then
1050249423Sdim    // this instruction is dead: both it (and the phi node) can be removed.
1051249423Sdim    use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1052249423Sdim    use_nodbg_iterator End = MRI->use_nodbg_end();
1053276479Sdim    if (std::next(I) != End || !I->getParent()->isPHI())
1054249423Sdim      return false;
1055249423Sdim
1056276479Sdim    MachineInstr *OnePhi = I->getParent();
1057249423Sdim    for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1058249423Sdim      const MachineOperand &OPO = OnePhi->getOperand(j);
1059249423Sdim      if (!OPO.isReg() || !OPO.isDef())
1060249423Sdim        continue;
1061249423Sdim
1062360784Sdim      Register OPReg = OPO.getReg();
1063249423Sdim      use_nodbg_iterator nextJ;
1064249423Sdim      for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1065249423Sdim           J != End; J = nextJ) {
1066276479Sdim        nextJ = std::next(J);
1067276479Sdim        MachineOperand &Use = *J;
1068249423Sdim        MachineInstr *UseMI = Use.getParent();
1069249423Sdim
1070288943Sdim        // If the phi node has a user that is not MI, bail.
1071249423Sdim        if (MI != UseMI)
1072249423Sdim          return false;
1073249423Sdim      }
1074249423Sdim    }
1075249423Sdim    DeadPhis.push_back(OnePhi);
1076249423Sdim  }
1077249423Sdim
1078249423Sdim  // If there are no defs with uses, the instruction is dead.
1079249423Sdim  return true;
1080249423Sdim}
1081249423Sdim
1082249423Sdimvoid HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1083249423Sdim  // This procedure was essentially copied from DeadMachineInstructionElim.
1084249423Sdim
1085249423Sdim  SmallVector<MachineInstr*, 1> DeadPhis;
1086249423Sdim  if (isDead(MI, DeadPhis)) {
1087341825Sdim    LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1088249423Sdim
1089249423Sdim    // It is possible that some DBG_VALUE instructions refer to this
1090249423Sdim    // instruction.  Examine each def operand for such references;
1091249423Sdim    // if found, mark the DBG_VALUE as undef (but don't delete it).
1092249423Sdim    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1093249423Sdim      const MachineOperand &MO = MI->getOperand(i);
1094249423Sdim      if (!MO.isReg() || !MO.isDef())
1095249423Sdim        continue;
1096360784Sdim      Register Reg = MO.getReg();
1097249423Sdim      MachineRegisterInfo::use_iterator nextI;
1098249423Sdim      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1099249423Sdim           E = MRI->use_end(); I != E; I = nextI) {
1100276479Sdim        nextI = std::next(I);  // I is invalidated by the setReg
1101276479Sdim        MachineOperand &Use = *I;
1102276479Sdim        MachineInstr *UseMI = I->getParent();
1103249423Sdim        if (UseMI == MI)
1104249423Sdim          continue;
1105249423Sdim        if (Use.isDebug())
1106249423Sdim          UseMI->getOperand(0).setReg(0U);
1107249423Sdim      }
1108249423Sdim    }
1109249423Sdim
1110249423Sdim    MI->eraseFromParent();
1111249423Sdim    for (unsigned i = 0; i < DeadPhis.size(); ++i)
1112249423Sdim      DeadPhis[i]->eraseFromParent();
1113249423Sdim  }
1114249423Sdim}
1115249423Sdim
1116341825Sdim/// Check if the loop is a candidate for converting to a hardware
1117249423Sdim/// loop.  If so, then perform the transformation.
1118234285Sdim///
1119249423Sdim/// This function works on innermost loops first.  A loop can be converted
1120249423Sdim/// if it is a counting loop; either a register value or an immediate.
1121234285Sdim///
1122249423Sdim/// The code makes several assumptions about the representation of the loop
1123249423Sdim/// in llvm.
1124288943Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1125288943Sdim                                                 bool &RecL0used,
1126288943Sdim                                                 bool &RecL1used) {
1127249423Sdim  // This is just for sanity.
1128249423Sdim  assert(L->getHeader() && "Loop without a header?");
1129249423Sdim
1130234285Sdim  bool Changed = false;
1131288943Sdim  bool L0Used = false;
1132288943Sdim  bool L1Used = false;
1133288943Sdim
1134234285Sdim  // Process nested loops first.
1135288943Sdim  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1136288943Sdim    Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1137288943Sdim    L0Used |= RecL0used;
1138288943Sdim    L1Used |= RecL1used;
1139288943Sdim  }
1140249423Sdim
1141234285Sdim  // If a nested loop has been converted, then we can't convert this loop.
1142288943Sdim  if (Changed && L0Used && L1Used)
1143234285Sdim    return Changed;
1144249423Sdim
1145288943Sdim  unsigned LOOP_i;
1146288943Sdim  unsigned LOOP_r;
1147288943Sdim  unsigned ENDLOOP;
1148288943Sdim
1149288943Sdim  // Flag used to track loopN instruction:
1150288943Sdim  // 1 - Hardware loop is being generated for the inner most loop.
1151288943Sdim  // 0 - Hardware loop is being generated for the outer loop.
1152288943Sdim  unsigned IsInnerHWLoop = 1;
1153288943Sdim
1154288943Sdim  if (L0Used) {
1155288943Sdim    LOOP_i = Hexagon::J2_loop1i;
1156288943Sdim    LOOP_r = Hexagon::J2_loop1r;
1157288943Sdim    ENDLOOP = Hexagon::ENDLOOP1;
1158288943Sdim    IsInnerHWLoop = 0;
1159288943Sdim  } else {
1160288943Sdim    LOOP_i = Hexagon::J2_loop0i;
1161288943Sdim    LOOP_r = Hexagon::J2_loop0r;
1162288943Sdim    ENDLOOP = Hexagon::ENDLOOP0;
1163288943Sdim  }
1164288943Sdim
1165249423Sdim#ifndef NDEBUG
1166249423Sdim  // Stop trying after reaching the limit (if any).
1167249423Sdim  int Limit = HWLoopLimit;
1168249423Sdim  if (Limit >= 0) {
1169249423Sdim    if (Counter >= HWLoopLimit)
1170249423Sdim      return false;
1171249423Sdim    Counter++;
1172234285Sdim  }
1173249423Sdim#endif
1174249423Sdim
1175234285Sdim  // Does the loop contain any invalid instructions?
1176288943Sdim  if (containsInvalidInstruction(L, IsInnerHWLoop))
1177234285Sdim    return false;
1178249423Sdim
1179314564Sdim  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1180234285Sdim  // Don't generate hw loop if the loop has more than one exit.
1181276479Sdim  if (!LastMBB)
1182234285Sdim    return false;
1183249423Sdim
1184234285Sdim  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1185249423Sdim  if (LastI == LastMBB->end())
1186249423Sdim    return false;
1187234285Sdim
1188288943Sdim  // Is the induction variable bump feeding the latch condition?
1189288943Sdim  if (!fixupInductionVariable(L))
1190288943Sdim    return false;
1191288943Sdim
1192249423Sdim  // Ensure the loop has a preheader: the loop instruction will be
1193249423Sdim  // placed there.
1194314564Sdim  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1195249423Sdim  if (!Preheader) {
1196249423Sdim    Preheader = createPreheaderForLoop(L);
1197249423Sdim    if (!Preheader)
1198249423Sdim      return false;
1199249423Sdim  }
1200288943Sdim
1201249423Sdim  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1202249423Sdim
1203249423Sdim  SmallVector<MachineInstr*, 2> OldInsts;
1204249423Sdim  // Are we able to determine the trip count for the loop?
1205249423Sdim  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1206276479Sdim  if (!TripCount)
1207249423Sdim    return false;
1208249423Sdim
1209249423Sdim  // Is the trip count available in the preheader?
1210249423Sdim  if (TripCount->isReg()) {
1211249423Sdim    // There will be a use of the register inserted into the preheader,
1212249423Sdim    // so make sure that the register is actually defined at that point.
1213249423Sdim    MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1214249423Sdim    MachineBasicBlock *BBDef = TCDef->getParent();
1215288943Sdim    if (!MDT->dominates(BBDef, Preheader))
1216288943Sdim      return false;
1217249423Sdim  }
1218249423Sdim
1219234285Sdim  // Determine the loop start.
1220288943Sdim  MachineBasicBlock *TopBlock = L->getTopBlock();
1221314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1222314564Sdim  MachineBasicBlock *LoopStart = nullptr;
1223288943Sdim  if (ExitingBlock !=  L->getLoopLatch()) {
1224314564Sdim    MachineBasicBlock *TB = nullptr, *FB = nullptr;
1225288943Sdim    SmallVector<MachineOperand, 2> Cond;
1226288943Sdim
1227309124Sdim    if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1228234285Sdim      return false;
1229288943Sdim
1230288943Sdim    if (L->contains(TB))
1231288943Sdim      LoopStart = TB;
1232288943Sdim    else if (L->contains(FB))
1233288943Sdim      LoopStart = FB;
1234288943Sdim    else
1235288943Sdim      return false;
1236234285Sdim  }
1237288943Sdim  else
1238288943Sdim    LoopStart = TopBlock;
1239234285Sdim
1240249423Sdim  // Convert the loop to a hardware loop.
1241341825Sdim  LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1242249423Sdim  DebugLoc DL;
1243249423Sdim  if (InsertPos != Preheader->end())
1244249423Sdim    DL = InsertPos->getDebugLoc();
1245234285Sdim
1246234285Sdim  if (TripCount->isReg()) {
1247234285Sdim    // Create a copy of the loop count register.
1248360784Sdim    Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1249249423Sdim    BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1250249423Sdim      .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1251239462Sdim    // Add the Loop instruction to the beginning of the loop.
1252288943Sdim    BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1253249423Sdim      .addReg(CountReg);
1254234285Sdim  } else {
1255249423Sdim    assert(TripCount->isImm() && "Expecting immediate value for trip count");
1256249423Sdim    // Add the Loop immediate instruction to the beginning of the loop,
1257249423Sdim    // if the immediate fits in the instructions.  Otherwise, we need to
1258249423Sdim    // create a new virtual register.
1259234285Sdim    int64_t CountImm = TripCount->getImm();
1260327952Sdim    if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1261360784Sdim      Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1262280031Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1263249423Sdim        .addImm(CountImm);
1264288943Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1265249423Sdim        .addMBB(LoopStart).addReg(CountReg);
1266249423Sdim    } else
1267288943Sdim      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1268249423Sdim        .addMBB(LoopStart).addImm(CountImm);
1269234285Sdim  }
1270234285Sdim
1271249423Sdim  // Make sure the loop start always has a reference in the CFG.  We need
1272249423Sdim  // to create a BlockAddress operand to get this mechanism to work both the
1273234285Sdim  // MachineBasicBlock and BasicBlock objects need the flag set.
1274234285Sdim  LoopStart->setHasAddressTaken();
1275234285Sdim  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1276249423Sdim  // object.
1277234285Sdim  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1278234285Sdim
1279234285Sdim  // Replace the loop branch with an endloop instruction.
1280249423Sdim  DebugLoc LastIDL = LastI->getDebugLoc();
1281288943Sdim  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1282234285Sdim
1283234285Sdim  // The loop ends with either:
1284234285Sdim  //  - a conditional branch followed by an unconditional branch, or
1285234285Sdim  //  - a conditional branch to the loop start.
1286280031Sdim  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1287280031Sdim      LastI->getOpcode() == Hexagon::J2_jumpf) {
1288249423Sdim    // Delete one and change/add an uncond. branch to out of the loop.
1289234285Sdim    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1290234285Sdim    LastI = LastMBB->erase(LastI);
1291234285Sdim    if (!L->contains(BranchTarget)) {
1292249423Sdim      if (LastI != LastMBB->end())
1293249423Sdim        LastI = LastMBB->erase(LastI);
1294234285Sdim      SmallVector<MachineOperand, 0> Cond;
1295314564Sdim      TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1296234285Sdim    }
1297234285Sdim  } else {
1298234285Sdim    // Conditional branch to loop start; just delete it.
1299234285Sdim    LastMBB->erase(LastI);
1300234285Sdim  }
1301234285Sdim  delete TripCount;
1302234285Sdim
1303249423Sdim  // The induction operation and the comparison may now be
1304249423Sdim  // unneeded. If these are unneeded, then remove them.
1305249423Sdim  for (unsigned i = 0; i < OldInsts.size(); ++i)
1306249423Sdim    removeIfDead(OldInsts[i]);
1307249423Sdim
1308234285Sdim  ++NumHWLoops;
1309288943Sdim
1310288943Sdim  // Set RecL1used and RecL0used only after hardware loop has been
1311288943Sdim  // successfully generated. Doing it earlier can cause wrong loop instruction
1312288943Sdim  // to be used.
1313288943Sdim  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1314288943Sdim    RecL1used = true;
1315288943Sdim  else
1316288943Sdim    RecL0used = true;
1317288943Sdim
1318234285Sdim  return true;
1319234285Sdim}
1320234285Sdim
1321249423Sdimbool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1322249423Sdim                                            MachineInstr *CmpI) {
1323249423Sdim  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1324249423Sdim
1325249423Sdim  MachineBasicBlock *BB = BumpI->getParent();
1326249423Sdim  if (CmpI->getParent() != BB)
1327249423Sdim    return false;
1328249423Sdim
1329327952Sdim  using instr_iterator = MachineBasicBlock::instr_iterator;
1330327952Sdim
1331249423Sdim  // Check if things are in order to begin with.
1332296417Sdim  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1333249423Sdim    if (&*I == CmpI)
1334249423Sdim      return true;
1335249423Sdim
1336249423Sdim  // Out of order.
1337360784Sdim  Register PredR = CmpI->getOperand(0).getReg();
1338249423Sdim  bool FoundBump = false;
1339296417Sdim  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1340249423Sdim  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1341249423Sdim    MachineInstr *In = &*I;
1342249423Sdim    for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1343249423Sdim      MachineOperand &MO = In->getOperand(i);
1344249423Sdim      if (MO.isReg() && MO.isUse()) {
1345249423Sdim        if (MO.getReg() == PredR)  // Found an intervening use of PredR.
1346249423Sdim          return false;
1347249423Sdim      }
1348249423Sdim    }
1349249423Sdim
1350249423Sdim    if (In == BumpI) {
1351296417Sdim      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1352249423Sdim      FoundBump = true;
1353249423Sdim      break;
1354249423Sdim    }
1355249423Sdim  }
1356249423Sdim  assert (FoundBump && "Cannot determine instruction order");
1357249423Sdim  return FoundBump;
1358234285Sdim}
1359234285Sdim
1360288943Sdim/// This function is required to break recursion. Visiting phis in a loop may
1361288943Sdim/// result in recursion during compilation. We break the recursion by making
1362288943Sdim/// sure that we visit a MachineOperand and its definition in a
1363288943Sdim/// MachineInstruction only once. If we attempt to visit more than once, then
1364288943Sdim/// there is recursion, and will return false.
1365288943Sdimbool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1366288943Sdim                                        MachineInstr *MI,
1367288943Sdim                                        const MachineOperand *MO,
1368288943Sdim                                        LoopFeederMap &LoopFeederPhi) const {
1369288943Sdim  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1370344779Sdim    LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1371344779Sdim                      << printMBBReference(**L->block_begin()));
1372288943Sdim    // Ignore all BBs that form Loop.
1373344779Sdim    for (MachineBasicBlock *MBB : L->getBlocks()) {
1374288943Sdim      if (A == MBB)
1375288943Sdim        return false;
1376288943Sdim    }
1377288943Sdim    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1378288943Sdim    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1379288943Sdim    return true;
1380288943Sdim  } else
1381288943Sdim    // Already visited node.
1382288943Sdim    return false;
1383288943Sdim}
1384234285Sdim
1385288943Sdim/// Return true if a Phi may generate a value that can underflow.
1386288943Sdim/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1387288943Sdimbool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1388288943Sdim    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1389288943Sdim    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1390288943Sdim  assert(Phi->isPHI() && "Expecting a Phi.");
1391288943Sdim  // Walk through each Phi, and its used operands. Make sure that
1392288943Sdim  // if there is recursion in Phi, we won't generate hardware loops.
1393288943Sdim  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1394288943Sdim    if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1395288943Sdim      if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1396288943Sdim                                      Phi->getParent(), L, LoopFeederPhi))
1397288943Sdim        return true;
1398288943Sdim  return false;
1399288943Sdim}
1400288943Sdim
1401288943Sdim/// Return true if the induction variable can underflow in the first iteration.
1402288943Sdim/// An example, is an initial unsigned value that is 0 and is decrement in the
1403288943Sdim/// first itertion of a do-while loop.  In this case, we cannot generate a
1404288943Sdim/// hardware loop because the endloop instruction does not decrement the loop
1405288943Sdim/// counter if it is <= 1. We only need to perform this analysis if the
1406288943Sdim/// initial value is a register.
1407288943Sdim///
1408288943Sdim/// This function assumes the initial value may underfow unless proven
1409288943Sdim/// otherwise. If the type is signed, then we don't care because signed
1410288943Sdim/// underflow is undefined. We attempt to prove the initial value is not
1411288943Sdim/// zero by perfoming a crude analysis of the loop counter. This function
1412288943Sdim/// checks if the initial value is used in any comparison prior to the loop
1413288943Sdim/// and, if so, assumes the comparison is a range check. This is inexact,
1414288943Sdim/// but will catch the simple cases.
1415288943Sdimbool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1416288943Sdim    const MachineOperand *InitVal, const MachineOperand *EndVal,
1417288943Sdim    MachineBasicBlock *MBB, MachineLoop *L,
1418288943Sdim    LoopFeederMap &LoopFeederPhi) const {
1419288943Sdim  // Only check register values since they are unknown.
1420288943Sdim  if (!InitVal->isReg())
1421288943Sdim    return false;
1422288943Sdim
1423288943Sdim  if (!EndVal->isImm())
1424288943Sdim    return false;
1425288943Sdim
1426288943Sdim  // A register value that is assigned an immediate is a known value, and it
1427288943Sdim  // won't underflow in the first iteration.
1428288943Sdim  int64_t Imm;
1429288943Sdim  if (checkForImmediate(*InitVal, Imm))
1430288943Sdim    return (EndVal->getImm() == Imm);
1431288943Sdim
1432360784Sdim  Register Reg = InitVal->getReg();
1433288943Sdim
1434288943Sdim  // We don't know the value of a physical register.
1435360784Sdim  if (!Register::isVirtualRegister(Reg))
1436288943Sdim    return true;
1437288943Sdim
1438288943Sdim  MachineInstr *Def = MRI->getVRegDef(Reg);
1439288943Sdim  if (!Def)
1440288943Sdim    return true;
1441288943Sdim
1442288943Sdim  // If the initial value is a Phi or copy and the operands may not underflow,
1443288943Sdim  // then the definition cannot be underflow either.
1444288943Sdim  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1445288943Sdim                                             L, LoopFeederPhi))
1446288943Sdim    return false;
1447288943Sdim  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1448288943Sdim                                                    EndVal, Def->getParent(),
1449288943Sdim                                                    L, LoopFeederPhi))
1450288943Sdim    return false;
1451288943Sdim
1452288943Sdim  // Iterate over the uses of the initial value. If the initial value is used
1453288943Sdim  // in a compare, then we assume this is a range check that ensures the loop
1454288943Sdim  // doesn't underflow. This is not an exact test and should be improved.
1455288943Sdim  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1456288943Sdim         E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1457288943Sdim    MachineInstr *MI = &*I;
1458288943Sdim    unsigned CmpReg1 = 0, CmpReg2 = 0;
1459288943Sdim    int CmpMask = 0, CmpValue = 0;
1460288943Sdim
1461309124Sdim    if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1462288943Sdim      continue;
1463288943Sdim
1464314564Sdim    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1465288943Sdim    SmallVector<MachineOperand, 2> Cond;
1466309124Sdim    if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1467288943Sdim      continue;
1468288943Sdim
1469314564Sdim    Comparison::Kind Cmp =
1470314564Sdim        getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1471288943Sdim    if (Cmp == 0)
1472288943Sdim      continue;
1473288943Sdim    if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1474288943Sdim      Cmp = Comparison::getNegatedComparison(Cmp);
1475288943Sdim    if (CmpReg2 != 0 && CmpReg2 == Reg)
1476288943Sdim      Cmp = Comparison::getSwappedComparison(Cmp);
1477288943Sdim
1478288943Sdim    // Signed underflow is undefined.
1479288943Sdim    if (Comparison::isSigned(Cmp))
1480288943Sdim      return false;
1481288943Sdim
1482296417Sdim    // Check if there is a comparison of the initial value. If the initial value
1483288943Sdim    // is greater than or not equal to another value, then assume this is a
1484288943Sdim    // range check.
1485288943Sdim    if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1486288943Sdim      return false;
1487288943Sdim  }
1488288943Sdim
1489288943Sdim  // OK - this is a hack that needs to be improved. We really need to analyze
1490288943Sdim  // the instructions performed on the initial value. This works on the simplest
1491288943Sdim  // cases only.
1492288943Sdim  if (!Def->isCopy() && !Def->isPHI())
1493288943Sdim    return false;
1494288943Sdim
1495288943Sdim  return true;
1496288943Sdim}
1497288943Sdim
1498288943Sdimbool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1499288943Sdim                                             int64_t &Val) const {
1500288943Sdim  if (MO.isImm()) {
1501288943Sdim    Val = MO.getImm();
1502288943Sdim    return true;
1503288943Sdim  }
1504288943Sdim  if (!MO.isReg())
1505288943Sdim    return false;
1506288943Sdim
1507288943Sdim  // MO is a register. Check whether it is defined as an immediate value,
1508288943Sdim  // and if so, get the value of it in TV. That value will then need to be
1509288943Sdim  // processed to handle potential subregisters in MO.
1510288943Sdim  int64_t TV;
1511288943Sdim
1512360784Sdim  Register R = MO.getReg();
1513360784Sdim  if (!Register::isVirtualRegister(R))
1514288943Sdim    return false;
1515249423Sdim  MachineInstr *DI = MRI->getVRegDef(R);
1516249423Sdim  unsigned DOpc = DI->getOpcode();
1517249423Sdim  switch (DOpc) {
1518288943Sdim    case TargetOpcode::COPY:
1519280031Sdim    case Hexagon::A2_tfrsi:
1520280031Sdim    case Hexagon::A2_tfrpi:
1521314564Sdim    case Hexagon::CONST32:
1522327952Sdim    case Hexagon::CONST64:
1523288943Sdim      // Call recursively to avoid an extra check whether operand(1) is
1524288943Sdim      // indeed an immediate (it could be a global address, for example),
1525288943Sdim      // plus we can handle COPY at the same time.
1526288943Sdim      if (!checkForImmediate(DI->getOperand(1), TV))
1527288943Sdim        return false;
1528288943Sdim      break;
1529288943Sdim    case Hexagon::A2_combineii:
1530288943Sdim    case Hexagon::A4_combineir:
1531288943Sdim    case Hexagon::A4_combineii:
1532288943Sdim    case Hexagon::A4_combineri:
1533288943Sdim    case Hexagon::A2_combinew: {
1534288943Sdim      const MachineOperand &S1 = DI->getOperand(1);
1535288943Sdim      const MachineOperand &S2 = DI->getOperand(2);
1536288943Sdim      int64_t V1, V2;
1537288943Sdim      if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1538288943Sdim        return false;
1539321369Sdim      TV = V2 | (static_cast<uint64_t>(V1) << 32);
1540288943Sdim      break;
1541288943Sdim    }
1542288943Sdim    case TargetOpcode::REG_SEQUENCE: {
1543288943Sdim      const MachineOperand &S1 = DI->getOperand(1);
1544288943Sdim      const MachineOperand &S3 = DI->getOperand(3);
1545288943Sdim      int64_t V1, V3;
1546288943Sdim      if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1547288943Sdim        return false;
1548288943Sdim      unsigned Sub2 = DI->getOperand(2).getImm();
1549288943Sdim      unsigned Sub4 = DI->getOperand(4).getImm();
1550314564Sdim      if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1551288943Sdim        TV = V1 | (V3 << 32);
1552314564Sdim      else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1553288943Sdim        TV = V3 | (V1 << 32);
1554288943Sdim      else
1555288943Sdim        llvm_unreachable("Unexpected form of REG_SEQUENCE");
1556288943Sdim      break;
1557288943Sdim    }
1558288943Sdim
1559288943Sdim    default:
1560288943Sdim      return false;
1561249423Sdim  }
1562234285Sdim
1563314564Sdim  // By now, we should have successfully obtained the immediate value defining
1564288943Sdim  // the register referenced in MO. Handle a potential use of a subregister.
1565288943Sdim  switch (MO.getSubReg()) {
1566314564Sdim    case Hexagon::isub_lo:
1567288943Sdim      Val = TV & 0xFFFFFFFFULL;
1568288943Sdim      break;
1569314564Sdim    case Hexagon::isub_hi:
1570288943Sdim      Val = (TV >> 32) & 0xFFFFFFFFULL;
1571288943Sdim      break;
1572288943Sdim    default:
1573288943Sdim      Val = TV;
1574288943Sdim      break;
1575288943Sdim  }
1576288943Sdim  return true;
1577249423Sdim}
1578234285Sdim
1579249423Sdimvoid HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1580249423Sdim  if (MO.isImm()) {
1581249423Sdim    MO.setImm(Val);
1582249423Sdim    return;
1583234285Sdim  }
1584234285Sdim
1585249423Sdim  assert(MO.isReg());
1586360784Sdim  Register R = MO.getReg();
1587288943Sdim  MachineInstr *DI = MRI->getVRegDef(R);
1588234285Sdim
1589249423Sdim  const TargetRegisterClass *RC = MRI->getRegClass(R);
1590360784Sdim  Register NewR = MRI->createVirtualRegister(RC);
1591249423Sdim  MachineBasicBlock &B = *DI->getParent();
1592249423Sdim  DebugLoc DL = DI->getDebugLoc();
1593288943Sdim  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1594249423Sdim  MO.setReg(NewR);
1595249423Sdim}
1596234285Sdim
1597288943Sdimstatic bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1598288943Sdim  // These two instructions are not extendable.
1599288943Sdim  if (CmpOpc == Hexagon::A4_cmpbeqi)
1600288943Sdim    return isUInt<8>(Imm);
1601288943Sdim  if (CmpOpc == Hexagon::A4_cmpbgti)
1602288943Sdim    return isInt<8>(Imm);
1603288943Sdim  // The rest of the comparison-with-immediate instructions are extendable.
1604288943Sdim  return true;
1605288943Sdim}
1606249423Sdim
1607249423Sdimbool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1608249423Sdim  MachineBasicBlock *Header = L->getHeader();
1609249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
1610314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1611249423Sdim
1612288943Sdim  if (!(Header && Latch && ExitingBlock))
1613249423Sdim    return false;
1614249423Sdim
1615249423Sdim  // These data structures follow the same concept as the corresponding
1616249423Sdim  // ones in findInductionRegister (where some comments are).
1617327952Sdim  using RegisterBump = std::pair<unsigned, int64_t>;
1618327952Sdim  using RegisterInduction = std::pair<unsigned, RegisterBump>;
1619327952Sdim  using RegisterInductionSet = std::set<RegisterInduction>;
1620249423Sdim
1621249423Sdim  // Register candidates for induction variables, with their associated bumps.
1622249423Sdim  RegisterInductionSet IndRegs;
1623249423Sdim
1624249423Sdim  // Look for induction patterns:
1625327952Sdim  //   %1 = PHI ..., [ latch, %2 ]
1626327952Sdim  //   %2 = ADD %1, imm
1627327952Sdim  using instr_iterator = MachineBasicBlock::instr_iterator;
1628327952Sdim
1629249423Sdim  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1630249423Sdim       I != E && I->isPHI(); ++I) {
1631249423Sdim    MachineInstr *Phi = &*I;
1632249423Sdim
1633249423Sdim    // Have a PHI instruction.
1634249423Sdim    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1635249423Sdim      if (Phi->getOperand(i+1).getMBB() != Latch)
1636249423Sdim        continue;
1637249423Sdim
1638360784Sdim      Register PhiReg = Phi->getOperand(i).getReg();
1639249423Sdim      MachineInstr *DI = MRI->getVRegDef(PhiReg);
1640249423Sdim
1641314564Sdim      if (DI->getDesc().isAdd()) {
1642249423Sdim        // If the register operand to the add/sub is the PHI we are looking
1643249423Sdim        // at, this meets the induction pattern.
1644360784Sdim        Register IndReg = DI->getOperand(1).getReg();
1645288943Sdim        MachineOperand &Opnd2 = DI->getOperand(2);
1646288943Sdim        int64_t V;
1647288943Sdim        if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1648360784Sdim          Register UpdReg = DI->getOperand(0).getReg();
1649249423Sdim          IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1650234285Sdim        }
1651234285Sdim      }
1652249423Sdim    }  // for (i)
1653249423Sdim  }  // for (instr)
1654249423Sdim
1655249423Sdim  if (IndRegs.empty())
1656249423Sdim    return false;
1657249423Sdim
1658276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1659249423Sdim  SmallVector<MachineOperand,2> Cond;
1660249423Sdim  // AnalyzeBranch returns true if it fails to analyze branch.
1661309124Sdim  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1662288943Sdim  if (NotAnalyzed || Cond.empty())
1663249423Sdim    return false;
1664249423Sdim
1665288943Sdim  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1666314564Sdim    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1667288943Sdim    SmallVector<MachineOperand,2> LCond;
1668309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1669288943Sdim    if (NotAnalyzed)
1670288943Sdim      return false;
1671249423Sdim
1672288943Sdim    // Since latch is not the exiting block, the latch branch should be an
1673288943Sdim    // unconditional branch to the loop header.
1674288943Sdim    if (TB == Latch)
1675288943Sdim      TB = (LTB == Header) ? LTB : LFB;
1676288943Sdim    else
1677288943Sdim      FB = (LTB == Header) ? LTB : LFB;
1678288943Sdim  }
1679288943Sdim  if (TB != Header) {
1680288943Sdim    if (FB != Header) {
1681288943Sdim      // The latch/exit block does not go back to the header.
1682288943Sdim      return false;
1683288943Sdim    }
1684288943Sdim    // FB is the header (i.e., uncond. jump to branch header)
1685288943Sdim    // In this case, the LoopBody -> TB should not be a back edge otherwise
1686288943Sdim    // it could result in an infinite loop after conversion to hw_loop.
1687288943Sdim    // This case can happen when the Latch has two jumps like this:
1688288943Sdim    // Jmp_c OuterLoopHeader <-- TB
1689288943Sdim    // Jmp   InnerLoopHeader <-- FB
1690288943Sdim    if (MDT->dominates(TB, FB))
1691288943Sdim      return false;
1692288943Sdim  }
1693249423Sdim
1694249423Sdim  // Expecting a predicate register as a condition.  It won't be a hardware
1695249423Sdim  // predicate register at this point yet, just a vreg.
1696249423Sdim  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1697249423Sdim  // into Cond, followed by the predicate register.  For non-negated branches
1698249423Sdim  // it's just the register.
1699249423Sdim  unsigned CSz = Cond.size();
1700249423Sdim  if (CSz != 1 && CSz != 2)
1701249423Sdim    return false;
1702249423Sdim
1703288943Sdim  if (!Cond[CSz-1].isReg())
1704288943Sdim    return false;
1705288943Sdim
1706360784Sdim  Register P = Cond[CSz - 1].getReg();
1707249423Sdim  MachineInstr *PredDef = MRI->getVRegDef(P);
1708249423Sdim
1709249423Sdim  if (!PredDef->isCompare())
1710249423Sdim    return false;
1711249423Sdim
1712249423Sdim  SmallSet<unsigned,2> CmpRegs;
1713276479Sdim  MachineOperand *CmpImmOp = nullptr;
1714249423Sdim
1715249423Sdim  // Go over all operands to the compare and look for immediate and register
1716249423Sdim  // operands.  Assume that if the compare has a single register use and a
1717249423Sdim  // single immediate operand, then the register is being compared with the
1718249423Sdim  // immediate value.
1719249423Sdim  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1720249423Sdim    MachineOperand &MO = PredDef->getOperand(i);
1721249423Sdim    if (MO.isReg()) {
1722249423Sdim      // Skip all implicit references.  In one case there was:
1723327952Sdim      //   %140 = FCMPUGT32_rr %138, %139, implicit %usr
1724249423Sdim      if (MO.isImplicit())
1725249423Sdim        continue;
1726249423Sdim      if (MO.isUse()) {
1727288943Sdim        if (!isImmediate(MO)) {
1728249423Sdim          CmpRegs.insert(MO.getReg());
1729249423Sdim          continue;
1730249423Sdim        }
1731249423Sdim        // Consider the register to be the "immediate" operand.
1732249423Sdim        if (CmpImmOp)
1733249423Sdim          return false;
1734249423Sdim        CmpImmOp = &MO;
1735249423Sdim      }
1736249423Sdim    } else if (MO.isImm()) {
1737249423Sdim      if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
1738249423Sdim        return false;
1739249423Sdim      CmpImmOp = &MO;
1740234285Sdim    }
1741234285Sdim  }
1742234285Sdim
1743249423Sdim  if (CmpRegs.empty())
1744249423Sdim    return false;
1745234285Sdim
1746249423Sdim  // Check if the compared register follows the order we want.  Fix if needed.
1747249423Sdim  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1748249423Sdim       I != E; ++I) {
1749249423Sdim    // This is a success.  If the register used in the comparison is one that
1750249423Sdim    // we have identified as a bumped (updated) induction register, there is
1751249423Sdim    // nothing to do.
1752249423Sdim    if (CmpRegs.count(I->first))
1753249423Sdim      return true;
1754249423Sdim
1755249423Sdim    // Otherwise, if the register being compared comes out of a PHI node,
1756249423Sdim    // and has been recognized as following the induction pattern, and is
1757249423Sdim    // compared against an immediate, we can fix it.
1758249423Sdim    const RegisterBump &RB = I->second;
1759249423Sdim    if (CmpRegs.count(RB.first)) {
1760288943Sdim      if (!CmpImmOp) {
1761288943Sdim        // If both operands to the compare instruction are registers, see if
1762288943Sdim        // it can be changed to use induction register as one of the operands.
1763288943Sdim        MachineInstr *IndI = nullptr;
1764288943Sdim        MachineInstr *nonIndI = nullptr;
1765288943Sdim        MachineOperand *IndMO = nullptr;
1766288943Sdim        MachineOperand *nonIndMO = nullptr;
1767288943Sdim
1768288943Sdim        for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1769288943Sdim          MachineOperand &MO = PredDef->getOperand(i);
1770288943Sdim          if (MO.isReg() && MO.getReg() == RB.first) {
1771341825Sdim            LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1772341825Sdim                              << ") = " << *(MRI->getVRegDef(I->first)));
1773288943Sdim            if (IndI)
1774288943Sdim              return false;
1775288943Sdim
1776288943Sdim            IndI = MRI->getVRegDef(I->first);
1777288943Sdim            IndMO = &MO;
1778288943Sdim          } else if (MO.isReg()) {
1779341825Sdim            LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1780341825Sdim                              << ") = " << *(MRI->getVRegDef(MO.getReg())));
1781288943Sdim            if (nonIndI)
1782288943Sdim              return false;
1783288943Sdim
1784288943Sdim            nonIndI = MRI->getVRegDef(MO.getReg());
1785288943Sdim            nonIndMO = &MO;
1786288943Sdim          }
1787288943Sdim        }
1788288943Sdim        if (IndI && nonIndI &&
1789288943Sdim            nonIndI->getOpcode() == Hexagon::A2_addi &&
1790288943Sdim            nonIndI->getOperand(2).isImm() &&
1791288943Sdim            nonIndI->getOperand(2).getImm() == - RB.second) {
1792288943Sdim          bool Order = orderBumpCompare(IndI, PredDef);
1793288943Sdim          if (Order) {
1794288943Sdim            IndMO->setReg(I->first);
1795288943Sdim            nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1796288943Sdim            return true;
1797288943Sdim          }
1798288943Sdim        }
1799249423Sdim        return false;
1800288943Sdim      }
1801249423Sdim
1802288943Sdim      // It is not valid to do this transformation on an unsigned comparison
1803288943Sdim      // because it may underflow.
1804314564Sdim      Comparison::Kind Cmp =
1805314564Sdim          getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1806288943Sdim      if (!Cmp || Comparison::isUnsigned(Cmp))
1807288943Sdim        return false;
1808288943Sdim
1809288943Sdim      // If the register is being compared against an immediate, try changing
1810288943Sdim      // the compare instruction to use induction register and adjust the
1811288943Sdim      // immediate operand.
1812249423Sdim      int64_t CmpImm = getImmediate(*CmpImmOp);
1813249423Sdim      int64_t V = RB.second;
1814288943Sdim      // Handle Overflow (64-bit).
1815288943Sdim      if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1816288943Sdim          ((V < 0) && (CmpImm < INT64_MIN - V)))
1817249423Sdim        return false;
1818249423Sdim      CmpImm += V;
1819288943Sdim      // Most comparisons of register against an immediate value allow
1820288943Sdim      // the immediate to be constant-extended. There are some exceptions
1821288943Sdim      // though. Make sure the new combination will work.
1822288943Sdim      if (CmpImmOp->isImm())
1823288943Sdim        if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1824288943Sdim          return false;
1825249423Sdim
1826249423Sdim      // Make sure that the compare happens after the bump.  Otherwise,
1827249423Sdim      // after the fixup, the compare would use a yet-undefined register.
1828249423Sdim      MachineInstr *BumpI = MRI->getVRegDef(I->first);
1829249423Sdim      bool Order = orderBumpCompare(BumpI, PredDef);
1830249423Sdim      if (!Order)
1831249423Sdim        return false;
1832249423Sdim
1833249423Sdim      // Finally, fix the compare instruction.
1834249423Sdim      setImmediate(*CmpImmOp, CmpImm);
1835249423Sdim      for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1836249423Sdim        MachineOperand &MO = PredDef->getOperand(i);
1837249423Sdim        if (MO.isReg() && MO.getReg() == RB.first) {
1838249423Sdim          MO.setReg(I->first);
1839249423Sdim          return true;
1840249423Sdim        }
1841249423Sdim      }
1842249423Sdim    }
1843249423Sdim  }
1844249423Sdim
1845249423Sdim  return false;
1846234285Sdim}
1847234285Sdim
1848314564Sdim/// createPreheaderForLoop - Create a preheader for a given loop.
1849249423SdimMachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1850249423Sdim      MachineLoop *L) {
1851314564Sdim  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1852249423Sdim    return TmpPH;
1853288943Sdim  if (!HWCreatePreheader)
1854288943Sdim    return nullptr;
1855288943Sdim
1856249423Sdim  MachineBasicBlock *Header = L->getHeader();
1857249423Sdim  MachineBasicBlock *Latch = L->getLoopLatch();
1858314564Sdim  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1859249423Sdim  MachineFunction *MF = Header->getParent();
1860249423Sdim  DebugLoc DL;
1861249423Sdim
1862288943Sdim#ifndef NDEBUG
1863327952Sdim  if ((!PHFn.empty()) && (PHFn != MF->getName()))
1864276479Sdim    return nullptr;
1865288943Sdim#endif
1866249423Sdim
1867288943Sdim  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1868288943Sdim    return nullptr;
1869288943Sdim
1870327952Sdim  using instr_iterator = MachineBasicBlock::instr_iterator;
1871249423Sdim
1872249423Sdim  // Verify that all existing predecessors have analyzable branches
1873249423Sdim  // (or no branches at all).
1874327952Sdim  using MBBVector = std::vector<MachineBasicBlock *>;
1875327952Sdim
1876249423Sdim  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1877249423Sdim  SmallVector<MachineOperand,2> Tmp1;
1878276479Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1879249423Sdim
1880309124Sdim  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1881276479Sdim    return nullptr;
1882249423Sdim
1883249423Sdim  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1884249423Sdim    MachineBasicBlock *PB = *I;
1885309124Sdim    bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1886288943Sdim    if (NotAnalyzed)
1887288943Sdim      return nullptr;
1888249423Sdim  }
1889249423Sdim
1890249423Sdim  MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1891296417Sdim  MF->insert(Header->getIterator(), NewPH);
1892249423Sdim
1893249423Sdim  if (Header->pred_size() > 2) {
1894249423Sdim    // Ensure that the header has only two predecessors: the preheader and
1895249423Sdim    // the loop latch.  Any additional predecessors of the header should
1896288943Sdim    // join at the newly created preheader. Inspect all PHI nodes from the
1897249423Sdim    // header and create appropriate corresponding PHI nodes in the preheader.
1898249423Sdim
1899249423Sdim    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1900249423Sdim         I != E && I->isPHI(); ++I) {
1901249423Sdim      MachineInstr *PN = &*I;
1902249423Sdim
1903249423Sdim      const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1904249423Sdim      MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1905249423Sdim      NewPH->insert(NewPH->end(), NewPN);
1906249423Sdim
1907360784Sdim      Register PR = PN->getOperand(0).getReg();
1908249423Sdim      const TargetRegisterClass *RC = MRI->getRegClass(PR);
1909360784Sdim      Register NewPR = MRI->createVirtualRegister(RC);
1910249423Sdim      NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1911249423Sdim
1912249423Sdim      // Copy all non-latch operands of a header's PHI node to the newly
1913249423Sdim      // created PHI node in the preheader.
1914249423Sdim      for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1915360784Sdim        Register PredR = PN->getOperand(i).getReg();
1916288943Sdim        unsigned PredRSub = PN->getOperand(i).getSubReg();
1917249423Sdim        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1918249423Sdim        if (PredB == Latch)
1919249423Sdim          continue;
1920249423Sdim
1921288943Sdim        MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1922288943Sdim        MO.setSubReg(PredRSub);
1923288943Sdim        NewPN->addOperand(MO);
1924249423Sdim        NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1925249423Sdim      }
1926249423Sdim
1927249423Sdim      // Remove copied operands from the old PHI node and add the value
1928249423Sdim      // coming from the preheader's PHI.
1929249423Sdim      for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1930249423Sdim        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1931249423Sdim        if (PredB != Latch) {
1932249423Sdim          PN->RemoveOperand(i+1);
1933249423Sdim          PN->RemoveOperand(i);
1934249423Sdim        }
1935249423Sdim      }
1936249423Sdim      PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1937249423Sdim      PN->addOperand(MachineOperand::CreateMBB(NewPH));
1938249423Sdim    }
1939234285Sdim  } else {
1940249423Sdim    assert(Header->pred_size() == 2);
1941249423Sdim
1942249423Sdim    // The header has only two predecessors, but the non-latch predecessor
1943249423Sdim    // is not a preheader (e.g. it has other successors, etc.)
1944249423Sdim    // In such a case we don't need any extra PHI nodes in the new preheader,
1945249423Sdim    // all we need is to adjust existing PHIs in the header to now refer to
1946249423Sdim    // the new preheader.
1947249423Sdim    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1948249423Sdim         I != E && I->isPHI(); ++I) {
1949249423Sdim      MachineInstr *PN = &*I;
1950249423Sdim      for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1951249423Sdim        MachineOperand &MO = PN->getOperand(i+1);
1952249423Sdim        if (MO.getMBB() != Latch)
1953249423Sdim          MO.setMBB(NewPH);
1954249423Sdim      }
1955249423Sdim    }
1956234285Sdim  }
1957249423Sdim
1958249423Sdim  // "Reroute" the CFG edges to link in the new preheader.
1959249423Sdim  // If any of the predecessors falls through to the header, insert a branch
1960249423Sdim  // to the new preheader in that place.
1961249423Sdim  SmallVector<MachineOperand,1> Tmp2;
1962249423Sdim  SmallVector<MachineOperand,1> EmptyCond;
1963249423Sdim
1964276479Sdim  TB = FB = nullptr;
1965249423Sdim
1966249423Sdim  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1967249423Sdim    MachineBasicBlock *PB = *I;
1968249423Sdim    if (PB != Latch) {
1969249423Sdim      Tmp2.clear();
1970309124Sdim      bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1971276479Sdim      (void)NotAnalyzed; // suppress compiler warning
1972249423Sdim      assert (!NotAnalyzed && "Should be analyzable!");
1973249423Sdim      if (TB != Header && (Tmp2.empty() || FB != Header))
1974314564Sdim        TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1975249423Sdim      PB->ReplaceUsesOfBlockWith(Header, NewPH);
1976249423Sdim    }
1977249423Sdim  }
1978249423Sdim
1979249423Sdim  // It can happen that the latch block will fall through into the header.
1980249423Sdim  // Insert an unconditional branch to the header.
1981276479Sdim  TB = FB = nullptr;
1982309124Sdim  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1983276479Sdim  (void)LatchNotAnalyzed; // suppress compiler warning
1984249423Sdim  assert (!LatchNotAnalyzed && "Should be analyzable!");
1985249423Sdim  if (!TB && !FB)
1986314564Sdim    TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1987249423Sdim
1988249423Sdim  // Finally, the branch from the preheader to the header.
1989314564Sdim  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1990249423Sdim  NewPH->addSuccessor(Header);
1991249423Sdim
1992288943Sdim  MachineLoop *ParentLoop = L->getParentLoop();
1993288943Sdim  if (ParentLoop)
1994288943Sdim    ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1995288943Sdim
1996288943Sdim  // Update the dominator information with the new preheader.
1997288943Sdim  if (MDT) {
1998314564Sdim    if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1999314564Sdim      if (MachineDomTreeNode *DHN = HN->getIDom()) {
2000314564Sdim        MDT->addNewBlock(NewPH, DHN->getBlock());
2001314564Sdim        MDT->changeImmediateDominator(Header, NewPH);
2002314564Sdim      }
2003314564Sdim    }
2004288943Sdim  }
2005288943Sdim
2006249423Sdim  return NewPH;
2007234285Sdim}
2008