HexagonHardwareLoops.cpp revision 321369
1234285Sdim//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim// 10234285Sdim// This pass identifies loops where we can generate the Hexagon hardware 11234285Sdim// loop instruction. The hardware loop can perform loop branches with a 12234285Sdim// zero-cycle overhead. 13234285Sdim// 14234285Sdim// The pattern that defines the induction variable can changed depending on 15234285Sdim// prior optimizations. For example, the IndVarSimplify phase run by 'opt' 16234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass 17234285Sdim// run by 'llc' may also make changes to the induction variable. 18234285Sdim// The pattern detected by this phase is due to running Strength Reduction. 19234285Sdim// 20234285Sdim// Criteria for hardware loops: 21234285Sdim// - Countable loops (w/ ind. var for a trip count) 22234285Sdim// - Assumes loops are normalized by IndVarSimplify 23234285Sdim// - Try inner-most loops first 24234285Sdim// - No function calls in loops. 25234285Sdim// 26234285Sdim//===----------------------------------------------------------------------===// 27234285Sdim 28314564Sdim#include "HexagonInstrInfo.h" 29314564Sdim#include "HexagonSubtarget.h" 30249423Sdim#include "llvm/ADT/SmallSet.h" 31314564Sdim#include "llvm/ADT/SmallVector.h" 32234285Sdim#include "llvm/ADT/Statistic.h" 33314564Sdim#include "llvm/ADT/StringRef.h" 34314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 35234285Sdim#include "llvm/CodeGen/MachineDominators.h" 36234285Sdim#include "llvm/CodeGen/MachineFunction.h" 37234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 38314564Sdim#include "llvm/CodeGen/MachineInstr.h" 39234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 40234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 41314564Sdim#include "llvm/CodeGen/MachineOperand.h" 42234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 43314564Sdim#include "llvm/IR/Constants.h" 44314564Sdim#include "llvm/IR/DebugLoc.h" 45314564Sdim#include "llvm/Pass.h" 46249423Sdim#include "llvm/Support/CommandLine.h" 47234285Sdim#include "llvm/Support/Debug.h" 48314564Sdim#include "llvm/Support/ErrorHandling.h" 49314564Sdim#include "llvm/Support/MathExtras.h" 50234285Sdim#include "llvm/Support/raw_ostream.h" 51314564Sdim#include "llvm/Target/TargetRegisterInfo.h" 52314564Sdim#include <cassert> 53314564Sdim#include <cstdint> 54314564Sdim#include <cstdlib> 55314564Sdim#include <iterator> 56314564Sdim#include <map> 57314564Sdim#include <set> 58314564Sdim#include <utility> 59249423Sdim#include <vector> 60234285Sdim 61234285Sdimusing namespace llvm; 62234285Sdim 63276479Sdim#define DEBUG_TYPE "hwloops" 64276479Sdim 65249423Sdim#ifndef NDEBUG 66288943Sdimstatic cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); 67288943Sdim 68288943Sdim// Option to create preheader only for a specific function. 69288943Sdimstatic cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, 70288943Sdim cl::init("")); 71249423Sdim#endif 72249423Sdim 73288943Sdim// Option to create a preheader if one doesn't exist. 74288943Sdimstatic cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", 75288943Sdim cl::Hidden, cl::init(true), 76288943Sdim cl::desc("Add a preheader to a hardware loop if one doesn't exist")); 77288943Sdim 78314564Sdim// Turn it off by default. If a preheader block is not created here, the 79314564Sdim// software pipeliner may be unable to find a block suitable to serve as 80314564Sdim// a preheader. In that case SWP will not run. 81314564Sdimstatic cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false), 82314564Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " 83314564Sdim "instructions")); 84314564Sdim 85234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 86234285Sdim 87249423Sdimnamespace llvm { 88314564Sdim 89288943Sdim FunctionPass *createHexagonHardwareLoops(); 90249423Sdim void initializeHexagonHardwareLoopsPass(PassRegistry&); 91249423Sdim 92314564Sdim} // end namespace llvm 93314564Sdim 94234285Sdimnamespace { 95314564Sdim 96234285Sdim class CountValue; 97314564Sdim 98234285Sdim struct HexagonHardwareLoops : public MachineFunctionPass { 99249423Sdim MachineLoopInfo *MLI; 100249423Sdim MachineRegisterInfo *MRI; 101249423Sdim MachineDominatorTree *MDT; 102249423Sdim const HexagonInstrInfo *TII; 103321369Sdim const HexagonRegisterInfo *TRI; 104249423Sdim#ifndef NDEBUG 105249423Sdim static int Counter; 106249423Sdim#endif 107234285Sdim 108234285Sdim public: 109249423Sdim static char ID; 110234285Sdim 111249423Sdim HexagonHardwareLoops() : MachineFunctionPass(ID) { 112249423Sdim initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); 113249423Sdim } 114234285Sdim 115276479Sdim bool runOnMachineFunction(MachineFunction &MF) override; 116234285Sdim 117314564Sdim StringRef getPassName() const override { return "Hexagon Hardware Loops"; } 118234285Sdim 119276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 120234285Sdim AU.addRequired<MachineDominatorTree>(); 121234285Sdim AU.addRequired<MachineLoopInfo>(); 122234285Sdim MachineFunctionPass::getAnalysisUsage(AU); 123234285Sdim } 124234285Sdim 125234285Sdim private: 126288943Sdim typedef std::map<unsigned, MachineInstr *> LoopFeederMap; 127288943Sdim 128249423Sdim /// Kinds of comparisons in the compare instructions. 129249423Sdim struct Comparison { 130249423Sdim enum Kind { 131249423Sdim EQ = 0x01, 132249423Sdim NE = 0x02, 133288943Sdim L = 0x04, 134288943Sdim G = 0x08, 135288943Sdim U = 0x40, 136249423Sdim LTs = L, 137249423Sdim LEs = L | EQ, 138249423Sdim GTs = G, 139249423Sdim GEs = G | EQ, 140249423Sdim LTu = L | U, 141249423Sdim LEu = L | EQ | U, 142249423Sdim GTu = G | U, 143249423Sdim GEu = G | EQ | U 144249423Sdim }; 145249423Sdim 146249423Sdim static Kind getSwappedComparison(Kind Cmp) { 147249423Sdim assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); 148249423Sdim if ((Cmp & L) || (Cmp & G)) 149249423Sdim return (Kind)(Cmp ^ (L|G)); 150249423Sdim return Cmp; 151249423Sdim } 152288943Sdim 153288943Sdim static Kind getNegatedComparison(Kind Cmp) { 154288943Sdim if ((Cmp & L) || (Cmp & G)) 155288943Sdim return (Kind)((Cmp ^ (L | G)) ^ EQ); 156288943Sdim if ((Cmp & NE) || (Cmp & EQ)) 157288943Sdim return (Kind)(Cmp ^ (EQ | NE)); 158288943Sdim return (Kind)0; 159288943Sdim } 160288943Sdim 161288943Sdim static bool isSigned(Kind Cmp) { 162288943Sdim return (Cmp & (L | G) && !(Cmp & U)); 163288943Sdim } 164288943Sdim 165288943Sdim static bool isUnsigned(Kind Cmp) { 166288943Sdim return (Cmp & U); 167288943Sdim } 168249423Sdim }; 169249423Sdim 170249423Sdim /// \brief Find the register that contains the loop controlling 171234285Sdim /// induction variable. 172249423Sdim /// If successful, it will return true and set the \p Reg, \p IVBump 173249423Sdim /// and \p IVOp arguments. Otherwise it will return false. 174249423Sdim /// The returned induction register is the register R that follows the 175249423Sdim /// following induction pattern: 176249423Sdim /// loop: 177249423Sdim /// R = phi ..., [ R.next, LatchBlock ] 178249423Sdim /// R.next = R + #bump 179249423Sdim /// if (R.next < #N) goto loop 180249423Sdim /// IVBump is the immediate value added to R, and IVOp is the instruction 181249423Sdim /// "R.next = R + #bump". 182249423Sdim bool findInductionRegister(MachineLoop *L, unsigned &Reg, 183249423Sdim int64_t &IVBump, MachineInstr *&IVOp) const; 184234285Sdim 185288943Sdim /// \brief Return the comparison kind for the specified opcode. 186288943Sdim Comparison::Kind getComparisonKind(unsigned CondOpc, 187288943Sdim MachineOperand *InitialValue, 188288943Sdim const MachineOperand *Endvalue, 189288943Sdim int64_t IVBump) const; 190288943Sdim 191249423Sdim /// \brief Analyze the statements in a loop to determine if the loop 192249423Sdim /// has a computable trip count and, if so, return a value that represents 193249423Sdim /// the trip count expression. 194249423Sdim CountValue *getLoopTripCount(MachineLoop *L, 195261991Sdim SmallVectorImpl<MachineInstr *> &OldInsts); 196234285Sdim 197249423Sdim /// \brief Return the expression that represents the number of times 198249423Sdim /// a loop iterates. The function takes the operands that represent the 199249423Sdim /// loop start value, loop end value, and induction value. Based upon 200249423Sdim /// these operands, the function attempts to compute the trip count. 201249423Sdim /// If the trip count is not directly available (as an immediate value, 202249423Sdim /// or a register), the function will attempt to insert computation of it 203249423Sdim /// to the loop's preheader. 204288943Sdim CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, 205288943Sdim const MachineOperand *End, unsigned IVReg, 206288943Sdim int64_t IVBump, Comparison::Kind Cmp) const; 207234285Sdim 208249423Sdim /// \brief Return true if the instruction is not valid within a hardware 209249423Sdim /// loop. 210288943Sdim bool isInvalidLoopOperation(const MachineInstr *MI, 211288943Sdim bool IsInnerHWLoop) const; 212234285Sdim 213249423Sdim /// \brief Return true if the loop contains an instruction that inhibits 214249423Sdim /// using the hardware loop. 215288943Sdim bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; 216234285Sdim 217249423Sdim /// \brief Given a loop, check if we can convert it to a hardware loop. 218249423Sdim /// If so, then perform the conversion and return true. 219288943Sdim bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); 220234285Sdim 221249423Sdim /// \brief Return true if the instruction is now dead. 222249423Sdim bool isDead(const MachineInstr *MI, 223261991Sdim SmallVectorImpl<MachineInstr *> &DeadPhis) const; 224249423Sdim 225249423Sdim /// \brief Remove the instruction if it is now dead. 226249423Sdim void removeIfDead(MachineInstr *MI); 227249423Sdim 228249423Sdim /// \brief Make sure that the "bump" instruction executes before the 229249423Sdim /// compare. We need that for the IV fixup, so that the compare 230249423Sdim /// instruction would not use a bumped value that has not yet been 231249423Sdim /// defined. If the instructions are out of order, try to reorder them. 232249423Sdim bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); 233249423Sdim 234288943Sdim /// \brief Return true if MO and MI pair is visited only once. If visited 235288943Sdim /// more than once, this indicates there is recursion. In such a case, 236288943Sdim /// return false. 237288943Sdim bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, 238288943Sdim const MachineOperand *MO, 239288943Sdim LoopFeederMap &LoopFeederPhi) const; 240249423Sdim 241288943Sdim /// \brief Return true if the Phi may generate a value that may underflow, 242288943Sdim /// or may wrap. 243288943Sdim bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, 244288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 245288943Sdim LoopFeederMap &LoopFeederPhi) const; 246249423Sdim 247288943Sdim /// \brief Return true if the induction variable may underflow an unsigned 248288943Sdim /// value in the first iteration. 249288943Sdim bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, 250288943Sdim const MachineOperand *EndVal, 251288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 252288943Sdim LoopFeederMap &LoopFeederPhi) const; 253288943Sdim 254288943Sdim /// \brief Check if the given operand has a compile-time known constant 255288943Sdim /// value. Return true if yes, and false otherwise. When returning true, set 256288943Sdim /// Val to the corresponding constant value. 257288943Sdim bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; 258288943Sdim 259288943Sdim /// \brief Check if the operand has a compile-time known constant value. 260288943Sdim bool isImmediate(const MachineOperand &MO) const { 261288943Sdim int64_t V; 262288943Sdim return checkForImmediate(MO, V); 263288943Sdim } 264288943Sdim 265288943Sdim /// \brief Return the immediate for the specified operand. 266288943Sdim int64_t getImmediate(const MachineOperand &MO) const { 267288943Sdim int64_t V; 268288943Sdim if (!checkForImmediate(MO, V)) 269288943Sdim llvm_unreachable("Invalid operand"); 270288943Sdim return V; 271288943Sdim } 272288943Sdim 273249423Sdim /// \brief Reset the given machine operand to now refer to a new immediate 274249423Sdim /// value. Assumes that the operand was already referencing an immediate 275249423Sdim /// value, either directly, or via a register. 276249423Sdim void setImmediate(MachineOperand &MO, int64_t Val); 277249423Sdim 278249423Sdim /// \brief Fix the data flow of the induction varible. 279249423Sdim /// The desired flow is: phi ---> bump -+-> comparison-in-latch. 280249423Sdim /// | 281249423Sdim /// +-> back to phi 282249423Sdim /// where "bump" is the increment of the induction variable: 283249423Sdim /// iv = iv + #const. 284249423Sdim /// Due to some prior code transformations, the actual flow may look 285249423Sdim /// like this: 286249423Sdim /// phi -+-> bump ---> back to phi 287249423Sdim /// | 288249423Sdim /// +-> comparison-in-latch (against upper_bound-bump), 289249423Sdim /// i.e. the comparison that controls the loop execution may be using 290249423Sdim /// the value of the induction variable from before the increment. 291249423Sdim /// 292249423Sdim /// Return true if the loop's flow is the desired one (i.e. it's 293249423Sdim /// either been fixed, or no fixing was necessary). 294249423Sdim /// Otherwise, return false. This can happen if the induction variable 295249423Sdim /// couldn't be identified, or if the value in the latch's comparison 296249423Sdim /// cannot be adjusted to reflect the post-bump value. 297249423Sdim bool fixupInductionVariable(MachineLoop *L); 298249423Sdim 299249423Sdim /// \brief Given a loop, if it does not have a preheader, create one. 300249423Sdim /// Return the block that is the preheader. 301249423Sdim MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); 302234285Sdim }; 303234285Sdim 304234285Sdim char HexagonHardwareLoops::ID = 0; 305249423Sdim#ifndef NDEBUG 306249423Sdim int HexagonHardwareLoops::Counter = 0; 307249423Sdim#endif 308234285Sdim 309280031Sdim /// \brief Abstraction for a trip count of a loop. A smaller version 310249423Sdim /// of the MachineOperand class without the concerns of changing the 311249423Sdim /// operand representation. 312234285Sdim class CountValue { 313234285Sdim public: 314234285Sdim enum CountValueType { 315234285Sdim CV_Register, 316234285Sdim CV_Immediate 317234285Sdim }; 318314564Sdim 319234285Sdim private: 320234285Sdim CountValueType Kind; 321234285Sdim union Values { 322249423Sdim struct { 323249423Sdim unsigned Reg; 324249423Sdim unsigned Sub; 325249423Sdim } R; 326249423Sdim unsigned ImmVal; 327234285Sdim } Contents; 328234285Sdim 329234285Sdim public: 330249423Sdim explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { 331249423Sdim Kind = t; 332249423Sdim if (Kind == CV_Register) { 333249423Sdim Contents.R.Reg = v; 334249423Sdim Contents.R.Sub = u; 335249423Sdim } else { 336249423Sdim Contents.ImmVal = v; 337249423Sdim } 338249423Sdim } 339314564Sdim 340234285Sdim bool isReg() const { return Kind == CV_Register; } 341234285Sdim bool isImm() const { return Kind == CV_Immediate; } 342234285Sdim 343234285Sdim unsigned getReg() const { 344234285Sdim assert(isReg() && "Wrong CountValue accessor"); 345249423Sdim return Contents.R.Reg; 346234285Sdim } 347249423Sdim unsigned getSubReg() const { 348249423Sdim assert(isReg() && "Wrong CountValue accessor"); 349249423Sdim return Contents.R.Sub; 350234285Sdim } 351249423Sdim unsigned getImm() const { 352234285Sdim assert(isImm() && "Wrong CountValue accessor"); 353234285Sdim return Contents.ImmVal; 354234285Sdim } 355234285Sdim 356288943Sdim void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { 357249423Sdim if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } 358249423Sdim if (isImm()) { OS << Contents.ImmVal; } 359234285Sdim } 360234285Sdim }; 361314564Sdim 362249423Sdim} // end anonymous namespace 363234285Sdim 364249423SdimINITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", 365249423Sdim "Hexagon Hardware Loops", false, false) 366249423SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 367249423SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 368249423SdimINITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", 369249423Sdim "Hexagon Hardware Loops", false, false) 370234285Sdim 371234285SdimFunctionPass *llvm::createHexagonHardwareLoops() { 372234285Sdim return new HexagonHardwareLoops(); 373234285Sdim} 374234285Sdim 375234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { 376234285Sdim DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); 377309124Sdim if (skipFunction(*MF.getFunction())) 378309124Sdim return false; 379234285Sdim 380234285Sdim bool Changed = false; 381234285Sdim 382234285Sdim MLI = &getAnalysis<MachineLoopInfo>(); 383234285Sdim MRI = &MF.getRegInfo(); 384249423Sdim MDT = &getAnalysis<MachineDominatorTree>(); 385321369Sdim const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); 386321369Sdim TII = HST.getInstrInfo(); 387321369Sdim TRI = HST.getRegisterInfo(); 388234285Sdim 389288943Sdim for (auto &L : *MLI) 390288943Sdim if (!L->getParentLoop()) { 391288943Sdim bool L0Used = false; 392288943Sdim bool L1Used = false; 393288943Sdim Changed |= convertToHardwareLoop(L, L0Used, L1Used); 394288943Sdim } 395234285Sdim 396234285Sdim return Changed; 397234285Sdim} 398234285Sdim 399249423Sdimbool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, 400249423Sdim unsigned &Reg, 401249423Sdim int64_t &IVBump, 402249423Sdim MachineInstr *&IVOp 403249423Sdim ) const { 404249423Sdim MachineBasicBlock *Header = L->getHeader(); 405314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 406249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 407314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 408288943Sdim if (!Header || !Preheader || !Latch || !ExitingBlock) 409249423Sdim return false; 410249423Sdim 411249423Sdim // This pair represents an induction register together with an immediate 412249423Sdim // value that will be added to it in each loop iteration. 413249423Sdim typedef std::pair<unsigned,int64_t> RegisterBump; 414249423Sdim 415249423Sdim // Mapping: R.next -> (R, bump), where R, R.next and bump are derived 416249423Sdim // from an induction operation 417249423Sdim // R.next = R + bump 418249423Sdim // where bump is an immediate value. 419249423Sdim typedef std::map<unsigned,RegisterBump> InductionMap; 420249423Sdim 421249423Sdim InductionMap IndMap; 422249423Sdim 423249423Sdim typedef MachineBasicBlock::instr_iterator instr_iterator; 424249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 425249423Sdim I != E && I->isPHI(); ++I) { 426249423Sdim MachineInstr *Phi = &*I; 427249423Sdim 428249423Sdim // Have a PHI instruction. Get the operand that corresponds to the 429249423Sdim // latch block, and see if is a result of an addition of form "reg+imm", 430249423Sdim // where the "reg" is defined by the PHI node we are looking at. 431249423Sdim for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 432249423Sdim if (Phi->getOperand(i+1).getMBB() != Latch) 433249423Sdim continue; 434249423Sdim 435249423Sdim unsigned PhiOpReg = Phi->getOperand(i).getReg(); 436249423Sdim MachineInstr *DI = MRI->getVRegDef(PhiOpReg); 437249423Sdim 438314564Sdim if (DI->getDesc().isAdd()) { 439288943Sdim // If the register operand to the add is the PHI we're looking at, this 440288943Sdim // meets the induction pattern. 441249423Sdim unsigned IndReg = DI->getOperand(1).getReg(); 442288943Sdim MachineOperand &Opnd2 = DI->getOperand(2); 443288943Sdim int64_t V; 444288943Sdim if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 445249423Sdim unsigned UpdReg = DI->getOperand(0).getReg(); 446249423Sdim IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 447249423Sdim } 448249423Sdim } 449249423Sdim } // for (i) 450249423Sdim } // for (instr) 451249423Sdim 452249423Sdim SmallVector<MachineOperand,2> Cond; 453276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 454309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 455249423Sdim if (NotAnalyzed) 456249423Sdim return false; 457249423Sdim 458288943Sdim unsigned PredR, PredPos, PredRegFlags; 459288943Sdim if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) 460288943Sdim return false; 461249423Sdim 462249423Sdim MachineInstr *PredI = MRI->getVRegDef(PredR); 463249423Sdim if (!PredI->isCompare()) 464249423Sdim return false; 465249423Sdim 466249423Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 467249423Sdim int CmpImm = 0, CmpMask = 0; 468309124Sdim bool CmpAnalyzed = 469309124Sdim TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); 470249423Sdim // Fail if the compare was not analyzed, or it's not comparing a register 471249423Sdim // with an immediate value. Not checking the mask here, since we handle 472288943Sdim // the individual compare opcodes (including A4_cmpb*) later on. 473249423Sdim if (!CmpAnalyzed) 474249423Sdim return false; 475249423Sdim 476249423Sdim // Exactly one of the input registers to the comparison should be among 477249423Sdim // the induction registers. 478249423Sdim InductionMap::iterator IndMapEnd = IndMap.end(); 479249423Sdim InductionMap::iterator F = IndMapEnd; 480249423Sdim if (CmpReg1 != 0) { 481249423Sdim InductionMap::iterator F1 = IndMap.find(CmpReg1); 482249423Sdim if (F1 != IndMapEnd) 483249423Sdim F = F1; 484249423Sdim } 485249423Sdim if (CmpReg2 != 0) { 486249423Sdim InductionMap::iterator F2 = IndMap.find(CmpReg2); 487249423Sdim if (F2 != IndMapEnd) { 488249423Sdim if (F != IndMapEnd) 489249423Sdim return false; 490249423Sdim F = F2; 491249423Sdim } 492249423Sdim } 493249423Sdim if (F == IndMapEnd) 494249423Sdim return false; 495249423Sdim 496249423Sdim Reg = F->second.first; 497249423Sdim IVBump = F->second.second; 498249423Sdim IVOp = MRI->getVRegDef(F->first); 499249423Sdim return true; 500249423Sdim} 501249423Sdim 502288943Sdim// Return the comparison kind for the specified opcode. 503288943SdimHexagonHardwareLoops::Comparison::Kind 504288943SdimHexagonHardwareLoops::getComparisonKind(unsigned CondOpc, 505288943Sdim MachineOperand *InitialValue, 506288943Sdim const MachineOperand *EndValue, 507288943Sdim int64_t IVBump) const { 508288943Sdim Comparison::Kind Cmp = (Comparison::Kind)0; 509288943Sdim switch (CondOpc) { 510288943Sdim case Hexagon::C2_cmpeqi: 511288943Sdim case Hexagon::C2_cmpeq: 512288943Sdim case Hexagon::C2_cmpeqp: 513288943Sdim Cmp = Comparison::EQ; 514288943Sdim break; 515288943Sdim case Hexagon::C4_cmpneq: 516288943Sdim case Hexagon::C4_cmpneqi: 517288943Sdim Cmp = Comparison::NE; 518288943Sdim break; 519288943Sdim case Hexagon::C4_cmplte: 520288943Sdim Cmp = Comparison::LEs; 521288943Sdim break; 522288943Sdim case Hexagon::C4_cmplteu: 523288943Sdim Cmp = Comparison::LEu; 524288943Sdim break; 525288943Sdim case Hexagon::C2_cmpgtui: 526288943Sdim case Hexagon::C2_cmpgtu: 527288943Sdim case Hexagon::C2_cmpgtup: 528288943Sdim Cmp = Comparison::GTu; 529288943Sdim break; 530288943Sdim case Hexagon::C2_cmpgti: 531288943Sdim case Hexagon::C2_cmpgt: 532288943Sdim case Hexagon::C2_cmpgtp: 533288943Sdim Cmp = Comparison::GTs; 534288943Sdim break; 535288943Sdim default: 536288943Sdim return (Comparison::Kind)0; 537288943Sdim } 538288943Sdim return Cmp; 539288943Sdim} 540249423Sdim 541249423Sdim/// \brief Analyze the statements in a loop to determine if the loop has 542249423Sdim/// a computable trip count and, if so, return a value that represents 543249423Sdim/// the trip count expression. 544234285Sdim/// 545249423Sdim/// This function iterates over the phi nodes in the loop to check for 546249423Sdim/// induction variable patterns that are used in the calculation for 547249423Sdim/// the number of time the loop is executed. 548249423SdimCountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, 549288943Sdim SmallVectorImpl<MachineInstr *> &OldInsts) { 550234285Sdim MachineBasicBlock *TopMBB = L->getTopBlock(); 551234285Sdim MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); 552234285Sdim assert(PI != TopMBB->pred_end() && 553234285Sdim "Loop must have more than one incoming edge!"); 554234285Sdim MachineBasicBlock *Backedge = *PI++; 555249423Sdim if (PI == TopMBB->pred_end()) // dead loop? 556276479Sdim return nullptr; 557234285Sdim MachineBasicBlock *Incoming = *PI++; 558249423Sdim if (PI != TopMBB->pred_end()) // multiple backedges? 559276479Sdim return nullptr; 560234285Sdim 561249423Sdim // Make sure there is one incoming and one backedge and determine which 562234285Sdim // is which. 563234285Sdim if (L->contains(Incoming)) { 564234285Sdim if (L->contains(Backedge)) 565276479Sdim return nullptr; 566234285Sdim std::swap(Incoming, Backedge); 567234285Sdim } else if (!L->contains(Backedge)) 568276479Sdim return nullptr; 569234285Sdim 570249423Sdim // Look for the cmp instruction to determine if we can get a useful trip 571249423Sdim // count. The trip count can be either a register or an immediate. The 572249423Sdim // location of the value depends upon the type (reg or imm). 573314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 574288943Sdim if (!ExitingBlock) 575276479Sdim return nullptr; 576249423Sdim 577249423Sdim unsigned IVReg = 0; 578249423Sdim int64_t IVBump = 0; 579249423Sdim MachineInstr *IVOp; 580249423Sdim bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); 581249423Sdim if (!FoundIV) 582276479Sdim return nullptr; 583249423Sdim 584314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 585249423Sdim 586276479Sdim MachineOperand *InitialValue = nullptr; 587249423Sdim MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); 588288943Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 589249423Sdim for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { 590249423Sdim MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); 591249423Sdim if (MBB == Preheader) 592249423Sdim InitialValue = &IV_Phi->getOperand(i); 593249423Sdim else if (MBB == Latch) 594249423Sdim IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. 595234285Sdim } 596249423Sdim if (!InitialValue) 597276479Sdim return nullptr; 598234285Sdim 599249423Sdim SmallVector<MachineOperand,2> Cond; 600276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 601309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 602249423Sdim if (NotAnalyzed) 603276479Sdim return nullptr; 604234285Sdim 605249423Sdim MachineBasicBlock *Header = L->getHeader(); 606249423Sdim // TB must be non-null. If FB is also non-null, one of them must be 607249423Sdim // the header. Otherwise, branch to TB could be exiting the loop, and 608249423Sdim // the fall through can go to the header. 609288943Sdim assert (TB && "Exit block without a branch?"); 610288943Sdim if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 611314564Sdim MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 612288943Sdim SmallVector<MachineOperand,2> LCond; 613309124Sdim bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 614288943Sdim if (NotAnalyzed) 615288943Sdim return nullptr; 616288943Sdim if (TB == Latch) 617288943Sdim TB = (LTB == Header) ? LTB : LFB; 618288943Sdim else 619288943Sdim FB = (LTB == Header) ? LTB: LFB; 620288943Sdim } 621249423Sdim assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); 622249423Sdim if (!TB || (FB && TB != Header && FB != Header)) 623276479Sdim return nullptr; 624249423Sdim 625249423Sdim // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch 626249423Sdim // to put imm(0), followed by P in the vector Cond. 627249423Sdim // If TB is not the header, it means that the "not-taken" path must lead 628249423Sdim // to the header. 629288943Sdim bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); 630288943Sdim unsigned PredReg, PredPos, PredRegFlags; 631288943Sdim if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) 632288943Sdim return nullptr; 633249423Sdim MachineInstr *CondI = MRI->getVRegDef(PredReg); 634249423Sdim unsigned CondOpc = CondI->getOpcode(); 635249423Sdim 636249423Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 637249423Sdim int Mask = 0, ImmValue = 0; 638309124Sdim bool AnalyzedCmp = 639309124Sdim TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); 640249423Sdim if (!AnalyzedCmp) 641276479Sdim return nullptr; 642249423Sdim 643249423Sdim // The comparison operator type determines how we compute the loop 644249423Sdim // trip count. 645249423Sdim OldInsts.push_back(CondI); 646249423Sdim OldInsts.push_back(IVOp); 647249423Sdim 648249423Sdim // Sadly, the following code gets information based on the position 649249423Sdim // of the operands in the compare instruction. This has to be done 650249423Sdim // this way, because the comparisons check for a specific relationship 651249423Sdim // between the operands (e.g. is-less-than), rather than to find out 652249423Sdim // what relationship the operands are in (as on PPC). 653249423Sdim Comparison::Kind Cmp; 654249423Sdim bool isSwapped = false; 655249423Sdim const MachineOperand &Op1 = CondI->getOperand(1); 656249423Sdim const MachineOperand &Op2 = CondI->getOperand(2); 657276479Sdim const MachineOperand *EndValue = nullptr; 658249423Sdim 659249423Sdim if (Op1.isReg()) { 660249423Sdim if (Op2.isImm() || Op1.getReg() == IVReg) 661249423Sdim EndValue = &Op2; 662249423Sdim else { 663249423Sdim EndValue = &Op1; 664249423Sdim isSwapped = true; 665249423Sdim } 666234285Sdim } 667234285Sdim 668249423Sdim if (!EndValue) 669276479Sdim return nullptr; 670234285Sdim 671288943Sdim Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); 672288943Sdim if (!Cmp) 673288943Sdim return nullptr; 674288943Sdim if (Negated) 675288943Sdim Cmp = Comparison::getNegatedComparison(Cmp); 676249423Sdim if (isSwapped) 677288943Sdim Cmp = Comparison::getSwappedComparison(Cmp); 678249423Sdim 679249423Sdim if (InitialValue->isReg()) { 680249423Sdim unsigned R = InitialValue->getReg(); 681249423Sdim MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 682249423Sdim if (!MDT->properlyDominates(DefBB, Header)) 683276479Sdim return nullptr; 684249423Sdim OldInsts.push_back(MRI->getVRegDef(R)); 685249423Sdim } 686249423Sdim if (EndValue->isReg()) { 687249423Sdim unsigned R = EndValue->getReg(); 688249423Sdim MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 689249423Sdim if (!MDT->properlyDominates(DefBB, Header)) 690276479Sdim return nullptr; 691288943Sdim OldInsts.push_back(MRI->getVRegDef(R)); 692249423Sdim } 693249423Sdim 694249423Sdim return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); 695234285Sdim} 696234285Sdim 697249423Sdim/// \brief Helper function that returns the expression that represents the 698249423Sdim/// number of times a loop iterates. The function takes the operands that 699249423Sdim/// represent the loop start value, loop end value, and induction value. 700249423Sdim/// Based upon these operands, the function attempts to compute the trip count. 701249423SdimCountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, 702249423Sdim const MachineOperand *Start, 703249423Sdim const MachineOperand *End, 704249423Sdim unsigned IVReg, 705249423Sdim int64_t IVBump, 706249423Sdim Comparison::Kind Cmp) const { 707249423Sdim // Cannot handle comparison EQ, i.e. while (A == B). 708249423Sdim if (Cmp == Comparison::EQ) 709276479Sdim return nullptr; 710249423Sdim 711249423Sdim // Check if either the start or end values are an assignment of an immediate. 712249423Sdim // If so, use the immediate value rather than the register. 713249423Sdim if (Start->isReg()) { 714249423Sdim const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); 715288943Sdim if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || 716288943Sdim StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) 717249423Sdim Start = &StartValInstr->getOperand(1); 718249423Sdim } 719249423Sdim if (End->isReg()) { 720249423Sdim const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 721288943Sdim if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || 722288943Sdim EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) 723249423Sdim End = &EndValInstr->getOperand(1); 724249423Sdim } 725249423Sdim 726288943Sdim if (!Start->isReg() && !Start->isImm()) 727288943Sdim return nullptr; 728288943Sdim if (!End->isReg() && !End->isImm()) 729288943Sdim return nullptr; 730249423Sdim 731249423Sdim bool CmpLess = Cmp & Comparison::L; 732249423Sdim bool CmpGreater = Cmp & Comparison::G; 733249423Sdim bool CmpHasEqual = Cmp & Comparison::EQ; 734249423Sdim 735249423Sdim // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. 736249423Sdim if (CmpLess && IVBump < 0) 737288943Sdim // Loop going while iv is "less" with the iv value going down. Must wrap. 738276479Sdim return nullptr; 739288943Sdim 740249423Sdim if (CmpGreater && IVBump > 0) 741288943Sdim // Loop going while iv is "greater" with the iv value going up. Must wrap. 742276479Sdim return nullptr; 743249423Sdim 744288943Sdim // Phis that may feed into the loop. 745288943Sdim LoopFeederMap LoopFeederPhi; 746288943Sdim 747296417Sdim // Check if the initial value may be zero and can be decremented in the first 748288943Sdim // iteration. If the value is zero, the endloop instruction will not decrement 749296417Sdim // the loop counter, so we shouldn't generate a hardware loop in this case. 750288943Sdim if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, 751288943Sdim LoopFeederPhi)) 752288943Sdim return nullptr; 753288943Sdim 754249423Sdim if (Start->isImm() && End->isImm()) { 755249423Sdim // Both, start and end are immediates. 756249423Sdim int64_t StartV = Start->getImm(); 757249423Sdim int64_t EndV = End->getImm(); 758249423Sdim int64_t Dist = EndV - StartV; 759249423Sdim if (Dist == 0) 760276479Sdim return nullptr; 761249423Sdim 762249423Sdim bool Exact = (Dist % IVBump) == 0; 763249423Sdim 764249423Sdim if (Cmp == Comparison::NE) { 765249423Sdim if (!Exact) 766276479Sdim return nullptr; 767249423Sdim if ((Dist < 0) ^ (IVBump < 0)) 768276479Sdim return nullptr; 769249423Sdim } 770249423Sdim 771249423Sdim // For comparisons that include the final value (i.e. include equality 772249423Sdim // with the final value), we need to increase the distance by 1. 773249423Sdim if (CmpHasEqual) 774249423Sdim Dist = Dist > 0 ? Dist+1 : Dist-1; 775249423Sdim 776288943Sdim // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, 777288943Sdim // CmpGreater should imply Dist < 0. These conditions could actually 778288943Sdim // fail, for example, in unreachable code (which may still appear to be 779288943Sdim // reachable in the CFG). 780288943Sdim if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) 781288943Sdim return nullptr; 782249423Sdim 783249423Sdim // "Normalized" distance, i.e. with the bump set to +-1. 784288943Sdim int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump 785288943Sdim : (-Dist + (-IVBump - 1)) / (-IVBump); 786249423Sdim assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); 787249423Sdim 788249423Sdim uint64_t Count = Dist1; 789249423Sdim 790249423Sdim if (Count > 0xFFFFFFFFULL) 791276479Sdim return nullptr; 792249423Sdim 793249423Sdim return new CountValue(CountValue::CV_Immediate, Count); 794249423Sdim } 795249423Sdim 796249423Sdim // A general case: Start and End are some values, but the actual 797249423Sdim // iteration count may not be available. If it is not, insert 798249423Sdim // a computation of it into the preheader. 799249423Sdim 800249423Sdim // If the induction variable bump is not a power of 2, quit. 801249423Sdim // Othwerise we'd need a general integer division. 802288943Sdim if (!isPowerOf2_64(std::abs(IVBump))) 803276479Sdim return nullptr; 804249423Sdim 805314564Sdim MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); 806249423Sdim assert (PH && "Should have a preheader by now"); 807249423Sdim MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); 808288943Sdim DebugLoc DL; 809288943Sdim if (InsertPos != PH->end()) 810288943Sdim DL = InsertPos->getDebugLoc(); 811249423Sdim 812249423Sdim // If Start is an immediate and End is a register, the trip count 813249423Sdim // will be "reg - imm". Hexagon's "subtract immediate" instruction 814249423Sdim // is actually "reg + -imm". 815249423Sdim 816249423Sdim // If the loop IV is going downwards, i.e. if the bump is negative, 817249423Sdim // then the iteration count (computed as End-Start) will need to be 818249423Sdim // negated. To avoid the negation, just swap Start and End. 819249423Sdim if (IVBump < 0) { 820249423Sdim std::swap(Start, End); 821249423Sdim IVBump = -IVBump; 822249423Sdim } 823249423Sdim // Cmp may now have a wrong direction, e.g. LEs may now be GEs. 824249423Sdim // Signedness, and "including equality" are preserved. 825249423Sdim 826249423Sdim bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) 827249423Sdim bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) 828249423Sdim 829249423Sdim int64_t StartV = 0, EndV = 0; 830249423Sdim if (Start->isImm()) 831249423Sdim StartV = Start->getImm(); 832249423Sdim if (End->isImm()) 833249423Sdim EndV = End->getImm(); 834249423Sdim 835249423Sdim int64_t AdjV = 0; 836249423Sdim // To compute the iteration count, we would need this computation: 837249423Sdim // Count = (End - Start + (IVBump-1)) / IVBump 838249423Sdim // or, when CmpHasEqual: 839249423Sdim // Count = (End - Start + (IVBump-1)+1) / IVBump 840249423Sdim // The "IVBump-1" part is the adjustment (AdjV). We can avoid 841249423Sdim // generating an instruction specifically to add it if we can adjust 842249423Sdim // the immediate values for Start or End. 843249423Sdim 844249423Sdim if (CmpHasEqual) { 845249423Sdim // Need to add 1 to the total iteration count. 846249423Sdim if (Start->isImm()) 847249423Sdim StartV--; 848249423Sdim else if (End->isImm()) 849249423Sdim EndV++; 850249423Sdim else 851249423Sdim AdjV += 1; 852249423Sdim } 853249423Sdim 854249423Sdim if (Cmp != Comparison::NE) { 855249423Sdim if (Start->isImm()) 856249423Sdim StartV -= (IVBump-1); 857249423Sdim else if (End->isImm()) 858249423Sdim EndV += (IVBump-1); 859249423Sdim else 860249423Sdim AdjV += (IVBump-1); 861249423Sdim } 862249423Sdim 863249423Sdim unsigned R = 0, SR = 0; 864249423Sdim if (Start->isReg()) { 865249423Sdim R = Start->getReg(); 866249423Sdim SR = Start->getSubReg(); 867249423Sdim } else { 868249423Sdim R = End->getReg(); 869249423Sdim SR = End->getSubReg(); 870249423Sdim } 871249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(R); 872249423Sdim // Hardware loops cannot handle 64-bit registers. If it's a double 873249423Sdim // register, it has to have a subregister. 874249423Sdim if (!SR && RC == &Hexagon::DoubleRegsRegClass) 875276479Sdim return nullptr; 876249423Sdim const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; 877249423Sdim 878249423Sdim // Compute DistR (register with the distance between Start and End). 879249423Sdim unsigned DistR, DistSR; 880249423Sdim 881249423Sdim // Avoid special case, where the start value is an imm(0). 882249423Sdim if (Start->isImm() && StartV == 0) { 883249423Sdim DistR = End->getReg(); 884249423Sdim DistSR = End->getSubReg(); 885249423Sdim } else { 886280031Sdim const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : 887288943Sdim (RegToImm ? TII->get(Hexagon::A2_subri) : 888288943Sdim TII->get(Hexagon::A2_addi)); 889288943Sdim if (RegToReg || RegToImm) { 890288943Sdim unsigned SubR = MRI->createVirtualRegister(IntRC); 891288943Sdim MachineInstrBuilder SubIB = 892288943Sdim BuildMI(*PH, InsertPos, DL, SubD, SubR); 893249423Sdim 894288943Sdim if (RegToReg) 895288943Sdim SubIB.addReg(End->getReg(), 0, End->getSubReg()) 896288943Sdim .addReg(Start->getReg(), 0, Start->getSubReg()); 897288943Sdim else 898288943Sdim SubIB.addImm(EndV) 899288943Sdim .addReg(Start->getReg(), 0, Start->getSubReg()); 900288943Sdim DistR = SubR; 901288943Sdim } else { 902288943Sdim // If the loop has been unrolled, we should use the original loop count 903288943Sdim // instead of recalculating the value. This will avoid additional 904288943Sdim // 'Add' instruction. 905288943Sdim const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 906288943Sdim if (EndValInstr->getOpcode() == Hexagon::A2_addi && 907288943Sdim EndValInstr->getOperand(2).getImm() == StartV) { 908288943Sdim DistR = EndValInstr->getOperand(1).getReg(); 909288943Sdim } else { 910288943Sdim unsigned SubR = MRI->createVirtualRegister(IntRC); 911288943Sdim MachineInstrBuilder SubIB = 912288943Sdim BuildMI(*PH, InsertPos, DL, SubD, SubR); 913288943Sdim SubIB.addReg(End->getReg(), 0, End->getSubReg()) 914288943Sdim .addImm(-StartV); 915288943Sdim DistR = SubR; 916288943Sdim } 917249423Sdim } 918249423Sdim DistSR = 0; 919249423Sdim } 920249423Sdim 921249423Sdim // From DistR, compute AdjR (register with the adjusted distance). 922249423Sdim unsigned AdjR, AdjSR; 923249423Sdim 924249423Sdim if (AdjV == 0) { 925249423Sdim AdjR = DistR; 926249423Sdim AdjSR = DistSR; 927249423Sdim } else { 928249423Sdim // Generate CountR = ADD DistR, AdjVal 929249423Sdim unsigned AddR = MRI->createVirtualRegister(IntRC); 930288943Sdim MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); 931249423Sdim BuildMI(*PH, InsertPos, DL, AddD, AddR) 932249423Sdim .addReg(DistR, 0, DistSR) 933249423Sdim .addImm(AdjV); 934249423Sdim 935249423Sdim AdjR = AddR; 936249423Sdim AdjSR = 0; 937249423Sdim } 938249423Sdim 939249423Sdim // From AdjR, compute CountR (register with the final count). 940249423Sdim unsigned CountR, CountSR; 941249423Sdim 942249423Sdim if (IVBump == 1) { 943249423Sdim CountR = AdjR; 944249423Sdim CountSR = AdjSR; 945249423Sdim } else { 946249423Sdim // The IV bump is a power of two. Log_2(IV bump) is the shift amount. 947249423Sdim unsigned Shift = Log2_32(IVBump); 948249423Sdim 949249423Sdim // Generate NormR = LSR DistR, Shift. 950249423Sdim unsigned LsrR = MRI->createVirtualRegister(IntRC); 951280031Sdim const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); 952249423Sdim BuildMI(*PH, InsertPos, DL, LsrD, LsrR) 953249423Sdim .addReg(AdjR, 0, AdjSR) 954249423Sdim .addImm(Shift); 955249423Sdim 956249423Sdim CountR = LsrR; 957249423Sdim CountSR = 0; 958249423Sdim } 959249423Sdim 960249423Sdim return new CountValue(CountValue::CV_Register, CountR, CountSR); 961234285Sdim} 962234285Sdim 963249423Sdim/// \brief Return true if the operation is invalid within hardware loop. 964288943Sdimbool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, 965288943Sdim bool IsInnerHWLoop) const { 966288943Sdim // Call is not allowed because the callee may use a hardware loop except for 967288943Sdim // the case when the call never returns. 968314564Sdim if (MI->getDesc().isCall()) 969314564Sdim return !TII->doesNotReturn(*MI); 970249423Sdim 971288943Sdim // Check if the instruction defines a hardware loop register. 972321369Sdim using namespace Hexagon; 973321369Sdim static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 }; 974321369Sdim static const unsigned Regs1[] = { LC1, SA1 }; 975321369Sdim auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01)) 976321369Sdim : makeArrayRef(Regs1, array_lengthof(Regs1)); 977321369Sdim for (unsigned R : CheckRegs) 978321369Sdim if (MI->modifiesRegister(R, TRI)) 979234285Sdim return true; 980321369Sdim 981234285Sdim return false; 982234285Sdim} 983234285Sdim 984288943Sdim/// \brief Return true if the loop contains an instruction that inhibits 985288943Sdim/// the use of the hardware loop instruction. 986288943Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, 987288943Sdim bool IsInnerHWLoop) const { 988261991Sdim const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); 989288943Sdim DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); 990234285Sdim for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 991234285Sdim MachineBasicBlock *MBB = Blocks[i]; 992234285Sdim for (MachineBasicBlock::iterator 993234285Sdim MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { 994234285Sdim const MachineInstr *MI = &*MII; 995288943Sdim if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { 996288943Sdim DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); 997234285Sdim return true; 998288943Sdim } 999234285Sdim } 1000234285Sdim } 1001234285Sdim return false; 1002234285Sdim} 1003234285Sdim 1004249423Sdim/// \brief Returns true if the instruction is dead. This was essentially 1005249423Sdim/// copied from DeadMachineInstructionElim::isDead, but with special cases 1006249423Sdim/// for inline asm, physical registers and instructions with side effects 1007249423Sdim/// removed. 1008249423Sdimbool HexagonHardwareLoops::isDead(const MachineInstr *MI, 1009261991Sdim SmallVectorImpl<MachineInstr *> &DeadPhis) const { 1010249423Sdim // Examine each operand. 1011249423Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1012249423Sdim const MachineOperand &MO = MI->getOperand(i); 1013249423Sdim if (!MO.isReg() || !MO.isDef()) 1014249423Sdim continue; 1015249423Sdim 1016249423Sdim unsigned Reg = MO.getReg(); 1017249423Sdim if (MRI->use_nodbg_empty(Reg)) 1018249423Sdim continue; 1019249423Sdim 1020249423Sdim typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; 1021249423Sdim 1022249423Sdim // This instruction has users, but if the only user is the phi node for the 1023249423Sdim // parent block, and the only use of that phi node is this instruction, then 1024249423Sdim // this instruction is dead: both it (and the phi node) can be removed. 1025249423Sdim use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); 1026249423Sdim use_nodbg_iterator End = MRI->use_nodbg_end(); 1027276479Sdim if (std::next(I) != End || !I->getParent()->isPHI()) 1028249423Sdim return false; 1029249423Sdim 1030276479Sdim MachineInstr *OnePhi = I->getParent(); 1031249423Sdim for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { 1032249423Sdim const MachineOperand &OPO = OnePhi->getOperand(j); 1033249423Sdim if (!OPO.isReg() || !OPO.isDef()) 1034249423Sdim continue; 1035249423Sdim 1036249423Sdim unsigned OPReg = OPO.getReg(); 1037249423Sdim use_nodbg_iterator nextJ; 1038249423Sdim for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); 1039249423Sdim J != End; J = nextJ) { 1040276479Sdim nextJ = std::next(J); 1041276479Sdim MachineOperand &Use = *J; 1042249423Sdim MachineInstr *UseMI = Use.getParent(); 1043249423Sdim 1044288943Sdim // If the phi node has a user that is not MI, bail. 1045249423Sdim if (MI != UseMI) 1046249423Sdim return false; 1047249423Sdim } 1048249423Sdim } 1049249423Sdim DeadPhis.push_back(OnePhi); 1050249423Sdim } 1051249423Sdim 1052249423Sdim // If there are no defs with uses, the instruction is dead. 1053249423Sdim return true; 1054249423Sdim} 1055249423Sdim 1056249423Sdimvoid HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { 1057249423Sdim // This procedure was essentially copied from DeadMachineInstructionElim. 1058249423Sdim 1059249423Sdim SmallVector<MachineInstr*, 1> DeadPhis; 1060249423Sdim if (isDead(MI, DeadPhis)) { 1061249423Sdim DEBUG(dbgs() << "HW looping will remove: " << *MI); 1062249423Sdim 1063249423Sdim // It is possible that some DBG_VALUE instructions refer to this 1064249423Sdim // instruction. Examine each def operand for such references; 1065249423Sdim // if found, mark the DBG_VALUE as undef (but don't delete it). 1066249423Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1067249423Sdim const MachineOperand &MO = MI->getOperand(i); 1068249423Sdim if (!MO.isReg() || !MO.isDef()) 1069249423Sdim continue; 1070249423Sdim unsigned Reg = MO.getReg(); 1071249423Sdim MachineRegisterInfo::use_iterator nextI; 1072249423Sdim for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), 1073249423Sdim E = MRI->use_end(); I != E; I = nextI) { 1074276479Sdim nextI = std::next(I); // I is invalidated by the setReg 1075276479Sdim MachineOperand &Use = *I; 1076276479Sdim MachineInstr *UseMI = I->getParent(); 1077249423Sdim if (UseMI == MI) 1078249423Sdim continue; 1079249423Sdim if (Use.isDebug()) 1080249423Sdim UseMI->getOperand(0).setReg(0U); 1081249423Sdim } 1082249423Sdim } 1083249423Sdim 1084249423Sdim MI->eraseFromParent(); 1085249423Sdim for (unsigned i = 0; i < DeadPhis.size(); ++i) 1086249423Sdim DeadPhis[i]->eraseFromParent(); 1087249423Sdim } 1088249423Sdim} 1089249423Sdim 1090249423Sdim/// \brief Check if the loop is a candidate for converting to a hardware 1091249423Sdim/// loop. If so, then perform the transformation. 1092234285Sdim/// 1093249423Sdim/// This function works on innermost loops first. A loop can be converted 1094249423Sdim/// if it is a counting loop; either a register value or an immediate. 1095234285Sdim/// 1096249423Sdim/// The code makes several assumptions about the representation of the loop 1097249423Sdim/// in llvm. 1098288943Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, 1099288943Sdim bool &RecL0used, 1100288943Sdim bool &RecL1used) { 1101249423Sdim // This is just for sanity. 1102249423Sdim assert(L->getHeader() && "Loop without a header?"); 1103249423Sdim 1104234285Sdim bool Changed = false; 1105288943Sdim bool L0Used = false; 1106288943Sdim bool L1Used = false; 1107288943Sdim 1108234285Sdim // Process nested loops first. 1109288943Sdim for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 1110288943Sdim Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); 1111288943Sdim L0Used |= RecL0used; 1112288943Sdim L1Used |= RecL1used; 1113288943Sdim } 1114249423Sdim 1115234285Sdim // If a nested loop has been converted, then we can't convert this loop. 1116288943Sdim if (Changed && L0Used && L1Used) 1117234285Sdim return Changed; 1118249423Sdim 1119288943Sdim unsigned LOOP_i; 1120288943Sdim unsigned LOOP_r; 1121288943Sdim unsigned ENDLOOP; 1122288943Sdim 1123288943Sdim // Flag used to track loopN instruction: 1124288943Sdim // 1 - Hardware loop is being generated for the inner most loop. 1125288943Sdim // 0 - Hardware loop is being generated for the outer loop. 1126288943Sdim unsigned IsInnerHWLoop = 1; 1127288943Sdim 1128288943Sdim if (L0Used) { 1129288943Sdim LOOP_i = Hexagon::J2_loop1i; 1130288943Sdim LOOP_r = Hexagon::J2_loop1r; 1131288943Sdim ENDLOOP = Hexagon::ENDLOOP1; 1132288943Sdim IsInnerHWLoop = 0; 1133288943Sdim } else { 1134288943Sdim LOOP_i = Hexagon::J2_loop0i; 1135288943Sdim LOOP_r = Hexagon::J2_loop0r; 1136288943Sdim ENDLOOP = Hexagon::ENDLOOP0; 1137288943Sdim } 1138288943Sdim 1139249423Sdim#ifndef NDEBUG 1140249423Sdim // Stop trying after reaching the limit (if any). 1141249423Sdim int Limit = HWLoopLimit; 1142249423Sdim if (Limit >= 0) { 1143249423Sdim if (Counter >= HWLoopLimit) 1144249423Sdim return false; 1145249423Sdim Counter++; 1146234285Sdim } 1147249423Sdim#endif 1148249423Sdim 1149234285Sdim // Does the loop contain any invalid instructions? 1150288943Sdim if (containsInvalidInstruction(L, IsInnerHWLoop)) 1151234285Sdim return false; 1152249423Sdim 1153314564Sdim MachineBasicBlock *LastMBB = L->findLoopControlBlock(); 1154234285Sdim // Don't generate hw loop if the loop has more than one exit. 1155276479Sdim if (!LastMBB) 1156234285Sdim return false; 1157249423Sdim 1158234285Sdim MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); 1159249423Sdim if (LastI == LastMBB->end()) 1160249423Sdim return false; 1161234285Sdim 1162288943Sdim // Is the induction variable bump feeding the latch condition? 1163288943Sdim if (!fixupInductionVariable(L)) 1164288943Sdim return false; 1165288943Sdim 1166249423Sdim // Ensure the loop has a preheader: the loop instruction will be 1167249423Sdim // placed there. 1168314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 1169249423Sdim if (!Preheader) { 1170249423Sdim Preheader = createPreheaderForLoop(L); 1171249423Sdim if (!Preheader) 1172249423Sdim return false; 1173249423Sdim } 1174288943Sdim 1175249423Sdim MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); 1176249423Sdim 1177249423Sdim SmallVector<MachineInstr*, 2> OldInsts; 1178249423Sdim // Are we able to determine the trip count for the loop? 1179249423Sdim CountValue *TripCount = getLoopTripCount(L, OldInsts); 1180276479Sdim if (!TripCount) 1181249423Sdim return false; 1182249423Sdim 1183249423Sdim // Is the trip count available in the preheader? 1184249423Sdim if (TripCount->isReg()) { 1185249423Sdim // There will be a use of the register inserted into the preheader, 1186249423Sdim // so make sure that the register is actually defined at that point. 1187249423Sdim MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); 1188249423Sdim MachineBasicBlock *BBDef = TCDef->getParent(); 1189288943Sdim if (!MDT->dominates(BBDef, Preheader)) 1190288943Sdim return false; 1191249423Sdim } 1192249423Sdim 1193234285Sdim // Determine the loop start. 1194288943Sdim MachineBasicBlock *TopBlock = L->getTopBlock(); 1195314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1196314564Sdim MachineBasicBlock *LoopStart = nullptr; 1197288943Sdim if (ExitingBlock != L->getLoopLatch()) { 1198314564Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1199288943Sdim SmallVector<MachineOperand, 2> Cond; 1200288943Sdim 1201309124Sdim if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) 1202234285Sdim return false; 1203288943Sdim 1204288943Sdim if (L->contains(TB)) 1205288943Sdim LoopStart = TB; 1206288943Sdim else if (L->contains(FB)) 1207288943Sdim LoopStart = FB; 1208288943Sdim else 1209288943Sdim return false; 1210234285Sdim } 1211288943Sdim else 1212288943Sdim LoopStart = TopBlock; 1213234285Sdim 1214249423Sdim // Convert the loop to a hardware loop. 1215234285Sdim DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); 1216249423Sdim DebugLoc DL; 1217249423Sdim if (InsertPos != Preheader->end()) 1218249423Sdim DL = InsertPos->getDebugLoc(); 1219234285Sdim 1220234285Sdim if (TripCount->isReg()) { 1221234285Sdim // Create a copy of the loop count register. 1222249423Sdim unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 1223249423Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) 1224249423Sdim .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); 1225239462Sdim // Add the Loop instruction to the beginning of the loop. 1226288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) 1227249423Sdim .addReg(CountReg); 1228234285Sdim } else { 1229249423Sdim assert(TripCount->isImm() && "Expecting immediate value for trip count"); 1230249423Sdim // Add the Loop immediate instruction to the beginning of the loop, 1231249423Sdim // if the immediate fits in the instructions. Otherwise, we need to 1232249423Sdim // create a new virtual register. 1233234285Sdim int64_t CountImm = TripCount->getImm(); 1234288943Sdim if (!TII->isValidOffset(LOOP_i, CountImm)) { 1235249423Sdim unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 1236280031Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) 1237249423Sdim .addImm(CountImm); 1238288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) 1239249423Sdim .addMBB(LoopStart).addReg(CountReg); 1240249423Sdim } else 1241288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) 1242249423Sdim .addMBB(LoopStart).addImm(CountImm); 1243234285Sdim } 1244234285Sdim 1245249423Sdim // Make sure the loop start always has a reference in the CFG. We need 1246249423Sdim // to create a BlockAddress operand to get this mechanism to work both the 1247234285Sdim // MachineBasicBlock and BasicBlock objects need the flag set. 1248234285Sdim LoopStart->setHasAddressTaken(); 1249234285Sdim // This line is needed to set the hasAddressTaken flag on the BasicBlock 1250249423Sdim // object. 1251234285Sdim BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); 1252234285Sdim 1253234285Sdim // Replace the loop branch with an endloop instruction. 1254249423Sdim DebugLoc LastIDL = LastI->getDebugLoc(); 1255288943Sdim BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); 1256234285Sdim 1257234285Sdim // The loop ends with either: 1258234285Sdim // - a conditional branch followed by an unconditional branch, or 1259234285Sdim // - a conditional branch to the loop start. 1260280031Sdim if (LastI->getOpcode() == Hexagon::J2_jumpt || 1261280031Sdim LastI->getOpcode() == Hexagon::J2_jumpf) { 1262249423Sdim // Delete one and change/add an uncond. branch to out of the loop. 1263234285Sdim MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); 1264234285Sdim LastI = LastMBB->erase(LastI); 1265234285Sdim if (!L->contains(BranchTarget)) { 1266249423Sdim if (LastI != LastMBB->end()) 1267249423Sdim LastI = LastMBB->erase(LastI); 1268234285Sdim SmallVector<MachineOperand, 0> Cond; 1269314564Sdim TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); 1270234285Sdim } 1271234285Sdim } else { 1272234285Sdim // Conditional branch to loop start; just delete it. 1273234285Sdim LastMBB->erase(LastI); 1274234285Sdim } 1275234285Sdim delete TripCount; 1276234285Sdim 1277249423Sdim // The induction operation and the comparison may now be 1278249423Sdim // unneeded. If these are unneeded, then remove them. 1279249423Sdim for (unsigned i = 0; i < OldInsts.size(); ++i) 1280249423Sdim removeIfDead(OldInsts[i]); 1281249423Sdim 1282234285Sdim ++NumHWLoops; 1283288943Sdim 1284288943Sdim // Set RecL1used and RecL0used only after hardware loop has been 1285288943Sdim // successfully generated. Doing it earlier can cause wrong loop instruction 1286288943Sdim // to be used. 1287288943Sdim if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. 1288288943Sdim RecL1used = true; 1289288943Sdim else 1290288943Sdim RecL0used = true; 1291288943Sdim 1292234285Sdim return true; 1293234285Sdim} 1294234285Sdim 1295249423Sdimbool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, 1296249423Sdim MachineInstr *CmpI) { 1297249423Sdim assert (BumpI != CmpI && "Bump and compare in the same instruction?"); 1298249423Sdim 1299249423Sdim MachineBasicBlock *BB = BumpI->getParent(); 1300249423Sdim if (CmpI->getParent() != BB) 1301249423Sdim return false; 1302249423Sdim 1303249423Sdim typedef MachineBasicBlock::instr_iterator instr_iterator; 1304249423Sdim // Check if things are in order to begin with. 1305296417Sdim for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I) 1306249423Sdim if (&*I == CmpI) 1307249423Sdim return true; 1308249423Sdim 1309249423Sdim // Out of order. 1310249423Sdim unsigned PredR = CmpI->getOperand(0).getReg(); 1311249423Sdim bool FoundBump = false; 1312296417Sdim instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); 1313249423Sdim for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { 1314249423Sdim MachineInstr *In = &*I; 1315249423Sdim for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { 1316249423Sdim MachineOperand &MO = In->getOperand(i); 1317249423Sdim if (MO.isReg() && MO.isUse()) { 1318249423Sdim if (MO.getReg() == PredR) // Found an intervening use of PredR. 1319249423Sdim return false; 1320249423Sdim } 1321249423Sdim } 1322249423Sdim 1323249423Sdim if (In == BumpI) { 1324296417Sdim BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); 1325249423Sdim FoundBump = true; 1326249423Sdim break; 1327249423Sdim } 1328249423Sdim } 1329249423Sdim assert (FoundBump && "Cannot determine instruction order"); 1330249423Sdim return FoundBump; 1331234285Sdim} 1332234285Sdim 1333288943Sdim/// This function is required to break recursion. Visiting phis in a loop may 1334288943Sdim/// result in recursion during compilation. We break the recursion by making 1335288943Sdim/// sure that we visit a MachineOperand and its definition in a 1336288943Sdim/// MachineInstruction only once. If we attempt to visit more than once, then 1337288943Sdim/// there is recursion, and will return false. 1338288943Sdimbool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, 1339288943Sdim MachineInstr *MI, 1340288943Sdim const MachineOperand *MO, 1341288943Sdim LoopFeederMap &LoopFeederPhi) const { 1342288943Sdim if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { 1343288943Sdim const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); 1344288943Sdim DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); 1345288943Sdim // Ignore all BBs that form Loop. 1346288943Sdim for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 1347288943Sdim MachineBasicBlock *MBB = Blocks[i]; 1348288943Sdim if (A == MBB) 1349288943Sdim return false; 1350288943Sdim } 1351288943Sdim MachineInstr *Def = MRI->getVRegDef(MO->getReg()); 1352288943Sdim LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); 1353288943Sdim return true; 1354288943Sdim } else 1355288943Sdim // Already visited node. 1356288943Sdim return false; 1357288943Sdim} 1358234285Sdim 1359288943Sdim/// Return true if a Phi may generate a value that can underflow. 1360288943Sdim/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. 1361288943Sdimbool HexagonHardwareLoops::phiMayWrapOrUnderflow( 1362288943Sdim MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, 1363288943Sdim MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { 1364288943Sdim assert(Phi->isPHI() && "Expecting a Phi."); 1365288943Sdim // Walk through each Phi, and its used operands. Make sure that 1366288943Sdim // if there is recursion in Phi, we won't generate hardware loops. 1367288943Sdim for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2) 1368288943Sdim if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)) 1369288943Sdim if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, 1370288943Sdim Phi->getParent(), L, LoopFeederPhi)) 1371288943Sdim return true; 1372288943Sdim return false; 1373288943Sdim} 1374288943Sdim 1375288943Sdim/// Return true if the induction variable can underflow in the first iteration. 1376288943Sdim/// An example, is an initial unsigned value that is 0 and is decrement in the 1377288943Sdim/// first itertion of a do-while loop. In this case, we cannot generate a 1378288943Sdim/// hardware loop because the endloop instruction does not decrement the loop 1379288943Sdim/// counter if it is <= 1. We only need to perform this analysis if the 1380288943Sdim/// initial value is a register. 1381288943Sdim/// 1382288943Sdim/// This function assumes the initial value may underfow unless proven 1383288943Sdim/// otherwise. If the type is signed, then we don't care because signed 1384288943Sdim/// underflow is undefined. We attempt to prove the initial value is not 1385288943Sdim/// zero by perfoming a crude analysis of the loop counter. This function 1386288943Sdim/// checks if the initial value is used in any comparison prior to the loop 1387288943Sdim/// and, if so, assumes the comparison is a range check. This is inexact, 1388288943Sdim/// but will catch the simple cases. 1389288943Sdimbool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( 1390288943Sdim const MachineOperand *InitVal, const MachineOperand *EndVal, 1391288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 1392288943Sdim LoopFeederMap &LoopFeederPhi) const { 1393288943Sdim // Only check register values since they are unknown. 1394288943Sdim if (!InitVal->isReg()) 1395288943Sdim return false; 1396288943Sdim 1397288943Sdim if (!EndVal->isImm()) 1398288943Sdim return false; 1399288943Sdim 1400288943Sdim // A register value that is assigned an immediate is a known value, and it 1401288943Sdim // won't underflow in the first iteration. 1402288943Sdim int64_t Imm; 1403288943Sdim if (checkForImmediate(*InitVal, Imm)) 1404288943Sdim return (EndVal->getImm() == Imm); 1405288943Sdim 1406288943Sdim unsigned Reg = InitVal->getReg(); 1407288943Sdim 1408288943Sdim // We don't know the value of a physical register. 1409288943Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1410288943Sdim return true; 1411288943Sdim 1412288943Sdim MachineInstr *Def = MRI->getVRegDef(Reg); 1413288943Sdim if (!Def) 1414288943Sdim return true; 1415288943Sdim 1416288943Sdim // If the initial value is a Phi or copy and the operands may not underflow, 1417288943Sdim // then the definition cannot be underflow either. 1418288943Sdim if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), 1419288943Sdim L, LoopFeederPhi)) 1420288943Sdim return false; 1421288943Sdim if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), 1422288943Sdim EndVal, Def->getParent(), 1423288943Sdim L, LoopFeederPhi)) 1424288943Sdim return false; 1425288943Sdim 1426288943Sdim // Iterate over the uses of the initial value. If the initial value is used 1427288943Sdim // in a compare, then we assume this is a range check that ensures the loop 1428288943Sdim // doesn't underflow. This is not an exact test and should be improved. 1429288943Sdim for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), 1430288943Sdim E = MRI->use_instr_nodbg_end(); I != E; ++I) { 1431288943Sdim MachineInstr *MI = &*I; 1432288943Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 1433288943Sdim int CmpMask = 0, CmpValue = 0; 1434288943Sdim 1435309124Sdim if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) 1436288943Sdim continue; 1437288943Sdim 1438314564Sdim MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 1439288943Sdim SmallVector<MachineOperand, 2> Cond; 1440309124Sdim if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) 1441288943Sdim continue; 1442288943Sdim 1443314564Sdim Comparison::Kind Cmp = 1444314564Sdim getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); 1445288943Sdim if (Cmp == 0) 1446288943Sdim continue; 1447288943Sdim if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) 1448288943Sdim Cmp = Comparison::getNegatedComparison(Cmp); 1449288943Sdim if (CmpReg2 != 0 && CmpReg2 == Reg) 1450288943Sdim Cmp = Comparison::getSwappedComparison(Cmp); 1451288943Sdim 1452288943Sdim // Signed underflow is undefined. 1453288943Sdim if (Comparison::isSigned(Cmp)) 1454288943Sdim return false; 1455288943Sdim 1456296417Sdim // Check if there is a comparison of the initial value. If the initial value 1457288943Sdim // is greater than or not equal to another value, then assume this is a 1458288943Sdim // range check. 1459288943Sdim if ((Cmp & Comparison::G) || Cmp == Comparison::NE) 1460288943Sdim return false; 1461288943Sdim } 1462288943Sdim 1463288943Sdim // OK - this is a hack that needs to be improved. We really need to analyze 1464288943Sdim // the instructions performed on the initial value. This works on the simplest 1465288943Sdim // cases only. 1466288943Sdim if (!Def->isCopy() && !Def->isPHI()) 1467288943Sdim return false; 1468288943Sdim 1469288943Sdim return true; 1470288943Sdim} 1471288943Sdim 1472288943Sdimbool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, 1473288943Sdim int64_t &Val) const { 1474288943Sdim if (MO.isImm()) { 1475288943Sdim Val = MO.getImm(); 1476288943Sdim return true; 1477288943Sdim } 1478288943Sdim if (!MO.isReg()) 1479288943Sdim return false; 1480288943Sdim 1481288943Sdim // MO is a register. Check whether it is defined as an immediate value, 1482288943Sdim // and if so, get the value of it in TV. That value will then need to be 1483288943Sdim // processed to handle potential subregisters in MO. 1484288943Sdim int64_t TV; 1485288943Sdim 1486288943Sdim unsigned R = MO.getReg(); 1487288943Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 1488288943Sdim return false; 1489249423Sdim MachineInstr *DI = MRI->getVRegDef(R); 1490249423Sdim unsigned DOpc = DI->getOpcode(); 1491249423Sdim switch (DOpc) { 1492288943Sdim case TargetOpcode::COPY: 1493280031Sdim case Hexagon::A2_tfrsi: 1494280031Sdim case Hexagon::A2_tfrpi: 1495314564Sdim case Hexagon::CONST32: 1496314564Sdim case Hexagon::CONST64: { 1497288943Sdim // Call recursively to avoid an extra check whether operand(1) is 1498288943Sdim // indeed an immediate (it could be a global address, for example), 1499288943Sdim // plus we can handle COPY at the same time. 1500288943Sdim if (!checkForImmediate(DI->getOperand(1), TV)) 1501288943Sdim return false; 1502288943Sdim break; 1503288943Sdim } 1504288943Sdim case Hexagon::A2_combineii: 1505288943Sdim case Hexagon::A4_combineir: 1506288943Sdim case Hexagon::A4_combineii: 1507288943Sdim case Hexagon::A4_combineri: 1508288943Sdim case Hexagon::A2_combinew: { 1509288943Sdim const MachineOperand &S1 = DI->getOperand(1); 1510288943Sdim const MachineOperand &S2 = DI->getOperand(2); 1511288943Sdim int64_t V1, V2; 1512288943Sdim if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) 1513288943Sdim return false; 1514321369Sdim TV = V2 | (static_cast<uint64_t>(V1) << 32); 1515288943Sdim break; 1516288943Sdim } 1517288943Sdim case TargetOpcode::REG_SEQUENCE: { 1518288943Sdim const MachineOperand &S1 = DI->getOperand(1); 1519288943Sdim const MachineOperand &S3 = DI->getOperand(3); 1520288943Sdim int64_t V1, V3; 1521288943Sdim if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) 1522288943Sdim return false; 1523288943Sdim unsigned Sub2 = DI->getOperand(2).getImm(); 1524288943Sdim unsigned Sub4 = DI->getOperand(4).getImm(); 1525314564Sdim if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi) 1526288943Sdim TV = V1 | (V3 << 32); 1527314564Sdim else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo) 1528288943Sdim TV = V3 | (V1 << 32); 1529288943Sdim else 1530288943Sdim llvm_unreachable("Unexpected form of REG_SEQUENCE"); 1531288943Sdim break; 1532288943Sdim } 1533288943Sdim 1534288943Sdim default: 1535288943Sdim return false; 1536249423Sdim } 1537234285Sdim 1538314564Sdim // By now, we should have successfully obtained the immediate value defining 1539288943Sdim // the register referenced in MO. Handle a potential use of a subregister. 1540288943Sdim switch (MO.getSubReg()) { 1541314564Sdim case Hexagon::isub_lo: 1542288943Sdim Val = TV & 0xFFFFFFFFULL; 1543288943Sdim break; 1544314564Sdim case Hexagon::isub_hi: 1545288943Sdim Val = (TV >> 32) & 0xFFFFFFFFULL; 1546288943Sdim break; 1547288943Sdim default: 1548288943Sdim Val = TV; 1549288943Sdim break; 1550288943Sdim } 1551288943Sdim return true; 1552249423Sdim} 1553234285Sdim 1554249423Sdimvoid HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { 1555249423Sdim if (MO.isImm()) { 1556249423Sdim MO.setImm(Val); 1557249423Sdim return; 1558234285Sdim } 1559234285Sdim 1560249423Sdim assert(MO.isReg()); 1561249423Sdim unsigned R = MO.getReg(); 1562288943Sdim MachineInstr *DI = MRI->getVRegDef(R); 1563234285Sdim 1564249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(R); 1565249423Sdim unsigned NewR = MRI->createVirtualRegister(RC); 1566249423Sdim MachineBasicBlock &B = *DI->getParent(); 1567249423Sdim DebugLoc DL = DI->getDebugLoc(); 1568288943Sdim BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); 1569249423Sdim MO.setReg(NewR); 1570249423Sdim} 1571234285Sdim 1572288943Sdimstatic bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { 1573288943Sdim // These two instructions are not extendable. 1574288943Sdim if (CmpOpc == Hexagon::A4_cmpbeqi) 1575288943Sdim return isUInt<8>(Imm); 1576288943Sdim if (CmpOpc == Hexagon::A4_cmpbgti) 1577288943Sdim return isInt<8>(Imm); 1578288943Sdim // The rest of the comparison-with-immediate instructions are extendable. 1579288943Sdim return true; 1580288943Sdim} 1581249423Sdim 1582249423Sdimbool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { 1583249423Sdim MachineBasicBlock *Header = L->getHeader(); 1584249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 1585314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1586249423Sdim 1587288943Sdim if (!(Header && Latch && ExitingBlock)) 1588249423Sdim return false; 1589249423Sdim 1590249423Sdim // These data structures follow the same concept as the corresponding 1591249423Sdim // ones in findInductionRegister (where some comments are). 1592249423Sdim typedef std::pair<unsigned,int64_t> RegisterBump; 1593249423Sdim typedef std::pair<unsigned,RegisterBump> RegisterInduction; 1594249423Sdim typedef std::set<RegisterInduction> RegisterInductionSet; 1595249423Sdim 1596249423Sdim // Register candidates for induction variables, with their associated bumps. 1597249423Sdim RegisterInductionSet IndRegs; 1598249423Sdim 1599249423Sdim // Look for induction patterns: 1600249423Sdim // vreg1 = PHI ..., [ latch, vreg2 ] 1601249423Sdim // vreg2 = ADD vreg1, imm 1602249423Sdim typedef MachineBasicBlock::instr_iterator instr_iterator; 1603249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1604249423Sdim I != E && I->isPHI(); ++I) { 1605249423Sdim MachineInstr *Phi = &*I; 1606249423Sdim 1607249423Sdim // Have a PHI instruction. 1608249423Sdim for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 1609249423Sdim if (Phi->getOperand(i+1).getMBB() != Latch) 1610249423Sdim continue; 1611249423Sdim 1612249423Sdim unsigned PhiReg = Phi->getOperand(i).getReg(); 1613249423Sdim MachineInstr *DI = MRI->getVRegDef(PhiReg); 1614249423Sdim 1615314564Sdim if (DI->getDesc().isAdd()) { 1616249423Sdim // If the register operand to the add/sub is the PHI we are looking 1617249423Sdim // at, this meets the induction pattern. 1618249423Sdim unsigned IndReg = DI->getOperand(1).getReg(); 1619288943Sdim MachineOperand &Opnd2 = DI->getOperand(2); 1620288943Sdim int64_t V; 1621288943Sdim if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 1622249423Sdim unsigned UpdReg = DI->getOperand(0).getReg(); 1623249423Sdim IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 1624234285Sdim } 1625234285Sdim } 1626249423Sdim } // for (i) 1627249423Sdim } // for (instr) 1628249423Sdim 1629249423Sdim if (IndRegs.empty()) 1630249423Sdim return false; 1631249423Sdim 1632276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1633249423Sdim SmallVector<MachineOperand,2> Cond; 1634249423Sdim // AnalyzeBranch returns true if it fails to analyze branch. 1635309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 1636288943Sdim if (NotAnalyzed || Cond.empty()) 1637249423Sdim return false; 1638249423Sdim 1639288943Sdim if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 1640314564Sdim MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 1641288943Sdim SmallVector<MachineOperand,2> LCond; 1642309124Sdim bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 1643288943Sdim if (NotAnalyzed) 1644288943Sdim return false; 1645249423Sdim 1646288943Sdim // Since latch is not the exiting block, the latch branch should be an 1647288943Sdim // unconditional branch to the loop header. 1648288943Sdim if (TB == Latch) 1649288943Sdim TB = (LTB == Header) ? LTB : LFB; 1650288943Sdim else 1651288943Sdim FB = (LTB == Header) ? LTB : LFB; 1652288943Sdim } 1653288943Sdim if (TB != Header) { 1654288943Sdim if (FB != Header) { 1655288943Sdim // The latch/exit block does not go back to the header. 1656288943Sdim return false; 1657288943Sdim } 1658288943Sdim // FB is the header (i.e., uncond. jump to branch header) 1659288943Sdim // In this case, the LoopBody -> TB should not be a back edge otherwise 1660288943Sdim // it could result in an infinite loop after conversion to hw_loop. 1661288943Sdim // This case can happen when the Latch has two jumps like this: 1662288943Sdim // Jmp_c OuterLoopHeader <-- TB 1663288943Sdim // Jmp InnerLoopHeader <-- FB 1664288943Sdim if (MDT->dominates(TB, FB)) 1665288943Sdim return false; 1666288943Sdim } 1667249423Sdim 1668249423Sdim // Expecting a predicate register as a condition. It won't be a hardware 1669249423Sdim // predicate register at this point yet, just a vreg. 1670249423Sdim // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) 1671249423Sdim // into Cond, followed by the predicate register. For non-negated branches 1672249423Sdim // it's just the register. 1673249423Sdim unsigned CSz = Cond.size(); 1674249423Sdim if (CSz != 1 && CSz != 2) 1675249423Sdim return false; 1676249423Sdim 1677288943Sdim if (!Cond[CSz-1].isReg()) 1678288943Sdim return false; 1679288943Sdim 1680249423Sdim unsigned P = Cond[CSz-1].getReg(); 1681249423Sdim MachineInstr *PredDef = MRI->getVRegDef(P); 1682249423Sdim 1683249423Sdim if (!PredDef->isCompare()) 1684249423Sdim return false; 1685249423Sdim 1686249423Sdim SmallSet<unsigned,2> CmpRegs; 1687276479Sdim MachineOperand *CmpImmOp = nullptr; 1688249423Sdim 1689249423Sdim // Go over all operands to the compare and look for immediate and register 1690249423Sdim // operands. Assume that if the compare has a single register use and a 1691249423Sdim // single immediate operand, then the register is being compared with the 1692249423Sdim // immediate value. 1693249423Sdim for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 1694249423Sdim MachineOperand &MO = PredDef->getOperand(i); 1695249423Sdim if (MO.isReg()) { 1696249423Sdim // Skip all implicit references. In one case there was: 1697249423Sdim // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> 1698249423Sdim if (MO.isImplicit()) 1699249423Sdim continue; 1700249423Sdim if (MO.isUse()) { 1701288943Sdim if (!isImmediate(MO)) { 1702249423Sdim CmpRegs.insert(MO.getReg()); 1703249423Sdim continue; 1704249423Sdim } 1705249423Sdim // Consider the register to be the "immediate" operand. 1706249423Sdim if (CmpImmOp) 1707249423Sdim return false; 1708249423Sdim CmpImmOp = &MO; 1709249423Sdim } 1710249423Sdim } else if (MO.isImm()) { 1711249423Sdim if (CmpImmOp) // A second immediate argument? Confusing. Bail out. 1712249423Sdim return false; 1713249423Sdim CmpImmOp = &MO; 1714234285Sdim } 1715234285Sdim } 1716234285Sdim 1717249423Sdim if (CmpRegs.empty()) 1718249423Sdim return false; 1719234285Sdim 1720249423Sdim // Check if the compared register follows the order we want. Fix if needed. 1721249423Sdim for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); 1722249423Sdim I != E; ++I) { 1723249423Sdim // This is a success. If the register used in the comparison is one that 1724249423Sdim // we have identified as a bumped (updated) induction register, there is 1725249423Sdim // nothing to do. 1726249423Sdim if (CmpRegs.count(I->first)) 1727249423Sdim return true; 1728249423Sdim 1729249423Sdim // Otherwise, if the register being compared comes out of a PHI node, 1730249423Sdim // and has been recognized as following the induction pattern, and is 1731249423Sdim // compared against an immediate, we can fix it. 1732249423Sdim const RegisterBump &RB = I->second; 1733249423Sdim if (CmpRegs.count(RB.first)) { 1734288943Sdim if (!CmpImmOp) { 1735288943Sdim // If both operands to the compare instruction are registers, see if 1736288943Sdim // it can be changed to use induction register as one of the operands. 1737288943Sdim MachineInstr *IndI = nullptr; 1738288943Sdim MachineInstr *nonIndI = nullptr; 1739288943Sdim MachineOperand *IndMO = nullptr; 1740288943Sdim MachineOperand *nonIndMO = nullptr; 1741288943Sdim 1742288943Sdim for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { 1743288943Sdim MachineOperand &MO = PredDef->getOperand(i); 1744288943Sdim if (MO.isReg() && MO.getReg() == RB.first) { 1745288943Sdim DEBUG(dbgs() << "\n DefMI(" << i << ") = " 1746288943Sdim << *(MRI->getVRegDef(I->first))); 1747288943Sdim if (IndI) 1748288943Sdim return false; 1749288943Sdim 1750288943Sdim IndI = MRI->getVRegDef(I->first); 1751288943Sdim IndMO = &MO; 1752288943Sdim } else if (MO.isReg()) { 1753288943Sdim DEBUG(dbgs() << "\n DefMI(" << i << ") = " 1754288943Sdim << *(MRI->getVRegDef(MO.getReg()))); 1755288943Sdim if (nonIndI) 1756288943Sdim return false; 1757288943Sdim 1758288943Sdim nonIndI = MRI->getVRegDef(MO.getReg()); 1759288943Sdim nonIndMO = &MO; 1760288943Sdim } 1761288943Sdim } 1762288943Sdim if (IndI && nonIndI && 1763288943Sdim nonIndI->getOpcode() == Hexagon::A2_addi && 1764288943Sdim nonIndI->getOperand(2).isImm() && 1765288943Sdim nonIndI->getOperand(2).getImm() == - RB.second) { 1766288943Sdim bool Order = orderBumpCompare(IndI, PredDef); 1767288943Sdim if (Order) { 1768288943Sdim IndMO->setReg(I->first); 1769288943Sdim nonIndMO->setReg(nonIndI->getOperand(1).getReg()); 1770288943Sdim return true; 1771288943Sdim } 1772288943Sdim } 1773249423Sdim return false; 1774288943Sdim } 1775249423Sdim 1776288943Sdim // It is not valid to do this transformation on an unsigned comparison 1777288943Sdim // because it may underflow. 1778314564Sdim Comparison::Kind Cmp = 1779314564Sdim getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); 1780288943Sdim if (!Cmp || Comparison::isUnsigned(Cmp)) 1781288943Sdim return false; 1782288943Sdim 1783288943Sdim // If the register is being compared against an immediate, try changing 1784288943Sdim // the compare instruction to use induction register and adjust the 1785288943Sdim // immediate operand. 1786249423Sdim int64_t CmpImm = getImmediate(*CmpImmOp); 1787249423Sdim int64_t V = RB.second; 1788288943Sdim // Handle Overflow (64-bit). 1789288943Sdim if (((V > 0) && (CmpImm > INT64_MAX - V)) || 1790288943Sdim ((V < 0) && (CmpImm < INT64_MIN - V))) 1791249423Sdim return false; 1792249423Sdim CmpImm += V; 1793288943Sdim // Most comparisons of register against an immediate value allow 1794288943Sdim // the immediate to be constant-extended. There are some exceptions 1795288943Sdim // though. Make sure the new combination will work. 1796288943Sdim if (CmpImmOp->isImm()) 1797288943Sdim if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) 1798288943Sdim return false; 1799249423Sdim 1800249423Sdim // Make sure that the compare happens after the bump. Otherwise, 1801249423Sdim // after the fixup, the compare would use a yet-undefined register. 1802249423Sdim MachineInstr *BumpI = MRI->getVRegDef(I->first); 1803249423Sdim bool Order = orderBumpCompare(BumpI, PredDef); 1804249423Sdim if (!Order) 1805249423Sdim return false; 1806249423Sdim 1807249423Sdim // Finally, fix the compare instruction. 1808249423Sdim setImmediate(*CmpImmOp, CmpImm); 1809249423Sdim for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 1810249423Sdim MachineOperand &MO = PredDef->getOperand(i); 1811249423Sdim if (MO.isReg() && MO.getReg() == RB.first) { 1812249423Sdim MO.setReg(I->first); 1813249423Sdim return true; 1814249423Sdim } 1815249423Sdim } 1816249423Sdim } 1817249423Sdim } 1818249423Sdim 1819249423Sdim return false; 1820234285Sdim} 1821234285Sdim 1822314564Sdim/// createPreheaderForLoop - Create a preheader for a given loop. 1823249423SdimMachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( 1824249423Sdim MachineLoop *L) { 1825314564Sdim if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) 1826249423Sdim return TmpPH; 1827288943Sdim if (!HWCreatePreheader) 1828288943Sdim return nullptr; 1829288943Sdim 1830249423Sdim MachineBasicBlock *Header = L->getHeader(); 1831249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 1832314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1833249423Sdim MachineFunction *MF = Header->getParent(); 1834249423Sdim DebugLoc DL; 1835249423Sdim 1836288943Sdim#ifndef NDEBUG 1837288943Sdim if ((PHFn != "") && (PHFn != MF->getName())) 1838276479Sdim return nullptr; 1839288943Sdim#endif 1840249423Sdim 1841288943Sdim if (!Latch || !ExitingBlock || Header->hasAddressTaken()) 1842288943Sdim return nullptr; 1843288943Sdim 1844249423Sdim typedef MachineBasicBlock::instr_iterator instr_iterator; 1845249423Sdim 1846249423Sdim // Verify that all existing predecessors have analyzable branches 1847249423Sdim // (or no branches at all). 1848249423Sdim typedef std::vector<MachineBasicBlock*> MBBVector; 1849249423Sdim MBBVector Preds(Header->pred_begin(), Header->pred_end()); 1850249423Sdim SmallVector<MachineOperand,2> Tmp1; 1851276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1852249423Sdim 1853309124Sdim if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) 1854276479Sdim return nullptr; 1855249423Sdim 1856249423Sdim for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 1857249423Sdim MachineBasicBlock *PB = *I; 1858309124Sdim bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); 1859288943Sdim if (NotAnalyzed) 1860288943Sdim return nullptr; 1861249423Sdim } 1862249423Sdim 1863249423Sdim MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); 1864296417Sdim MF->insert(Header->getIterator(), NewPH); 1865249423Sdim 1866249423Sdim if (Header->pred_size() > 2) { 1867249423Sdim // Ensure that the header has only two predecessors: the preheader and 1868249423Sdim // the loop latch. Any additional predecessors of the header should 1869288943Sdim // join at the newly created preheader. Inspect all PHI nodes from the 1870249423Sdim // header and create appropriate corresponding PHI nodes in the preheader. 1871249423Sdim 1872249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1873249423Sdim I != E && I->isPHI(); ++I) { 1874249423Sdim MachineInstr *PN = &*I; 1875249423Sdim 1876249423Sdim const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); 1877249423Sdim MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); 1878249423Sdim NewPH->insert(NewPH->end(), NewPN); 1879249423Sdim 1880249423Sdim unsigned PR = PN->getOperand(0).getReg(); 1881249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(PR); 1882249423Sdim unsigned NewPR = MRI->createVirtualRegister(RC); 1883249423Sdim NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); 1884249423Sdim 1885249423Sdim // Copy all non-latch operands of a header's PHI node to the newly 1886249423Sdim // created PHI node in the preheader. 1887249423Sdim for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 1888249423Sdim unsigned PredR = PN->getOperand(i).getReg(); 1889288943Sdim unsigned PredRSub = PN->getOperand(i).getSubReg(); 1890249423Sdim MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 1891249423Sdim if (PredB == Latch) 1892249423Sdim continue; 1893249423Sdim 1894288943Sdim MachineOperand MO = MachineOperand::CreateReg(PredR, false); 1895288943Sdim MO.setSubReg(PredRSub); 1896288943Sdim NewPN->addOperand(MO); 1897249423Sdim NewPN->addOperand(MachineOperand::CreateMBB(PredB)); 1898249423Sdim } 1899249423Sdim 1900249423Sdim // Remove copied operands from the old PHI node and add the value 1901249423Sdim // coming from the preheader's PHI. 1902249423Sdim for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 1903249423Sdim MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 1904249423Sdim if (PredB != Latch) { 1905249423Sdim PN->RemoveOperand(i+1); 1906249423Sdim PN->RemoveOperand(i); 1907249423Sdim } 1908249423Sdim } 1909249423Sdim PN->addOperand(MachineOperand::CreateReg(NewPR, false)); 1910249423Sdim PN->addOperand(MachineOperand::CreateMBB(NewPH)); 1911249423Sdim } 1912234285Sdim } else { 1913249423Sdim assert(Header->pred_size() == 2); 1914249423Sdim 1915249423Sdim // The header has only two predecessors, but the non-latch predecessor 1916249423Sdim // is not a preheader (e.g. it has other successors, etc.) 1917249423Sdim // In such a case we don't need any extra PHI nodes in the new preheader, 1918249423Sdim // all we need is to adjust existing PHIs in the header to now refer to 1919249423Sdim // the new preheader. 1920249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1921249423Sdim I != E && I->isPHI(); ++I) { 1922249423Sdim MachineInstr *PN = &*I; 1923249423Sdim for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 1924249423Sdim MachineOperand &MO = PN->getOperand(i+1); 1925249423Sdim if (MO.getMBB() != Latch) 1926249423Sdim MO.setMBB(NewPH); 1927249423Sdim } 1928249423Sdim } 1929234285Sdim } 1930249423Sdim 1931249423Sdim // "Reroute" the CFG edges to link in the new preheader. 1932249423Sdim // If any of the predecessors falls through to the header, insert a branch 1933249423Sdim // to the new preheader in that place. 1934249423Sdim SmallVector<MachineOperand,1> Tmp2; 1935249423Sdim SmallVector<MachineOperand,1> EmptyCond; 1936249423Sdim 1937276479Sdim TB = FB = nullptr; 1938249423Sdim 1939249423Sdim for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 1940249423Sdim MachineBasicBlock *PB = *I; 1941249423Sdim if (PB != Latch) { 1942249423Sdim Tmp2.clear(); 1943309124Sdim bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); 1944276479Sdim (void)NotAnalyzed; // suppress compiler warning 1945249423Sdim assert (!NotAnalyzed && "Should be analyzable!"); 1946249423Sdim if (TB != Header && (Tmp2.empty() || FB != Header)) 1947314564Sdim TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); 1948249423Sdim PB->ReplaceUsesOfBlockWith(Header, NewPH); 1949249423Sdim } 1950249423Sdim } 1951249423Sdim 1952249423Sdim // It can happen that the latch block will fall through into the header. 1953249423Sdim // Insert an unconditional branch to the header. 1954276479Sdim TB = FB = nullptr; 1955309124Sdim bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); 1956276479Sdim (void)LatchNotAnalyzed; // suppress compiler warning 1957249423Sdim assert (!LatchNotAnalyzed && "Should be analyzable!"); 1958249423Sdim if (!TB && !FB) 1959314564Sdim TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); 1960249423Sdim 1961249423Sdim // Finally, the branch from the preheader to the header. 1962314564Sdim TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); 1963249423Sdim NewPH->addSuccessor(Header); 1964249423Sdim 1965288943Sdim MachineLoop *ParentLoop = L->getParentLoop(); 1966288943Sdim if (ParentLoop) 1967288943Sdim ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); 1968288943Sdim 1969288943Sdim // Update the dominator information with the new preheader. 1970288943Sdim if (MDT) { 1971314564Sdim if (MachineDomTreeNode *HN = MDT->getNode(Header)) { 1972314564Sdim if (MachineDomTreeNode *DHN = HN->getIDom()) { 1973314564Sdim MDT->addNewBlock(NewPH, DHN->getBlock()); 1974314564Sdim MDT->changeImmediateDominator(Header, NewPH); 1975314564Sdim } 1976314564Sdim } 1977288943Sdim } 1978288943Sdim 1979249423Sdim return NewPH; 1980234285Sdim} 1981