1327952Sdim//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===// 2234285Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6234285Sdim// 7234285Sdim//===----------------------------------------------------------------------===// 8234285Sdim// 9234285Sdim// This pass identifies loops where we can generate the Hexagon hardware 10234285Sdim// loop instruction. The hardware loop can perform loop branches with a 11234285Sdim// zero-cycle overhead. 12234285Sdim// 13234285Sdim// The pattern that defines the induction variable can changed depending on 14234285Sdim// prior optimizations. For example, the IndVarSimplify phase run by 'opt' 15234285Sdim// normalizes induction variables, and the Loop Strength Reduction pass 16234285Sdim// run by 'llc' may also make changes to the induction variable. 17234285Sdim// The pattern detected by this phase is due to running Strength Reduction. 18234285Sdim// 19234285Sdim// Criteria for hardware loops: 20234285Sdim// - Countable loops (w/ ind. var for a trip count) 21234285Sdim// - Assumes loops are normalized by IndVarSimplify 22234285Sdim// - Try inner-most loops first 23234285Sdim// - No function calls in loops. 24234285Sdim// 25234285Sdim//===----------------------------------------------------------------------===// 26234285Sdim 27314564Sdim#include "HexagonInstrInfo.h" 28314564Sdim#include "HexagonSubtarget.h" 29327952Sdim#include "llvm/ADT/ArrayRef.h" 30327952Sdim#include "llvm/ADT/STLExtras.h" 31249423Sdim#include "llvm/ADT/SmallSet.h" 32314564Sdim#include "llvm/ADT/SmallVector.h" 33234285Sdim#include "llvm/ADT/Statistic.h" 34314564Sdim#include "llvm/ADT/StringRef.h" 35314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 36234285Sdim#include "llvm/CodeGen/MachineDominators.h" 37234285Sdim#include "llvm/CodeGen/MachineFunction.h" 38234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 39314564Sdim#include "llvm/CodeGen/MachineInstr.h" 40234285Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 41234285Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 42314564Sdim#include "llvm/CodeGen/MachineOperand.h" 43234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 44327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 45314564Sdim#include "llvm/IR/Constants.h" 46314564Sdim#include "llvm/IR/DebugLoc.h" 47360784Sdim#include "llvm/InitializePasses.h" 48314564Sdim#include "llvm/Pass.h" 49249423Sdim#include "llvm/Support/CommandLine.h" 50234285Sdim#include "llvm/Support/Debug.h" 51314564Sdim#include "llvm/Support/ErrorHandling.h" 52314564Sdim#include "llvm/Support/MathExtras.h" 53234285Sdim#include "llvm/Support/raw_ostream.h" 54314564Sdim#include <cassert> 55314564Sdim#include <cstdint> 56314564Sdim#include <cstdlib> 57314564Sdim#include <iterator> 58314564Sdim#include <map> 59314564Sdim#include <set> 60327952Sdim#include <string> 61314564Sdim#include <utility> 62249423Sdim#include <vector> 63234285Sdim 64234285Sdimusing namespace llvm; 65234285Sdim 66276479Sdim#define DEBUG_TYPE "hwloops" 67276479Sdim 68249423Sdim#ifndef NDEBUG 69288943Sdimstatic cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); 70288943Sdim 71288943Sdim// Option to create preheader only for a specific function. 72288943Sdimstatic cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, 73288943Sdim cl::init("")); 74249423Sdim#endif 75249423Sdim 76288943Sdim// Option to create a preheader if one doesn't exist. 77288943Sdimstatic cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", 78288943Sdim cl::Hidden, cl::init(true), 79288943Sdim cl::desc("Add a preheader to a hardware loop if one doesn't exist")); 80288943Sdim 81314564Sdim// Turn it off by default. If a preheader block is not created here, the 82314564Sdim// software pipeliner may be unable to find a block suitable to serve as 83314564Sdim// a preheader. In that case SWP will not run. 84314564Sdimstatic cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false), 85314564Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " 86314564Sdim "instructions")); 87314564Sdim 88234285SdimSTATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 89234285Sdim 90249423Sdimnamespace llvm { 91314564Sdim 92288943Sdim FunctionPass *createHexagonHardwareLoops(); 93249423Sdim void initializeHexagonHardwareLoopsPass(PassRegistry&); 94249423Sdim 95314564Sdim} // end namespace llvm 96314564Sdim 97234285Sdimnamespace { 98314564Sdim 99234285Sdim class CountValue; 100314564Sdim 101234285Sdim struct HexagonHardwareLoops : public MachineFunctionPass { 102249423Sdim MachineLoopInfo *MLI; 103249423Sdim MachineRegisterInfo *MRI; 104249423Sdim MachineDominatorTree *MDT; 105249423Sdim const HexagonInstrInfo *TII; 106321369Sdim const HexagonRegisterInfo *TRI; 107249423Sdim#ifndef NDEBUG 108249423Sdim static int Counter; 109249423Sdim#endif 110234285Sdim 111234285Sdim public: 112249423Sdim static char ID; 113234285Sdim 114327952Sdim HexagonHardwareLoops() : MachineFunctionPass(ID) {} 115234285Sdim 116276479Sdim bool runOnMachineFunction(MachineFunction &MF) override; 117234285Sdim 118314564Sdim StringRef getPassName() const override { return "Hexagon Hardware Loops"; } 119234285Sdim 120276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 121234285Sdim AU.addRequired<MachineDominatorTree>(); 122234285Sdim AU.addRequired<MachineLoopInfo>(); 123234285Sdim MachineFunctionPass::getAnalysisUsage(AU); 124234285Sdim } 125234285Sdim 126234285Sdim private: 127327952Sdim using LoopFeederMap = std::map<unsigned, MachineInstr *>; 128288943Sdim 129249423Sdim /// Kinds of comparisons in the compare instructions. 130249423Sdim struct Comparison { 131249423Sdim enum Kind { 132249423Sdim EQ = 0x01, 133249423Sdim NE = 0x02, 134288943Sdim L = 0x04, 135288943Sdim G = 0x08, 136288943Sdim U = 0x40, 137249423Sdim LTs = L, 138249423Sdim LEs = L | EQ, 139249423Sdim GTs = G, 140249423Sdim GEs = G | EQ, 141249423Sdim LTu = L | U, 142249423Sdim LEu = L | EQ | U, 143249423Sdim GTu = G | U, 144249423Sdim GEu = G | EQ | U 145249423Sdim }; 146249423Sdim 147249423Sdim static Kind getSwappedComparison(Kind Cmp) { 148249423Sdim assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); 149249423Sdim if ((Cmp & L) || (Cmp & G)) 150249423Sdim return (Kind)(Cmp ^ (L|G)); 151249423Sdim return Cmp; 152249423Sdim } 153288943Sdim 154288943Sdim static Kind getNegatedComparison(Kind Cmp) { 155288943Sdim if ((Cmp & L) || (Cmp & G)) 156288943Sdim return (Kind)((Cmp ^ (L | G)) ^ EQ); 157288943Sdim if ((Cmp & NE) || (Cmp & EQ)) 158288943Sdim return (Kind)(Cmp ^ (EQ | NE)); 159288943Sdim return (Kind)0; 160288943Sdim } 161288943Sdim 162288943Sdim static bool isSigned(Kind Cmp) { 163288943Sdim return (Cmp & (L | G) && !(Cmp & U)); 164288943Sdim } 165288943Sdim 166288943Sdim static bool isUnsigned(Kind Cmp) { 167288943Sdim return (Cmp & U); 168288943Sdim } 169249423Sdim }; 170249423Sdim 171341825Sdim /// Find the register that contains the loop controlling 172234285Sdim /// induction variable. 173249423Sdim /// If successful, it will return true and set the \p Reg, \p IVBump 174249423Sdim /// and \p IVOp arguments. Otherwise it will return false. 175249423Sdim /// The returned induction register is the register R that follows the 176249423Sdim /// following induction pattern: 177249423Sdim /// loop: 178249423Sdim /// R = phi ..., [ R.next, LatchBlock ] 179249423Sdim /// R.next = R + #bump 180249423Sdim /// if (R.next < #N) goto loop 181249423Sdim /// IVBump is the immediate value added to R, and IVOp is the instruction 182249423Sdim /// "R.next = R + #bump". 183249423Sdim bool findInductionRegister(MachineLoop *L, unsigned &Reg, 184249423Sdim int64_t &IVBump, MachineInstr *&IVOp) const; 185234285Sdim 186341825Sdim /// Return the comparison kind for the specified opcode. 187288943Sdim Comparison::Kind getComparisonKind(unsigned CondOpc, 188288943Sdim MachineOperand *InitialValue, 189288943Sdim const MachineOperand *Endvalue, 190288943Sdim int64_t IVBump) const; 191288943Sdim 192341825Sdim /// Analyze the statements in a loop to determine if the loop 193249423Sdim /// has a computable trip count and, if so, return a value that represents 194249423Sdim /// the trip count expression. 195249423Sdim CountValue *getLoopTripCount(MachineLoop *L, 196261991Sdim SmallVectorImpl<MachineInstr *> &OldInsts); 197234285Sdim 198341825Sdim /// Return the expression that represents the number of times 199249423Sdim /// a loop iterates. The function takes the operands that represent the 200249423Sdim /// loop start value, loop end value, and induction value. Based upon 201249423Sdim /// these operands, the function attempts to compute the trip count. 202249423Sdim /// If the trip count is not directly available (as an immediate value, 203249423Sdim /// or a register), the function will attempt to insert computation of it 204249423Sdim /// to the loop's preheader. 205288943Sdim CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, 206288943Sdim const MachineOperand *End, unsigned IVReg, 207288943Sdim int64_t IVBump, Comparison::Kind Cmp) const; 208234285Sdim 209341825Sdim /// Return true if the instruction is not valid within a hardware 210249423Sdim /// loop. 211288943Sdim bool isInvalidLoopOperation(const MachineInstr *MI, 212288943Sdim bool IsInnerHWLoop) const; 213234285Sdim 214341825Sdim /// Return true if the loop contains an instruction that inhibits 215249423Sdim /// using the hardware loop. 216288943Sdim bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; 217234285Sdim 218341825Sdim /// Given a loop, check if we can convert it to a hardware loop. 219249423Sdim /// If so, then perform the conversion and return true. 220288943Sdim bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); 221234285Sdim 222341825Sdim /// Return true if the instruction is now dead. 223249423Sdim bool isDead(const MachineInstr *MI, 224261991Sdim SmallVectorImpl<MachineInstr *> &DeadPhis) const; 225249423Sdim 226341825Sdim /// Remove the instruction if it is now dead. 227249423Sdim void removeIfDead(MachineInstr *MI); 228249423Sdim 229341825Sdim /// Make sure that the "bump" instruction executes before the 230249423Sdim /// compare. We need that for the IV fixup, so that the compare 231249423Sdim /// instruction would not use a bumped value that has not yet been 232249423Sdim /// defined. If the instructions are out of order, try to reorder them. 233249423Sdim bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); 234249423Sdim 235341825Sdim /// Return true if MO and MI pair is visited only once. If visited 236288943Sdim /// more than once, this indicates there is recursion. In such a case, 237288943Sdim /// return false. 238288943Sdim bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, 239288943Sdim const MachineOperand *MO, 240288943Sdim LoopFeederMap &LoopFeederPhi) const; 241249423Sdim 242341825Sdim /// Return true if the Phi may generate a value that may underflow, 243288943Sdim /// or may wrap. 244288943Sdim bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, 245288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 246288943Sdim LoopFeederMap &LoopFeederPhi) const; 247249423Sdim 248341825Sdim /// Return true if the induction variable may underflow an unsigned 249288943Sdim /// value in the first iteration. 250288943Sdim bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, 251288943Sdim const MachineOperand *EndVal, 252288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 253288943Sdim LoopFeederMap &LoopFeederPhi) const; 254288943Sdim 255341825Sdim /// Check if the given operand has a compile-time known constant 256288943Sdim /// value. Return true if yes, and false otherwise. When returning true, set 257288943Sdim /// Val to the corresponding constant value. 258288943Sdim bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; 259288943Sdim 260341825Sdim /// Check if the operand has a compile-time known constant value. 261288943Sdim bool isImmediate(const MachineOperand &MO) const { 262288943Sdim int64_t V; 263288943Sdim return checkForImmediate(MO, V); 264288943Sdim } 265288943Sdim 266341825Sdim /// Return the immediate for the specified operand. 267288943Sdim int64_t getImmediate(const MachineOperand &MO) const { 268288943Sdim int64_t V; 269288943Sdim if (!checkForImmediate(MO, V)) 270288943Sdim llvm_unreachable("Invalid operand"); 271288943Sdim return V; 272288943Sdim } 273288943Sdim 274341825Sdim /// Reset the given machine operand to now refer to a new immediate 275249423Sdim /// value. Assumes that the operand was already referencing an immediate 276249423Sdim /// value, either directly, or via a register. 277249423Sdim void setImmediate(MachineOperand &MO, int64_t Val); 278249423Sdim 279341825Sdim /// Fix the data flow of the induction variable. 280249423Sdim /// The desired flow is: phi ---> bump -+-> comparison-in-latch. 281249423Sdim /// | 282249423Sdim /// +-> back to phi 283249423Sdim /// where "bump" is the increment of the induction variable: 284249423Sdim /// iv = iv + #const. 285249423Sdim /// Due to some prior code transformations, the actual flow may look 286249423Sdim /// like this: 287249423Sdim /// phi -+-> bump ---> back to phi 288249423Sdim /// | 289249423Sdim /// +-> comparison-in-latch (against upper_bound-bump), 290249423Sdim /// i.e. the comparison that controls the loop execution may be using 291249423Sdim /// the value of the induction variable from before the increment. 292249423Sdim /// 293249423Sdim /// Return true if the loop's flow is the desired one (i.e. it's 294249423Sdim /// either been fixed, or no fixing was necessary). 295249423Sdim /// Otherwise, return false. This can happen if the induction variable 296249423Sdim /// couldn't be identified, or if the value in the latch's comparison 297249423Sdim /// cannot be adjusted to reflect the post-bump value. 298249423Sdim bool fixupInductionVariable(MachineLoop *L); 299249423Sdim 300341825Sdim /// Given a loop, if it does not have a preheader, create one. 301249423Sdim /// Return the block that is the preheader. 302249423Sdim MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); 303234285Sdim }; 304234285Sdim 305234285Sdim char HexagonHardwareLoops::ID = 0; 306249423Sdim#ifndef NDEBUG 307249423Sdim int HexagonHardwareLoops::Counter = 0; 308249423Sdim#endif 309234285Sdim 310341825Sdim /// Abstraction for a trip count of a loop. A smaller version 311249423Sdim /// of the MachineOperand class without the concerns of changing the 312249423Sdim /// operand representation. 313234285Sdim class CountValue { 314234285Sdim public: 315234285Sdim enum CountValueType { 316234285Sdim CV_Register, 317234285Sdim CV_Immediate 318234285Sdim }; 319314564Sdim 320234285Sdim private: 321234285Sdim CountValueType Kind; 322234285Sdim union Values { 323249423Sdim struct { 324249423Sdim unsigned Reg; 325249423Sdim unsigned Sub; 326249423Sdim } R; 327249423Sdim unsigned ImmVal; 328234285Sdim } Contents; 329234285Sdim 330234285Sdim public: 331249423Sdim explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { 332249423Sdim Kind = t; 333249423Sdim if (Kind == CV_Register) { 334249423Sdim Contents.R.Reg = v; 335249423Sdim Contents.R.Sub = u; 336249423Sdim } else { 337249423Sdim Contents.ImmVal = v; 338249423Sdim } 339249423Sdim } 340314564Sdim 341234285Sdim bool isReg() const { return Kind == CV_Register; } 342234285Sdim bool isImm() const { return Kind == CV_Immediate; } 343234285Sdim 344234285Sdim unsigned getReg() const { 345234285Sdim assert(isReg() && "Wrong CountValue accessor"); 346249423Sdim return Contents.R.Reg; 347234285Sdim } 348327952Sdim 349249423Sdim unsigned getSubReg() const { 350249423Sdim assert(isReg() && "Wrong CountValue accessor"); 351249423Sdim return Contents.R.Sub; 352234285Sdim } 353327952Sdim 354249423Sdim unsigned getImm() const { 355234285Sdim assert(isImm() && "Wrong CountValue accessor"); 356234285Sdim return Contents.ImmVal; 357234285Sdim } 358234285Sdim 359288943Sdim void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { 360327952Sdim if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); } 361249423Sdim if (isImm()) { OS << Contents.ImmVal; } 362234285Sdim } 363234285Sdim }; 364314564Sdim 365249423Sdim} // end anonymous namespace 366234285Sdim 367249423SdimINITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", 368249423Sdim "Hexagon Hardware Loops", false, false) 369249423SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 370249423SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 371249423SdimINITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", 372249423Sdim "Hexagon Hardware Loops", false, false) 373234285Sdim 374234285SdimFunctionPass *llvm::createHexagonHardwareLoops() { 375234285Sdim return new HexagonHardwareLoops(); 376234285Sdim} 377234285Sdim 378234285Sdimbool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { 379341825Sdim LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); 380327952Sdim if (skipFunction(MF.getFunction())) 381309124Sdim return false; 382234285Sdim 383234285Sdim bool Changed = false; 384234285Sdim 385234285Sdim MLI = &getAnalysis<MachineLoopInfo>(); 386234285Sdim MRI = &MF.getRegInfo(); 387249423Sdim MDT = &getAnalysis<MachineDominatorTree>(); 388321369Sdim const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); 389321369Sdim TII = HST.getInstrInfo(); 390321369Sdim TRI = HST.getRegisterInfo(); 391234285Sdim 392288943Sdim for (auto &L : *MLI) 393288943Sdim if (!L->getParentLoop()) { 394288943Sdim bool L0Used = false; 395288943Sdim bool L1Used = false; 396288943Sdim Changed |= convertToHardwareLoop(L, L0Used, L1Used); 397288943Sdim } 398234285Sdim 399234285Sdim return Changed; 400234285Sdim} 401234285Sdim 402249423Sdimbool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, 403249423Sdim unsigned &Reg, 404249423Sdim int64_t &IVBump, 405249423Sdim MachineInstr *&IVOp 406249423Sdim ) const { 407249423Sdim MachineBasicBlock *Header = L->getHeader(); 408314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 409249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 410314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 411288943Sdim if (!Header || !Preheader || !Latch || !ExitingBlock) 412249423Sdim return false; 413249423Sdim 414249423Sdim // This pair represents an induction register together with an immediate 415249423Sdim // value that will be added to it in each loop iteration. 416327952Sdim using RegisterBump = std::pair<unsigned, int64_t>; 417249423Sdim 418249423Sdim // Mapping: R.next -> (R, bump), where R, R.next and bump are derived 419249423Sdim // from an induction operation 420249423Sdim // R.next = R + bump 421249423Sdim // where bump is an immediate value. 422327952Sdim using InductionMap = std::map<unsigned, RegisterBump>; 423249423Sdim 424249423Sdim InductionMap IndMap; 425249423Sdim 426327952Sdim using instr_iterator = MachineBasicBlock::instr_iterator; 427327952Sdim 428249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 429249423Sdim I != E && I->isPHI(); ++I) { 430249423Sdim MachineInstr *Phi = &*I; 431249423Sdim 432249423Sdim // Have a PHI instruction. Get the operand that corresponds to the 433249423Sdim // latch block, and see if is a result of an addition of form "reg+imm", 434249423Sdim // where the "reg" is defined by the PHI node we are looking at. 435249423Sdim for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 436249423Sdim if (Phi->getOperand(i+1).getMBB() != Latch) 437249423Sdim continue; 438249423Sdim 439360784Sdim Register PhiOpReg = Phi->getOperand(i).getReg(); 440249423Sdim MachineInstr *DI = MRI->getVRegDef(PhiOpReg); 441249423Sdim 442314564Sdim if (DI->getDesc().isAdd()) { 443288943Sdim // If the register operand to the add is the PHI we're looking at, this 444288943Sdim // meets the induction pattern. 445360784Sdim Register IndReg = DI->getOperand(1).getReg(); 446288943Sdim MachineOperand &Opnd2 = DI->getOperand(2); 447288943Sdim int64_t V; 448288943Sdim if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 449360784Sdim Register UpdReg = DI->getOperand(0).getReg(); 450249423Sdim IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 451249423Sdim } 452249423Sdim } 453249423Sdim } // for (i) 454249423Sdim } // for (instr) 455249423Sdim 456249423Sdim SmallVector<MachineOperand,2> Cond; 457276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 458309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 459249423Sdim if (NotAnalyzed) 460249423Sdim return false; 461249423Sdim 462288943Sdim unsigned PredR, PredPos, PredRegFlags; 463288943Sdim if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) 464288943Sdim return false; 465249423Sdim 466249423Sdim MachineInstr *PredI = MRI->getVRegDef(PredR); 467249423Sdim if (!PredI->isCompare()) 468249423Sdim return false; 469249423Sdim 470249423Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 471249423Sdim int CmpImm = 0, CmpMask = 0; 472309124Sdim bool CmpAnalyzed = 473309124Sdim TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); 474249423Sdim // Fail if the compare was not analyzed, or it's not comparing a register 475249423Sdim // with an immediate value. Not checking the mask here, since we handle 476288943Sdim // the individual compare opcodes (including A4_cmpb*) later on. 477249423Sdim if (!CmpAnalyzed) 478249423Sdim return false; 479249423Sdim 480249423Sdim // Exactly one of the input registers to the comparison should be among 481249423Sdim // the induction registers. 482249423Sdim InductionMap::iterator IndMapEnd = IndMap.end(); 483249423Sdim InductionMap::iterator F = IndMapEnd; 484249423Sdim if (CmpReg1 != 0) { 485249423Sdim InductionMap::iterator F1 = IndMap.find(CmpReg1); 486249423Sdim if (F1 != IndMapEnd) 487249423Sdim F = F1; 488249423Sdim } 489249423Sdim if (CmpReg2 != 0) { 490249423Sdim InductionMap::iterator F2 = IndMap.find(CmpReg2); 491249423Sdim if (F2 != IndMapEnd) { 492249423Sdim if (F != IndMapEnd) 493249423Sdim return false; 494249423Sdim F = F2; 495249423Sdim } 496249423Sdim } 497249423Sdim if (F == IndMapEnd) 498249423Sdim return false; 499249423Sdim 500249423Sdim Reg = F->second.first; 501249423Sdim IVBump = F->second.second; 502249423Sdim IVOp = MRI->getVRegDef(F->first); 503249423Sdim return true; 504249423Sdim} 505249423Sdim 506288943Sdim// Return the comparison kind for the specified opcode. 507288943SdimHexagonHardwareLoops::Comparison::Kind 508288943SdimHexagonHardwareLoops::getComparisonKind(unsigned CondOpc, 509288943Sdim MachineOperand *InitialValue, 510288943Sdim const MachineOperand *EndValue, 511288943Sdim int64_t IVBump) const { 512288943Sdim Comparison::Kind Cmp = (Comparison::Kind)0; 513288943Sdim switch (CondOpc) { 514327952Sdim case Hexagon::C2_cmpeq: 515288943Sdim case Hexagon::C2_cmpeqi: 516288943Sdim case Hexagon::C2_cmpeqp: 517288943Sdim Cmp = Comparison::EQ; 518288943Sdim break; 519288943Sdim case Hexagon::C4_cmpneq: 520288943Sdim case Hexagon::C4_cmpneqi: 521288943Sdim Cmp = Comparison::NE; 522288943Sdim break; 523327952Sdim case Hexagon::C2_cmplt: 524327952Sdim Cmp = Comparison::LTs; 525327952Sdim break; 526327952Sdim case Hexagon::C2_cmpltu: 527327952Sdim Cmp = Comparison::LTu; 528327952Sdim break; 529288943Sdim case Hexagon::C4_cmplte: 530327952Sdim case Hexagon::C4_cmpltei: 531288943Sdim Cmp = Comparison::LEs; 532288943Sdim break; 533288943Sdim case Hexagon::C4_cmplteu: 534327952Sdim case Hexagon::C4_cmplteui: 535288943Sdim Cmp = Comparison::LEu; 536288943Sdim break; 537327952Sdim case Hexagon::C2_cmpgt: 538327952Sdim case Hexagon::C2_cmpgti: 539327952Sdim case Hexagon::C2_cmpgtp: 540327952Sdim Cmp = Comparison::GTs; 541327952Sdim break; 542327952Sdim case Hexagon::C2_cmpgtu: 543288943Sdim case Hexagon::C2_cmpgtui: 544288943Sdim case Hexagon::C2_cmpgtup: 545288943Sdim Cmp = Comparison::GTu; 546288943Sdim break; 547327952Sdim case Hexagon::C2_cmpgei: 548327952Sdim Cmp = Comparison::GEs; 549288943Sdim break; 550327952Sdim case Hexagon::C2_cmpgeui: 551327952Sdim Cmp = Comparison::GEs; 552327952Sdim break; 553288943Sdim default: 554288943Sdim return (Comparison::Kind)0; 555288943Sdim } 556288943Sdim return Cmp; 557288943Sdim} 558249423Sdim 559341825Sdim/// Analyze the statements in a loop to determine if the loop has 560249423Sdim/// a computable trip count and, if so, return a value that represents 561249423Sdim/// the trip count expression. 562234285Sdim/// 563249423Sdim/// This function iterates over the phi nodes in the loop to check for 564249423Sdim/// induction variable patterns that are used in the calculation for 565249423Sdim/// the number of time the loop is executed. 566249423SdimCountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, 567288943Sdim SmallVectorImpl<MachineInstr *> &OldInsts) { 568234285Sdim MachineBasicBlock *TopMBB = L->getTopBlock(); 569234285Sdim MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); 570234285Sdim assert(PI != TopMBB->pred_end() && 571234285Sdim "Loop must have more than one incoming edge!"); 572234285Sdim MachineBasicBlock *Backedge = *PI++; 573249423Sdim if (PI == TopMBB->pred_end()) // dead loop? 574276479Sdim return nullptr; 575234285Sdim MachineBasicBlock *Incoming = *PI++; 576249423Sdim if (PI != TopMBB->pred_end()) // multiple backedges? 577276479Sdim return nullptr; 578234285Sdim 579249423Sdim // Make sure there is one incoming and one backedge and determine which 580234285Sdim // is which. 581234285Sdim if (L->contains(Incoming)) { 582234285Sdim if (L->contains(Backedge)) 583276479Sdim return nullptr; 584234285Sdim std::swap(Incoming, Backedge); 585234285Sdim } else if (!L->contains(Backedge)) 586276479Sdim return nullptr; 587234285Sdim 588249423Sdim // Look for the cmp instruction to determine if we can get a useful trip 589249423Sdim // count. The trip count can be either a register or an immediate. The 590249423Sdim // location of the value depends upon the type (reg or imm). 591314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 592288943Sdim if (!ExitingBlock) 593276479Sdim return nullptr; 594249423Sdim 595249423Sdim unsigned IVReg = 0; 596249423Sdim int64_t IVBump = 0; 597249423Sdim MachineInstr *IVOp; 598249423Sdim bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); 599249423Sdim if (!FoundIV) 600276479Sdim return nullptr; 601249423Sdim 602314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 603249423Sdim 604276479Sdim MachineOperand *InitialValue = nullptr; 605249423Sdim MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); 606288943Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 607249423Sdim for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { 608249423Sdim MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); 609249423Sdim if (MBB == Preheader) 610249423Sdim InitialValue = &IV_Phi->getOperand(i); 611249423Sdim else if (MBB == Latch) 612249423Sdim IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. 613234285Sdim } 614249423Sdim if (!InitialValue) 615276479Sdim return nullptr; 616234285Sdim 617249423Sdim SmallVector<MachineOperand,2> Cond; 618276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 619309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 620249423Sdim if (NotAnalyzed) 621276479Sdim return nullptr; 622234285Sdim 623249423Sdim MachineBasicBlock *Header = L->getHeader(); 624249423Sdim // TB must be non-null. If FB is also non-null, one of them must be 625249423Sdim // the header. Otherwise, branch to TB could be exiting the loop, and 626249423Sdim // the fall through can go to the header. 627288943Sdim assert (TB && "Exit block without a branch?"); 628288943Sdim if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 629314564Sdim MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 630288943Sdim SmallVector<MachineOperand,2> LCond; 631309124Sdim bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 632288943Sdim if (NotAnalyzed) 633288943Sdim return nullptr; 634288943Sdim if (TB == Latch) 635288943Sdim TB = (LTB == Header) ? LTB : LFB; 636288943Sdim else 637288943Sdim FB = (LTB == Header) ? LTB: LFB; 638288943Sdim } 639249423Sdim assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); 640249423Sdim if (!TB || (FB && TB != Header && FB != Header)) 641276479Sdim return nullptr; 642249423Sdim 643249423Sdim // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch 644249423Sdim // to put imm(0), followed by P in the vector Cond. 645249423Sdim // If TB is not the header, it means that the "not-taken" path must lead 646249423Sdim // to the header. 647288943Sdim bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); 648288943Sdim unsigned PredReg, PredPos, PredRegFlags; 649288943Sdim if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) 650288943Sdim return nullptr; 651249423Sdim MachineInstr *CondI = MRI->getVRegDef(PredReg); 652249423Sdim unsigned CondOpc = CondI->getOpcode(); 653249423Sdim 654249423Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 655249423Sdim int Mask = 0, ImmValue = 0; 656309124Sdim bool AnalyzedCmp = 657309124Sdim TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); 658249423Sdim if (!AnalyzedCmp) 659276479Sdim return nullptr; 660249423Sdim 661249423Sdim // The comparison operator type determines how we compute the loop 662249423Sdim // trip count. 663249423Sdim OldInsts.push_back(CondI); 664249423Sdim OldInsts.push_back(IVOp); 665249423Sdim 666249423Sdim // Sadly, the following code gets information based on the position 667249423Sdim // of the operands in the compare instruction. This has to be done 668249423Sdim // this way, because the comparisons check for a specific relationship 669249423Sdim // between the operands (e.g. is-less-than), rather than to find out 670249423Sdim // what relationship the operands are in (as on PPC). 671249423Sdim Comparison::Kind Cmp; 672249423Sdim bool isSwapped = false; 673249423Sdim const MachineOperand &Op1 = CondI->getOperand(1); 674249423Sdim const MachineOperand &Op2 = CondI->getOperand(2); 675276479Sdim const MachineOperand *EndValue = nullptr; 676249423Sdim 677249423Sdim if (Op1.isReg()) { 678249423Sdim if (Op2.isImm() || Op1.getReg() == IVReg) 679249423Sdim EndValue = &Op2; 680249423Sdim else { 681249423Sdim EndValue = &Op1; 682249423Sdim isSwapped = true; 683249423Sdim } 684234285Sdim } 685234285Sdim 686249423Sdim if (!EndValue) 687276479Sdim return nullptr; 688234285Sdim 689288943Sdim Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); 690288943Sdim if (!Cmp) 691288943Sdim return nullptr; 692288943Sdim if (Negated) 693288943Sdim Cmp = Comparison::getNegatedComparison(Cmp); 694249423Sdim if (isSwapped) 695288943Sdim Cmp = Comparison::getSwappedComparison(Cmp); 696249423Sdim 697249423Sdim if (InitialValue->isReg()) { 698360784Sdim Register R = InitialValue->getReg(); 699249423Sdim MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 700327952Sdim if (!MDT->properlyDominates(DefBB, Header)) { 701327952Sdim int64_t V; 702327952Sdim if (!checkForImmediate(*InitialValue, V)) 703327952Sdim return nullptr; 704327952Sdim } 705249423Sdim OldInsts.push_back(MRI->getVRegDef(R)); 706249423Sdim } 707249423Sdim if (EndValue->isReg()) { 708360784Sdim Register R = EndValue->getReg(); 709249423Sdim MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 710327952Sdim if (!MDT->properlyDominates(DefBB, Header)) { 711327952Sdim int64_t V; 712327952Sdim if (!checkForImmediate(*EndValue, V)) 713327952Sdim return nullptr; 714327952Sdim } 715288943Sdim OldInsts.push_back(MRI->getVRegDef(R)); 716249423Sdim } 717249423Sdim 718249423Sdim return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); 719234285Sdim} 720234285Sdim 721341825Sdim/// Helper function that returns the expression that represents the 722249423Sdim/// number of times a loop iterates. The function takes the operands that 723249423Sdim/// represent the loop start value, loop end value, and induction value. 724249423Sdim/// Based upon these operands, the function attempts to compute the trip count. 725249423SdimCountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, 726249423Sdim const MachineOperand *Start, 727249423Sdim const MachineOperand *End, 728249423Sdim unsigned IVReg, 729249423Sdim int64_t IVBump, 730249423Sdim Comparison::Kind Cmp) const { 731249423Sdim // Cannot handle comparison EQ, i.e. while (A == B). 732249423Sdim if (Cmp == Comparison::EQ) 733276479Sdim return nullptr; 734249423Sdim 735249423Sdim // Check if either the start or end values are an assignment of an immediate. 736249423Sdim // If so, use the immediate value rather than the register. 737249423Sdim if (Start->isReg()) { 738249423Sdim const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); 739288943Sdim if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || 740288943Sdim StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) 741249423Sdim Start = &StartValInstr->getOperand(1); 742249423Sdim } 743249423Sdim if (End->isReg()) { 744249423Sdim const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 745288943Sdim if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || 746288943Sdim EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) 747249423Sdim End = &EndValInstr->getOperand(1); 748249423Sdim } 749249423Sdim 750288943Sdim if (!Start->isReg() && !Start->isImm()) 751288943Sdim return nullptr; 752288943Sdim if (!End->isReg() && !End->isImm()) 753288943Sdim return nullptr; 754249423Sdim 755249423Sdim bool CmpLess = Cmp & Comparison::L; 756249423Sdim bool CmpGreater = Cmp & Comparison::G; 757249423Sdim bool CmpHasEqual = Cmp & Comparison::EQ; 758249423Sdim 759249423Sdim // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. 760249423Sdim if (CmpLess && IVBump < 0) 761288943Sdim // Loop going while iv is "less" with the iv value going down. Must wrap. 762276479Sdim return nullptr; 763288943Sdim 764249423Sdim if (CmpGreater && IVBump > 0) 765288943Sdim // Loop going while iv is "greater" with the iv value going up. Must wrap. 766276479Sdim return nullptr; 767249423Sdim 768288943Sdim // Phis that may feed into the loop. 769288943Sdim LoopFeederMap LoopFeederPhi; 770288943Sdim 771296417Sdim // Check if the initial value may be zero and can be decremented in the first 772288943Sdim // iteration. If the value is zero, the endloop instruction will not decrement 773296417Sdim // the loop counter, so we shouldn't generate a hardware loop in this case. 774288943Sdim if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, 775288943Sdim LoopFeederPhi)) 776288943Sdim return nullptr; 777288943Sdim 778249423Sdim if (Start->isImm() && End->isImm()) { 779249423Sdim // Both, start and end are immediates. 780249423Sdim int64_t StartV = Start->getImm(); 781249423Sdim int64_t EndV = End->getImm(); 782249423Sdim int64_t Dist = EndV - StartV; 783249423Sdim if (Dist == 0) 784276479Sdim return nullptr; 785249423Sdim 786249423Sdim bool Exact = (Dist % IVBump) == 0; 787249423Sdim 788249423Sdim if (Cmp == Comparison::NE) { 789249423Sdim if (!Exact) 790276479Sdim return nullptr; 791249423Sdim if ((Dist < 0) ^ (IVBump < 0)) 792276479Sdim return nullptr; 793249423Sdim } 794249423Sdim 795249423Sdim // For comparisons that include the final value (i.e. include equality 796249423Sdim // with the final value), we need to increase the distance by 1. 797249423Sdim if (CmpHasEqual) 798249423Sdim Dist = Dist > 0 ? Dist+1 : Dist-1; 799249423Sdim 800288943Sdim // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, 801288943Sdim // CmpGreater should imply Dist < 0. These conditions could actually 802288943Sdim // fail, for example, in unreachable code (which may still appear to be 803288943Sdim // reachable in the CFG). 804288943Sdim if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) 805288943Sdim return nullptr; 806249423Sdim 807249423Sdim // "Normalized" distance, i.e. with the bump set to +-1. 808288943Sdim int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump 809288943Sdim : (-Dist + (-IVBump - 1)) / (-IVBump); 810249423Sdim assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); 811249423Sdim 812249423Sdim uint64_t Count = Dist1; 813249423Sdim 814249423Sdim if (Count > 0xFFFFFFFFULL) 815276479Sdim return nullptr; 816249423Sdim 817249423Sdim return new CountValue(CountValue::CV_Immediate, Count); 818249423Sdim } 819249423Sdim 820249423Sdim // A general case: Start and End are some values, but the actual 821249423Sdim // iteration count may not be available. If it is not, insert 822249423Sdim // a computation of it into the preheader. 823249423Sdim 824249423Sdim // If the induction variable bump is not a power of 2, quit. 825249423Sdim // Othwerise we'd need a general integer division. 826288943Sdim if (!isPowerOf2_64(std::abs(IVBump))) 827276479Sdim return nullptr; 828249423Sdim 829314564Sdim MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); 830249423Sdim assert (PH && "Should have a preheader by now"); 831249423Sdim MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); 832288943Sdim DebugLoc DL; 833288943Sdim if (InsertPos != PH->end()) 834288943Sdim DL = InsertPos->getDebugLoc(); 835249423Sdim 836249423Sdim // If Start is an immediate and End is a register, the trip count 837249423Sdim // will be "reg - imm". Hexagon's "subtract immediate" instruction 838249423Sdim // is actually "reg + -imm". 839249423Sdim 840249423Sdim // If the loop IV is going downwards, i.e. if the bump is negative, 841249423Sdim // then the iteration count (computed as End-Start) will need to be 842249423Sdim // negated. To avoid the negation, just swap Start and End. 843249423Sdim if (IVBump < 0) { 844249423Sdim std::swap(Start, End); 845249423Sdim IVBump = -IVBump; 846249423Sdim } 847249423Sdim // Cmp may now have a wrong direction, e.g. LEs may now be GEs. 848249423Sdim // Signedness, and "including equality" are preserved. 849249423Sdim 850249423Sdim bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) 851249423Sdim bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) 852249423Sdim 853249423Sdim int64_t StartV = 0, EndV = 0; 854249423Sdim if (Start->isImm()) 855249423Sdim StartV = Start->getImm(); 856249423Sdim if (End->isImm()) 857249423Sdim EndV = End->getImm(); 858249423Sdim 859249423Sdim int64_t AdjV = 0; 860249423Sdim // To compute the iteration count, we would need this computation: 861249423Sdim // Count = (End - Start + (IVBump-1)) / IVBump 862249423Sdim // or, when CmpHasEqual: 863249423Sdim // Count = (End - Start + (IVBump-1)+1) / IVBump 864249423Sdim // The "IVBump-1" part is the adjustment (AdjV). We can avoid 865249423Sdim // generating an instruction specifically to add it if we can adjust 866249423Sdim // the immediate values for Start or End. 867249423Sdim 868249423Sdim if (CmpHasEqual) { 869249423Sdim // Need to add 1 to the total iteration count. 870249423Sdim if (Start->isImm()) 871249423Sdim StartV--; 872249423Sdim else if (End->isImm()) 873249423Sdim EndV++; 874249423Sdim else 875249423Sdim AdjV += 1; 876249423Sdim } 877249423Sdim 878249423Sdim if (Cmp != Comparison::NE) { 879249423Sdim if (Start->isImm()) 880249423Sdim StartV -= (IVBump-1); 881249423Sdim else if (End->isImm()) 882249423Sdim EndV += (IVBump-1); 883249423Sdim else 884249423Sdim AdjV += (IVBump-1); 885249423Sdim } 886249423Sdim 887249423Sdim unsigned R = 0, SR = 0; 888249423Sdim if (Start->isReg()) { 889249423Sdim R = Start->getReg(); 890249423Sdim SR = Start->getSubReg(); 891249423Sdim } else { 892249423Sdim R = End->getReg(); 893249423Sdim SR = End->getSubReg(); 894249423Sdim } 895249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(R); 896249423Sdim // Hardware loops cannot handle 64-bit registers. If it's a double 897249423Sdim // register, it has to have a subregister. 898249423Sdim if (!SR && RC == &Hexagon::DoubleRegsRegClass) 899276479Sdim return nullptr; 900249423Sdim const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; 901249423Sdim 902249423Sdim // Compute DistR (register with the distance between Start and End). 903249423Sdim unsigned DistR, DistSR; 904249423Sdim 905249423Sdim // Avoid special case, where the start value is an imm(0). 906249423Sdim if (Start->isImm() && StartV == 0) { 907249423Sdim DistR = End->getReg(); 908249423Sdim DistSR = End->getSubReg(); 909249423Sdim } else { 910280031Sdim const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : 911288943Sdim (RegToImm ? TII->get(Hexagon::A2_subri) : 912288943Sdim TII->get(Hexagon::A2_addi)); 913288943Sdim if (RegToReg || RegToImm) { 914360784Sdim Register SubR = MRI->createVirtualRegister(IntRC); 915288943Sdim MachineInstrBuilder SubIB = 916288943Sdim BuildMI(*PH, InsertPos, DL, SubD, SubR); 917249423Sdim 918288943Sdim if (RegToReg) 919288943Sdim SubIB.addReg(End->getReg(), 0, End->getSubReg()) 920288943Sdim .addReg(Start->getReg(), 0, Start->getSubReg()); 921288943Sdim else 922288943Sdim SubIB.addImm(EndV) 923288943Sdim .addReg(Start->getReg(), 0, Start->getSubReg()); 924288943Sdim DistR = SubR; 925288943Sdim } else { 926288943Sdim // If the loop has been unrolled, we should use the original loop count 927288943Sdim // instead of recalculating the value. This will avoid additional 928288943Sdim // 'Add' instruction. 929288943Sdim const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 930288943Sdim if (EndValInstr->getOpcode() == Hexagon::A2_addi && 931341825Sdim EndValInstr->getOperand(1).getSubReg() == 0 && 932288943Sdim EndValInstr->getOperand(2).getImm() == StartV) { 933288943Sdim DistR = EndValInstr->getOperand(1).getReg(); 934288943Sdim } else { 935360784Sdim Register SubR = MRI->createVirtualRegister(IntRC); 936288943Sdim MachineInstrBuilder SubIB = 937288943Sdim BuildMI(*PH, InsertPos, DL, SubD, SubR); 938288943Sdim SubIB.addReg(End->getReg(), 0, End->getSubReg()) 939288943Sdim .addImm(-StartV); 940288943Sdim DistR = SubR; 941288943Sdim } 942249423Sdim } 943249423Sdim DistSR = 0; 944249423Sdim } 945249423Sdim 946249423Sdim // From DistR, compute AdjR (register with the adjusted distance). 947249423Sdim unsigned AdjR, AdjSR; 948249423Sdim 949249423Sdim if (AdjV == 0) { 950249423Sdim AdjR = DistR; 951249423Sdim AdjSR = DistSR; 952249423Sdim } else { 953249423Sdim // Generate CountR = ADD DistR, AdjVal 954360784Sdim Register AddR = MRI->createVirtualRegister(IntRC); 955288943Sdim MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); 956249423Sdim BuildMI(*PH, InsertPos, DL, AddD, AddR) 957249423Sdim .addReg(DistR, 0, DistSR) 958249423Sdim .addImm(AdjV); 959249423Sdim 960249423Sdim AdjR = AddR; 961249423Sdim AdjSR = 0; 962249423Sdim } 963249423Sdim 964249423Sdim // From AdjR, compute CountR (register with the final count). 965249423Sdim unsigned CountR, CountSR; 966249423Sdim 967249423Sdim if (IVBump == 1) { 968249423Sdim CountR = AdjR; 969249423Sdim CountSR = AdjSR; 970249423Sdim } else { 971249423Sdim // The IV bump is a power of two. Log_2(IV bump) is the shift amount. 972249423Sdim unsigned Shift = Log2_32(IVBump); 973249423Sdim 974249423Sdim // Generate NormR = LSR DistR, Shift. 975360784Sdim Register LsrR = MRI->createVirtualRegister(IntRC); 976280031Sdim const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); 977249423Sdim BuildMI(*PH, InsertPos, DL, LsrD, LsrR) 978249423Sdim .addReg(AdjR, 0, AdjSR) 979249423Sdim .addImm(Shift); 980249423Sdim 981249423Sdim CountR = LsrR; 982249423Sdim CountSR = 0; 983249423Sdim } 984249423Sdim 985249423Sdim return new CountValue(CountValue::CV_Register, CountR, CountSR); 986234285Sdim} 987234285Sdim 988341825Sdim/// Return true if the operation is invalid within hardware loop. 989288943Sdimbool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, 990288943Sdim bool IsInnerHWLoop) const { 991288943Sdim // Call is not allowed because the callee may use a hardware loop except for 992288943Sdim // the case when the call never returns. 993314564Sdim if (MI->getDesc().isCall()) 994314564Sdim return !TII->doesNotReturn(*MI); 995249423Sdim 996288943Sdim // Check if the instruction defines a hardware loop register. 997321369Sdim using namespace Hexagon; 998327952Sdim 999321369Sdim static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 }; 1000321369Sdim static const unsigned Regs1[] = { LC1, SA1 }; 1001321369Sdim auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01)) 1002321369Sdim : makeArrayRef(Regs1, array_lengthof(Regs1)); 1003321369Sdim for (unsigned R : CheckRegs) 1004321369Sdim if (MI->modifiesRegister(R, TRI)) 1005234285Sdim return true; 1006321369Sdim 1007234285Sdim return false; 1008234285Sdim} 1009234285Sdim 1010341825Sdim/// Return true if the loop contains an instruction that inhibits 1011288943Sdim/// the use of the hardware loop instruction. 1012288943Sdimbool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, 1013288943Sdim bool IsInnerHWLoop) const { 1014344779Sdim LLVM_DEBUG(dbgs() << "\nhw_loop head, " 1015344779Sdim << printMBBReference(**L->block_begin())); 1016344779Sdim for (MachineBasicBlock *MBB : L->getBlocks()) { 1017234285Sdim for (MachineBasicBlock::iterator 1018234285Sdim MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { 1019234285Sdim const MachineInstr *MI = &*MII; 1020288943Sdim if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { 1021341825Sdim LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:"; 1022341825Sdim MI->dump();); 1023234285Sdim return true; 1024288943Sdim } 1025234285Sdim } 1026234285Sdim } 1027234285Sdim return false; 1028234285Sdim} 1029234285Sdim 1030341825Sdim/// Returns true if the instruction is dead. This was essentially 1031249423Sdim/// copied from DeadMachineInstructionElim::isDead, but with special cases 1032249423Sdim/// for inline asm, physical registers and instructions with side effects 1033249423Sdim/// removed. 1034249423Sdimbool HexagonHardwareLoops::isDead(const MachineInstr *MI, 1035261991Sdim SmallVectorImpl<MachineInstr *> &DeadPhis) const { 1036249423Sdim // Examine each operand. 1037249423Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1038249423Sdim const MachineOperand &MO = MI->getOperand(i); 1039249423Sdim if (!MO.isReg() || !MO.isDef()) 1040249423Sdim continue; 1041249423Sdim 1042360784Sdim Register Reg = MO.getReg(); 1043249423Sdim if (MRI->use_nodbg_empty(Reg)) 1044249423Sdim continue; 1045249423Sdim 1046327952Sdim using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator; 1047249423Sdim 1048249423Sdim // This instruction has users, but if the only user is the phi node for the 1049249423Sdim // parent block, and the only use of that phi node is this instruction, then 1050249423Sdim // this instruction is dead: both it (and the phi node) can be removed. 1051249423Sdim use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); 1052249423Sdim use_nodbg_iterator End = MRI->use_nodbg_end(); 1053276479Sdim if (std::next(I) != End || !I->getParent()->isPHI()) 1054249423Sdim return false; 1055249423Sdim 1056276479Sdim MachineInstr *OnePhi = I->getParent(); 1057249423Sdim for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { 1058249423Sdim const MachineOperand &OPO = OnePhi->getOperand(j); 1059249423Sdim if (!OPO.isReg() || !OPO.isDef()) 1060249423Sdim continue; 1061249423Sdim 1062360784Sdim Register OPReg = OPO.getReg(); 1063249423Sdim use_nodbg_iterator nextJ; 1064249423Sdim for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); 1065249423Sdim J != End; J = nextJ) { 1066276479Sdim nextJ = std::next(J); 1067276479Sdim MachineOperand &Use = *J; 1068249423Sdim MachineInstr *UseMI = Use.getParent(); 1069249423Sdim 1070288943Sdim // If the phi node has a user that is not MI, bail. 1071249423Sdim if (MI != UseMI) 1072249423Sdim return false; 1073249423Sdim } 1074249423Sdim } 1075249423Sdim DeadPhis.push_back(OnePhi); 1076249423Sdim } 1077249423Sdim 1078249423Sdim // If there are no defs with uses, the instruction is dead. 1079249423Sdim return true; 1080249423Sdim} 1081249423Sdim 1082249423Sdimvoid HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { 1083249423Sdim // This procedure was essentially copied from DeadMachineInstructionElim. 1084249423Sdim 1085249423Sdim SmallVector<MachineInstr*, 1> DeadPhis; 1086249423Sdim if (isDead(MI, DeadPhis)) { 1087341825Sdim LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI); 1088249423Sdim 1089249423Sdim // It is possible that some DBG_VALUE instructions refer to this 1090249423Sdim // instruction. Examine each def operand for such references; 1091249423Sdim // if found, mark the DBG_VALUE as undef (but don't delete it). 1092249423Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1093249423Sdim const MachineOperand &MO = MI->getOperand(i); 1094249423Sdim if (!MO.isReg() || !MO.isDef()) 1095249423Sdim continue; 1096360784Sdim Register Reg = MO.getReg(); 1097249423Sdim MachineRegisterInfo::use_iterator nextI; 1098249423Sdim for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), 1099249423Sdim E = MRI->use_end(); I != E; I = nextI) { 1100276479Sdim nextI = std::next(I); // I is invalidated by the setReg 1101276479Sdim MachineOperand &Use = *I; 1102276479Sdim MachineInstr *UseMI = I->getParent(); 1103249423Sdim if (UseMI == MI) 1104249423Sdim continue; 1105249423Sdim if (Use.isDebug()) 1106249423Sdim UseMI->getOperand(0).setReg(0U); 1107249423Sdim } 1108249423Sdim } 1109249423Sdim 1110249423Sdim MI->eraseFromParent(); 1111249423Sdim for (unsigned i = 0; i < DeadPhis.size(); ++i) 1112249423Sdim DeadPhis[i]->eraseFromParent(); 1113249423Sdim } 1114249423Sdim} 1115249423Sdim 1116341825Sdim/// Check if the loop is a candidate for converting to a hardware 1117249423Sdim/// loop. If so, then perform the transformation. 1118234285Sdim/// 1119249423Sdim/// This function works on innermost loops first. A loop can be converted 1120249423Sdim/// if it is a counting loop; either a register value or an immediate. 1121234285Sdim/// 1122249423Sdim/// The code makes several assumptions about the representation of the loop 1123249423Sdim/// in llvm. 1124288943Sdimbool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, 1125288943Sdim bool &RecL0used, 1126288943Sdim bool &RecL1used) { 1127249423Sdim // This is just for sanity. 1128249423Sdim assert(L->getHeader() && "Loop without a header?"); 1129249423Sdim 1130234285Sdim bool Changed = false; 1131288943Sdim bool L0Used = false; 1132288943Sdim bool L1Used = false; 1133288943Sdim 1134234285Sdim // Process nested loops first. 1135288943Sdim for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 1136288943Sdim Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); 1137288943Sdim L0Used |= RecL0used; 1138288943Sdim L1Used |= RecL1used; 1139288943Sdim } 1140249423Sdim 1141234285Sdim // If a nested loop has been converted, then we can't convert this loop. 1142288943Sdim if (Changed && L0Used && L1Used) 1143234285Sdim return Changed; 1144249423Sdim 1145288943Sdim unsigned LOOP_i; 1146288943Sdim unsigned LOOP_r; 1147288943Sdim unsigned ENDLOOP; 1148288943Sdim 1149288943Sdim // Flag used to track loopN instruction: 1150288943Sdim // 1 - Hardware loop is being generated for the inner most loop. 1151288943Sdim // 0 - Hardware loop is being generated for the outer loop. 1152288943Sdim unsigned IsInnerHWLoop = 1; 1153288943Sdim 1154288943Sdim if (L0Used) { 1155288943Sdim LOOP_i = Hexagon::J2_loop1i; 1156288943Sdim LOOP_r = Hexagon::J2_loop1r; 1157288943Sdim ENDLOOP = Hexagon::ENDLOOP1; 1158288943Sdim IsInnerHWLoop = 0; 1159288943Sdim } else { 1160288943Sdim LOOP_i = Hexagon::J2_loop0i; 1161288943Sdim LOOP_r = Hexagon::J2_loop0r; 1162288943Sdim ENDLOOP = Hexagon::ENDLOOP0; 1163288943Sdim } 1164288943Sdim 1165249423Sdim#ifndef NDEBUG 1166249423Sdim // Stop trying after reaching the limit (if any). 1167249423Sdim int Limit = HWLoopLimit; 1168249423Sdim if (Limit >= 0) { 1169249423Sdim if (Counter >= HWLoopLimit) 1170249423Sdim return false; 1171249423Sdim Counter++; 1172234285Sdim } 1173249423Sdim#endif 1174249423Sdim 1175234285Sdim // Does the loop contain any invalid instructions? 1176288943Sdim if (containsInvalidInstruction(L, IsInnerHWLoop)) 1177234285Sdim return false; 1178249423Sdim 1179314564Sdim MachineBasicBlock *LastMBB = L->findLoopControlBlock(); 1180234285Sdim // Don't generate hw loop if the loop has more than one exit. 1181276479Sdim if (!LastMBB) 1182234285Sdim return false; 1183249423Sdim 1184234285Sdim MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); 1185249423Sdim if (LastI == LastMBB->end()) 1186249423Sdim return false; 1187234285Sdim 1188288943Sdim // Is the induction variable bump feeding the latch condition? 1189288943Sdim if (!fixupInductionVariable(L)) 1190288943Sdim return false; 1191288943Sdim 1192249423Sdim // Ensure the loop has a preheader: the loop instruction will be 1193249423Sdim // placed there. 1194314564Sdim MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 1195249423Sdim if (!Preheader) { 1196249423Sdim Preheader = createPreheaderForLoop(L); 1197249423Sdim if (!Preheader) 1198249423Sdim return false; 1199249423Sdim } 1200288943Sdim 1201249423Sdim MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); 1202249423Sdim 1203249423Sdim SmallVector<MachineInstr*, 2> OldInsts; 1204249423Sdim // Are we able to determine the trip count for the loop? 1205249423Sdim CountValue *TripCount = getLoopTripCount(L, OldInsts); 1206276479Sdim if (!TripCount) 1207249423Sdim return false; 1208249423Sdim 1209249423Sdim // Is the trip count available in the preheader? 1210249423Sdim if (TripCount->isReg()) { 1211249423Sdim // There will be a use of the register inserted into the preheader, 1212249423Sdim // so make sure that the register is actually defined at that point. 1213249423Sdim MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); 1214249423Sdim MachineBasicBlock *BBDef = TCDef->getParent(); 1215288943Sdim if (!MDT->dominates(BBDef, Preheader)) 1216288943Sdim return false; 1217249423Sdim } 1218249423Sdim 1219234285Sdim // Determine the loop start. 1220288943Sdim MachineBasicBlock *TopBlock = L->getTopBlock(); 1221314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1222314564Sdim MachineBasicBlock *LoopStart = nullptr; 1223288943Sdim if (ExitingBlock != L->getLoopLatch()) { 1224314564Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1225288943Sdim SmallVector<MachineOperand, 2> Cond; 1226288943Sdim 1227309124Sdim if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) 1228234285Sdim return false; 1229288943Sdim 1230288943Sdim if (L->contains(TB)) 1231288943Sdim LoopStart = TB; 1232288943Sdim else if (L->contains(FB)) 1233288943Sdim LoopStart = FB; 1234288943Sdim else 1235288943Sdim return false; 1236234285Sdim } 1237288943Sdim else 1238288943Sdim LoopStart = TopBlock; 1239234285Sdim 1240249423Sdim // Convert the loop to a hardware loop. 1241341825Sdim LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); 1242249423Sdim DebugLoc DL; 1243249423Sdim if (InsertPos != Preheader->end()) 1244249423Sdim DL = InsertPos->getDebugLoc(); 1245234285Sdim 1246234285Sdim if (TripCount->isReg()) { 1247234285Sdim // Create a copy of the loop count register. 1248360784Sdim Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 1249249423Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) 1250249423Sdim .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); 1251239462Sdim // Add the Loop instruction to the beginning of the loop. 1252288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) 1253249423Sdim .addReg(CountReg); 1254234285Sdim } else { 1255249423Sdim assert(TripCount->isImm() && "Expecting immediate value for trip count"); 1256249423Sdim // Add the Loop immediate instruction to the beginning of the loop, 1257249423Sdim // if the immediate fits in the instructions. Otherwise, we need to 1258249423Sdim // create a new virtual register. 1259234285Sdim int64_t CountImm = TripCount->getImm(); 1260327952Sdim if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) { 1261360784Sdim Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 1262280031Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) 1263249423Sdim .addImm(CountImm); 1264288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) 1265249423Sdim .addMBB(LoopStart).addReg(CountReg); 1266249423Sdim } else 1267288943Sdim BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) 1268249423Sdim .addMBB(LoopStart).addImm(CountImm); 1269234285Sdim } 1270234285Sdim 1271249423Sdim // Make sure the loop start always has a reference in the CFG. We need 1272249423Sdim // to create a BlockAddress operand to get this mechanism to work both the 1273234285Sdim // MachineBasicBlock and BasicBlock objects need the flag set. 1274234285Sdim LoopStart->setHasAddressTaken(); 1275234285Sdim // This line is needed to set the hasAddressTaken flag on the BasicBlock 1276249423Sdim // object. 1277234285Sdim BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); 1278234285Sdim 1279234285Sdim // Replace the loop branch with an endloop instruction. 1280249423Sdim DebugLoc LastIDL = LastI->getDebugLoc(); 1281288943Sdim BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); 1282234285Sdim 1283234285Sdim // The loop ends with either: 1284234285Sdim // - a conditional branch followed by an unconditional branch, or 1285234285Sdim // - a conditional branch to the loop start. 1286280031Sdim if (LastI->getOpcode() == Hexagon::J2_jumpt || 1287280031Sdim LastI->getOpcode() == Hexagon::J2_jumpf) { 1288249423Sdim // Delete one and change/add an uncond. branch to out of the loop. 1289234285Sdim MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); 1290234285Sdim LastI = LastMBB->erase(LastI); 1291234285Sdim if (!L->contains(BranchTarget)) { 1292249423Sdim if (LastI != LastMBB->end()) 1293249423Sdim LastI = LastMBB->erase(LastI); 1294234285Sdim SmallVector<MachineOperand, 0> Cond; 1295314564Sdim TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); 1296234285Sdim } 1297234285Sdim } else { 1298234285Sdim // Conditional branch to loop start; just delete it. 1299234285Sdim LastMBB->erase(LastI); 1300234285Sdim } 1301234285Sdim delete TripCount; 1302234285Sdim 1303249423Sdim // The induction operation and the comparison may now be 1304249423Sdim // unneeded. If these are unneeded, then remove them. 1305249423Sdim for (unsigned i = 0; i < OldInsts.size(); ++i) 1306249423Sdim removeIfDead(OldInsts[i]); 1307249423Sdim 1308234285Sdim ++NumHWLoops; 1309288943Sdim 1310288943Sdim // Set RecL1used and RecL0used only after hardware loop has been 1311288943Sdim // successfully generated. Doing it earlier can cause wrong loop instruction 1312288943Sdim // to be used. 1313288943Sdim if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. 1314288943Sdim RecL1used = true; 1315288943Sdim else 1316288943Sdim RecL0used = true; 1317288943Sdim 1318234285Sdim return true; 1319234285Sdim} 1320234285Sdim 1321249423Sdimbool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, 1322249423Sdim MachineInstr *CmpI) { 1323249423Sdim assert (BumpI != CmpI && "Bump and compare in the same instruction?"); 1324249423Sdim 1325249423Sdim MachineBasicBlock *BB = BumpI->getParent(); 1326249423Sdim if (CmpI->getParent() != BB) 1327249423Sdim return false; 1328249423Sdim 1329327952Sdim using instr_iterator = MachineBasicBlock::instr_iterator; 1330327952Sdim 1331249423Sdim // Check if things are in order to begin with. 1332296417Sdim for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I) 1333249423Sdim if (&*I == CmpI) 1334249423Sdim return true; 1335249423Sdim 1336249423Sdim // Out of order. 1337360784Sdim Register PredR = CmpI->getOperand(0).getReg(); 1338249423Sdim bool FoundBump = false; 1339296417Sdim instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); 1340249423Sdim for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { 1341249423Sdim MachineInstr *In = &*I; 1342249423Sdim for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { 1343249423Sdim MachineOperand &MO = In->getOperand(i); 1344249423Sdim if (MO.isReg() && MO.isUse()) { 1345249423Sdim if (MO.getReg() == PredR) // Found an intervening use of PredR. 1346249423Sdim return false; 1347249423Sdim } 1348249423Sdim } 1349249423Sdim 1350249423Sdim if (In == BumpI) { 1351296417Sdim BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); 1352249423Sdim FoundBump = true; 1353249423Sdim break; 1354249423Sdim } 1355249423Sdim } 1356249423Sdim assert (FoundBump && "Cannot determine instruction order"); 1357249423Sdim return FoundBump; 1358234285Sdim} 1359234285Sdim 1360288943Sdim/// This function is required to break recursion. Visiting phis in a loop may 1361288943Sdim/// result in recursion during compilation. We break the recursion by making 1362288943Sdim/// sure that we visit a MachineOperand and its definition in a 1363288943Sdim/// MachineInstruction only once. If we attempt to visit more than once, then 1364288943Sdim/// there is recursion, and will return false. 1365288943Sdimbool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, 1366288943Sdim MachineInstr *MI, 1367288943Sdim const MachineOperand *MO, 1368288943Sdim LoopFeederMap &LoopFeederPhi) const { 1369288943Sdim if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { 1370344779Sdim LLVM_DEBUG(dbgs() << "\nhw_loop head, " 1371344779Sdim << printMBBReference(**L->block_begin())); 1372288943Sdim // Ignore all BBs that form Loop. 1373344779Sdim for (MachineBasicBlock *MBB : L->getBlocks()) { 1374288943Sdim if (A == MBB) 1375288943Sdim return false; 1376288943Sdim } 1377288943Sdim MachineInstr *Def = MRI->getVRegDef(MO->getReg()); 1378288943Sdim LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); 1379288943Sdim return true; 1380288943Sdim } else 1381288943Sdim // Already visited node. 1382288943Sdim return false; 1383288943Sdim} 1384234285Sdim 1385288943Sdim/// Return true if a Phi may generate a value that can underflow. 1386288943Sdim/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. 1387288943Sdimbool HexagonHardwareLoops::phiMayWrapOrUnderflow( 1388288943Sdim MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, 1389288943Sdim MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { 1390288943Sdim assert(Phi->isPHI() && "Expecting a Phi."); 1391288943Sdim // Walk through each Phi, and its used operands. Make sure that 1392288943Sdim // if there is recursion in Phi, we won't generate hardware loops. 1393288943Sdim for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2) 1394288943Sdim if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)) 1395288943Sdim if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, 1396288943Sdim Phi->getParent(), L, LoopFeederPhi)) 1397288943Sdim return true; 1398288943Sdim return false; 1399288943Sdim} 1400288943Sdim 1401288943Sdim/// Return true if the induction variable can underflow in the first iteration. 1402288943Sdim/// An example, is an initial unsigned value that is 0 and is decrement in the 1403288943Sdim/// first itertion of a do-while loop. In this case, we cannot generate a 1404288943Sdim/// hardware loop because the endloop instruction does not decrement the loop 1405288943Sdim/// counter if it is <= 1. We only need to perform this analysis if the 1406288943Sdim/// initial value is a register. 1407288943Sdim/// 1408288943Sdim/// This function assumes the initial value may underfow unless proven 1409288943Sdim/// otherwise. If the type is signed, then we don't care because signed 1410288943Sdim/// underflow is undefined. We attempt to prove the initial value is not 1411288943Sdim/// zero by perfoming a crude analysis of the loop counter. This function 1412288943Sdim/// checks if the initial value is used in any comparison prior to the loop 1413288943Sdim/// and, if so, assumes the comparison is a range check. This is inexact, 1414288943Sdim/// but will catch the simple cases. 1415288943Sdimbool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( 1416288943Sdim const MachineOperand *InitVal, const MachineOperand *EndVal, 1417288943Sdim MachineBasicBlock *MBB, MachineLoop *L, 1418288943Sdim LoopFeederMap &LoopFeederPhi) const { 1419288943Sdim // Only check register values since they are unknown. 1420288943Sdim if (!InitVal->isReg()) 1421288943Sdim return false; 1422288943Sdim 1423288943Sdim if (!EndVal->isImm()) 1424288943Sdim return false; 1425288943Sdim 1426288943Sdim // A register value that is assigned an immediate is a known value, and it 1427288943Sdim // won't underflow in the first iteration. 1428288943Sdim int64_t Imm; 1429288943Sdim if (checkForImmediate(*InitVal, Imm)) 1430288943Sdim return (EndVal->getImm() == Imm); 1431288943Sdim 1432360784Sdim Register Reg = InitVal->getReg(); 1433288943Sdim 1434288943Sdim // We don't know the value of a physical register. 1435360784Sdim if (!Register::isVirtualRegister(Reg)) 1436288943Sdim return true; 1437288943Sdim 1438288943Sdim MachineInstr *Def = MRI->getVRegDef(Reg); 1439288943Sdim if (!Def) 1440288943Sdim return true; 1441288943Sdim 1442288943Sdim // If the initial value is a Phi or copy and the operands may not underflow, 1443288943Sdim // then the definition cannot be underflow either. 1444288943Sdim if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), 1445288943Sdim L, LoopFeederPhi)) 1446288943Sdim return false; 1447288943Sdim if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), 1448288943Sdim EndVal, Def->getParent(), 1449288943Sdim L, LoopFeederPhi)) 1450288943Sdim return false; 1451288943Sdim 1452288943Sdim // Iterate over the uses of the initial value. If the initial value is used 1453288943Sdim // in a compare, then we assume this is a range check that ensures the loop 1454288943Sdim // doesn't underflow. This is not an exact test and should be improved. 1455288943Sdim for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), 1456288943Sdim E = MRI->use_instr_nodbg_end(); I != E; ++I) { 1457288943Sdim MachineInstr *MI = &*I; 1458288943Sdim unsigned CmpReg1 = 0, CmpReg2 = 0; 1459288943Sdim int CmpMask = 0, CmpValue = 0; 1460288943Sdim 1461309124Sdim if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) 1462288943Sdim continue; 1463288943Sdim 1464314564Sdim MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 1465288943Sdim SmallVector<MachineOperand, 2> Cond; 1466309124Sdim if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) 1467288943Sdim continue; 1468288943Sdim 1469314564Sdim Comparison::Kind Cmp = 1470314564Sdim getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); 1471288943Sdim if (Cmp == 0) 1472288943Sdim continue; 1473288943Sdim if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) 1474288943Sdim Cmp = Comparison::getNegatedComparison(Cmp); 1475288943Sdim if (CmpReg2 != 0 && CmpReg2 == Reg) 1476288943Sdim Cmp = Comparison::getSwappedComparison(Cmp); 1477288943Sdim 1478288943Sdim // Signed underflow is undefined. 1479288943Sdim if (Comparison::isSigned(Cmp)) 1480288943Sdim return false; 1481288943Sdim 1482296417Sdim // Check if there is a comparison of the initial value. If the initial value 1483288943Sdim // is greater than or not equal to another value, then assume this is a 1484288943Sdim // range check. 1485288943Sdim if ((Cmp & Comparison::G) || Cmp == Comparison::NE) 1486288943Sdim return false; 1487288943Sdim } 1488288943Sdim 1489288943Sdim // OK - this is a hack that needs to be improved. We really need to analyze 1490288943Sdim // the instructions performed on the initial value. This works on the simplest 1491288943Sdim // cases only. 1492288943Sdim if (!Def->isCopy() && !Def->isPHI()) 1493288943Sdim return false; 1494288943Sdim 1495288943Sdim return true; 1496288943Sdim} 1497288943Sdim 1498288943Sdimbool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, 1499288943Sdim int64_t &Val) const { 1500288943Sdim if (MO.isImm()) { 1501288943Sdim Val = MO.getImm(); 1502288943Sdim return true; 1503288943Sdim } 1504288943Sdim if (!MO.isReg()) 1505288943Sdim return false; 1506288943Sdim 1507288943Sdim // MO is a register. Check whether it is defined as an immediate value, 1508288943Sdim // and if so, get the value of it in TV. That value will then need to be 1509288943Sdim // processed to handle potential subregisters in MO. 1510288943Sdim int64_t TV; 1511288943Sdim 1512360784Sdim Register R = MO.getReg(); 1513360784Sdim if (!Register::isVirtualRegister(R)) 1514288943Sdim return false; 1515249423Sdim MachineInstr *DI = MRI->getVRegDef(R); 1516249423Sdim unsigned DOpc = DI->getOpcode(); 1517249423Sdim switch (DOpc) { 1518288943Sdim case TargetOpcode::COPY: 1519280031Sdim case Hexagon::A2_tfrsi: 1520280031Sdim case Hexagon::A2_tfrpi: 1521314564Sdim case Hexagon::CONST32: 1522327952Sdim case Hexagon::CONST64: 1523288943Sdim // Call recursively to avoid an extra check whether operand(1) is 1524288943Sdim // indeed an immediate (it could be a global address, for example), 1525288943Sdim // plus we can handle COPY at the same time. 1526288943Sdim if (!checkForImmediate(DI->getOperand(1), TV)) 1527288943Sdim return false; 1528288943Sdim break; 1529288943Sdim case Hexagon::A2_combineii: 1530288943Sdim case Hexagon::A4_combineir: 1531288943Sdim case Hexagon::A4_combineii: 1532288943Sdim case Hexagon::A4_combineri: 1533288943Sdim case Hexagon::A2_combinew: { 1534288943Sdim const MachineOperand &S1 = DI->getOperand(1); 1535288943Sdim const MachineOperand &S2 = DI->getOperand(2); 1536288943Sdim int64_t V1, V2; 1537288943Sdim if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) 1538288943Sdim return false; 1539321369Sdim TV = V2 | (static_cast<uint64_t>(V1) << 32); 1540288943Sdim break; 1541288943Sdim } 1542288943Sdim case TargetOpcode::REG_SEQUENCE: { 1543288943Sdim const MachineOperand &S1 = DI->getOperand(1); 1544288943Sdim const MachineOperand &S3 = DI->getOperand(3); 1545288943Sdim int64_t V1, V3; 1546288943Sdim if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) 1547288943Sdim return false; 1548288943Sdim unsigned Sub2 = DI->getOperand(2).getImm(); 1549288943Sdim unsigned Sub4 = DI->getOperand(4).getImm(); 1550314564Sdim if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi) 1551288943Sdim TV = V1 | (V3 << 32); 1552314564Sdim else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo) 1553288943Sdim TV = V3 | (V1 << 32); 1554288943Sdim else 1555288943Sdim llvm_unreachable("Unexpected form of REG_SEQUENCE"); 1556288943Sdim break; 1557288943Sdim } 1558288943Sdim 1559288943Sdim default: 1560288943Sdim return false; 1561249423Sdim } 1562234285Sdim 1563314564Sdim // By now, we should have successfully obtained the immediate value defining 1564288943Sdim // the register referenced in MO. Handle a potential use of a subregister. 1565288943Sdim switch (MO.getSubReg()) { 1566314564Sdim case Hexagon::isub_lo: 1567288943Sdim Val = TV & 0xFFFFFFFFULL; 1568288943Sdim break; 1569314564Sdim case Hexagon::isub_hi: 1570288943Sdim Val = (TV >> 32) & 0xFFFFFFFFULL; 1571288943Sdim break; 1572288943Sdim default: 1573288943Sdim Val = TV; 1574288943Sdim break; 1575288943Sdim } 1576288943Sdim return true; 1577249423Sdim} 1578234285Sdim 1579249423Sdimvoid HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { 1580249423Sdim if (MO.isImm()) { 1581249423Sdim MO.setImm(Val); 1582249423Sdim return; 1583234285Sdim } 1584234285Sdim 1585249423Sdim assert(MO.isReg()); 1586360784Sdim Register R = MO.getReg(); 1587288943Sdim MachineInstr *DI = MRI->getVRegDef(R); 1588234285Sdim 1589249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(R); 1590360784Sdim Register NewR = MRI->createVirtualRegister(RC); 1591249423Sdim MachineBasicBlock &B = *DI->getParent(); 1592249423Sdim DebugLoc DL = DI->getDebugLoc(); 1593288943Sdim BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); 1594249423Sdim MO.setReg(NewR); 1595249423Sdim} 1596234285Sdim 1597288943Sdimstatic bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { 1598288943Sdim // These two instructions are not extendable. 1599288943Sdim if (CmpOpc == Hexagon::A4_cmpbeqi) 1600288943Sdim return isUInt<8>(Imm); 1601288943Sdim if (CmpOpc == Hexagon::A4_cmpbgti) 1602288943Sdim return isInt<8>(Imm); 1603288943Sdim // The rest of the comparison-with-immediate instructions are extendable. 1604288943Sdim return true; 1605288943Sdim} 1606249423Sdim 1607249423Sdimbool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { 1608249423Sdim MachineBasicBlock *Header = L->getHeader(); 1609249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 1610314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1611249423Sdim 1612288943Sdim if (!(Header && Latch && ExitingBlock)) 1613249423Sdim return false; 1614249423Sdim 1615249423Sdim // These data structures follow the same concept as the corresponding 1616249423Sdim // ones in findInductionRegister (where some comments are). 1617327952Sdim using RegisterBump = std::pair<unsigned, int64_t>; 1618327952Sdim using RegisterInduction = std::pair<unsigned, RegisterBump>; 1619327952Sdim using RegisterInductionSet = std::set<RegisterInduction>; 1620249423Sdim 1621249423Sdim // Register candidates for induction variables, with their associated bumps. 1622249423Sdim RegisterInductionSet IndRegs; 1623249423Sdim 1624249423Sdim // Look for induction patterns: 1625327952Sdim // %1 = PHI ..., [ latch, %2 ] 1626327952Sdim // %2 = ADD %1, imm 1627327952Sdim using instr_iterator = MachineBasicBlock::instr_iterator; 1628327952Sdim 1629249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1630249423Sdim I != E && I->isPHI(); ++I) { 1631249423Sdim MachineInstr *Phi = &*I; 1632249423Sdim 1633249423Sdim // Have a PHI instruction. 1634249423Sdim for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 1635249423Sdim if (Phi->getOperand(i+1).getMBB() != Latch) 1636249423Sdim continue; 1637249423Sdim 1638360784Sdim Register PhiReg = Phi->getOperand(i).getReg(); 1639249423Sdim MachineInstr *DI = MRI->getVRegDef(PhiReg); 1640249423Sdim 1641314564Sdim if (DI->getDesc().isAdd()) { 1642249423Sdim // If the register operand to the add/sub is the PHI we are looking 1643249423Sdim // at, this meets the induction pattern. 1644360784Sdim Register IndReg = DI->getOperand(1).getReg(); 1645288943Sdim MachineOperand &Opnd2 = DI->getOperand(2); 1646288943Sdim int64_t V; 1647288943Sdim if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 1648360784Sdim Register UpdReg = DI->getOperand(0).getReg(); 1649249423Sdim IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 1650234285Sdim } 1651234285Sdim } 1652249423Sdim } // for (i) 1653249423Sdim } // for (instr) 1654249423Sdim 1655249423Sdim if (IndRegs.empty()) 1656249423Sdim return false; 1657249423Sdim 1658276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1659249423Sdim SmallVector<MachineOperand,2> Cond; 1660249423Sdim // AnalyzeBranch returns true if it fails to analyze branch. 1661309124Sdim bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 1662288943Sdim if (NotAnalyzed || Cond.empty()) 1663249423Sdim return false; 1664249423Sdim 1665288943Sdim if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 1666314564Sdim MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 1667288943Sdim SmallVector<MachineOperand,2> LCond; 1668309124Sdim bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 1669288943Sdim if (NotAnalyzed) 1670288943Sdim return false; 1671249423Sdim 1672288943Sdim // Since latch is not the exiting block, the latch branch should be an 1673288943Sdim // unconditional branch to the loop header. 1674288943Sdim if (TB == Latch) 1675288943Sdim TB = (LTB == Header) ? LTB : LFB; 1676288943Sdim else 1677288943Sdim FB = (LTB == Header) ? LTB : LFB; 1678288943Sdim } 1679288943Sdim if (TB != Header) { 1680288943Sdim if (FB != Header) { 1681288943Sdim // The latch/exit block does not go back to the header. 1682288943Sdim return false; 1683288943Sdim } 1684288943Sdim // FB is the header (i.e., uncond. jump to branch header) 1685288943Sdim // In this case, the LoopBody -> TB should not be a back edge otherwise 1686288943Sdim // it could result in an infinite loop after conversion to hw_loop. 1687288943Sdim // This case can happen when the Latch has two jumps like this: 1688288943Sdim // Jmp_c OuterLoopHeader <-- TB 1689288943Sdim // Jmp InnerLoopHeader <-- FB 1690288943Sdim if (MDT->dominates(TB, FB)) 1691288943Sdim return false; 1692288943Sdim } 1693249423Sdim 1694249423Sdim // Expecting a predicate register as a condition. It won't be a hardware 1695249423Sdim // predicate register at this point yet, just a vreg. 1696249423Sdim // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) 1697249423Sdim // into Cond, followed by the predicate register. For non-negated branches 1698249423Sdim // it's just the register. 1699249423Sdim unsigned CSz = Cond.size(); 1700249423Sdim if (CSz != 1 && CSz != 2) 1701249423Sdim return false; 1702249423Sdim 1703288943Sdim if (!Cond[CSz-1].isReg()) 1704288943Sdim return false; 1705288943Sdim 1706360784Sdim Register P = Cond[CSz - 1].getReg(); 1707249423Sdim MachineInstr *PredDef = MRI->getVRegDef(P); 1708249423Sdim 1709249423Sdim if (!PredDef->isCompare()) 1710249423Sdim return false; 1711249423Sdim 1712249423Sdim SmallSet<unsigned,2> CmpRegs; 1713276479Sdim MachineOperand *CmpImmOp = nullptr; 1714249423Sdim 1715249423Sdim // Go over all operands to the compare and look for immediate and register 1716249423Sdim // operands. Assume that if the compare has a single register use and a 1717249423Sdim // single immediate operand, then the register is being compared with the 1718249423Sdim // immediate value. 1719249423Sdim for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 1720249423Sdim MachineOperand &MO = PredDef->getOperand(i); 1721249423Sdim if (MO.isReg()) { 1722249423Sdim // Skip all implicit references. In one case there was: 1723327952Sdim // %140 = FCMPUGT32_rr %138, %139, implicit %usr 1724249423Sdim if (MO.isImplicit()) 1725249423Sdim continue; 1726249423Sdim if (MO.isUse()) { 1727288943Sdim if (!isImmediate(MO)) { 1728249423Sdim CmpRegs.insert(MO.getReg()); 1729249423Sdim continue; 1730249423Sdim } 1731249423Sdim // Consider the register to be the "immediate" operand. 1732249423Sdim if (CmpImmOp) 1733249423Sdim return false; 1734249423Sdim CmpImmOp = &MO; 1735249423Sdim } 1736249423Sdim } else if (MO.isImm()) { 1737249423Sdim if (CmpImmOp) // A second immediate argument? Confusing. Bail out. 1738249423Sdim return false; 1739249423Sdim CmpImmOp = &MO; 1740234285Sdim } 1741234285Sdim } 1742234285Sdim 1743249423Sdim if (CmpRegs.empty()) 1744249423Sdim return false; 1745234285Sdim 1746249423Sdim // Check if the compared register follows the order we want. Fix if needed. 1747249423Sdim for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); 1748249423Sdim I != E; ++I) { 1749249423Sdim // This is a success. If the register used in the comparison is one that 1750249423Sdim // we have identified as a bumped (updated) induction register, there is 1751249423Sdim // nothing to do. 1752249423Sdim if (CmpRegs.count(I->first)) 1753249423Sdim return true; 1754249423Sdim 1755249423Sdim // Otherwise, if the register being compared comes out of a PHI node, 1756249423Sdim // and has been recognized as following the induction pattern, and is 1757249423Sdim // compared against an immediate, we can fix it. 1758249423Sdim const RegisterBump &RB = I->second; 1759249423Sdim if (CmpRegs.count(RB.first)) { 1760288943Sdim if (!CmpImmOp) { 1761288943Sdim // If both operands to the compare instruction are registers, see if 1762288943Sdim // it can be changed to use induction register as one of the operands. 1763288943Sdim MachineInstr *IndI = nullptr; 1764288943Sdim MachineInstr *nonIndI = nullptr; 1765288943Sdim MachineOperand *IndMO = nullptr; 1766288943Sdim MachineOperand *nonIndMO = nullptr; 1767288943Sdim 1768288943Sdim for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { 1769288943Sdim MachineOperand &MO = PredDef->getOperand(i); 1770288943Sdim if (MO.isReg() && MO.getReg() == RB.first) { 1771341825Sdim LLVM_DEBUG(dbgs() << "\n DefMI(" << i 1772341825Sdim << ") = " << *(MRI->getVRegDef(I->first))); 1773288943Sdim if (IndI) 1774288943Sdim return false; 1775288943Sdim 1776288943Sdim IndI = MRI->getVRegDef(I->first); 1777288943Sdim IndMO = &MO; 1778288943Sdim } else if (MO.isReg()) { 1779341825Sdim LLVM_DEBUG(dbgs() << "\n DefMI(" << i 1780341825Sdim << ") = " << *(MRI->getVRegDef(MO.getReg()))); 1781288943Sdim if (nonIndI) 1782288943Sdim return false; 1783288943Sdim 1784288943Sdim nonIndI = MRI->getVRegDef(MO.getReg()); 1785288943Sdim nonIndMO = &MO; 1786288943Sdim } 1787288943Sdim } 1788288943Sdim if (IndI && nonIndI && 1789288943Sdim nonIndI->getOpcode() == Hexagon::A2_addi && 1790288943Sdim nonIndI->getOperand(2).isImm() && 1791288943Sdim nonIndI->getOperand(2).getImm() == - RB.second) { 1792288943Sdim bool Order = orderBumpCompare(IndI, PredDef); 1793288943Sdim if (Order) { 1794288943Sdim IndMO->setReg(I->first); 1795288943Sdim nonIndMO->setReg(nonIndI->getOperand(1).getReg()); 1796288943Sdim return true; 1797288943Sdim } 1798288943Sdim } 1799249423Sdim return false; 1800288943Sdim } 1801249423Sdim 1802288943Sdim // It is not valid to do this transformation on an unsigned comparison 1803288943Sdim // because it may underflow. 1804314564Sdim Comparison::Kind Cmp = 1805314564Sdim getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); 1806288943Sdim if (!Cmp || Comparison::isUnsigned(Cmp)) 1807288943Sdim return false; 1808288943Sdim 1809288943Sdim // If the register is being compared against an immediate, try changing 1810288943Sdim // the compare instruction to use induction register and adjust the 1811288943Sdim // immediate operand. 1812249423Sdim int64_t CmpImm = getImmediate(*CmpImmOp); 1813249423Sdim int64_t V = RB.second; 1814288943Sdim // Handle Overflow (64-bit). 1815288943Sdim if (((V > 0) && (CmpImm > INT64_MAX - V)) || 1816288943Sdim ((V < 0) && (CmpImm < INT64_MIN - V))) 1817249423Sdim return false; 1818249423Sdim CmpImm += V; 1819288943Sdim // Most comparisons of register against an immediate value allow 1820288943Sdim // the immediate to be constant-extended. There are some exceptions 1821288943Sdim // though. Make sure the new combination will work. 1822288943Sdim if (CmpImmOp->isImm()) 1823288943Sdim if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) 1824288943Sdim return false; 1825249423Sdim 1826249423Sdim // Make sure that the compare happens after the bump. Otherwise, 1827249423Sdim // after the fixup, the compare would use a yet-undefined register. 1828249423Sdim MachineInstr *BumpI = MRI->getVRegDef(I->first); 1829249423Sdim bool Order = orderBumpCompare(BumpI, PredDef); 1830249423Sdim if (!Order) 1831249423Sdim return false; 1832249423Sdim 1833249423Sdim // Finally, fix the compare instruction. 1834249423Sdim setImmediate(*CmpImmOp, CmpImm); 1835249423Sdim for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 1836249423Sdim MachineOperand &MO = PredDef->getOperand(i); 1837249423Sdim if (MO.isReg() && MO.getReg() == RB.first) { 1838249423Sdim MO.setReg(I->first); 1839249423Sdim return true; 1840249423Sdim } 1841249423Sdim } 1842249423Sdim } 1843249423Sdim } 1844249423Sdim 1845249423Sdim return false; 1846234285Sdim} 1847234285Sdim 1848314564Sdim/// createPreheaderForLoop - Create a preheader for a given loop. 1849249423SdimMachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( 1850249423Sdim MachineLoop *L) { 1851314564Sdim if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) 1852249423Sdim return TmpPH; 1853288943Sdim if (!HWCreatePreheader) 1854288943Sdim return nullptr; 1855288943Sdim 1856249423Sdim MachineBasicBlock *Header = L->getHeader(); 1857249423Sdim MachineBasicBlock *Latch = L->getLoopLatch(); 1858314564Sdim MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 1859249423Sdim MachineFunction *MF = Header->getParent(); 1860249423Sdim DebugLoc DL; 1861249423Sdim 1862288943Sdim#ifndef NDEBUG 1863327952Sdim if ((!PHFn.empty()) && (PHFn != MF->getName())) 1864276479Sdim return nullptr; 1865288943Sdim#endif 1866249423Sdim 1867288943Sdim if (!Latch || !ExitingBlock || Header->hasAddressTaken()) 1868288943Sdim return nullptr; 1869288943Sdim 1870327952Sdim using instr_iterator = MachineBasicBlock::instr_iterator; 1871249423Sdim 1872249423Sdim // Verify that all existing predecessors have analyzable branches 1873249423Sdim // (or no branches at all). 1874327952Sdim using MBBVector = std::vector<MachineBasicBlock *>; 1875327952Sdim 1876249423Sdim MBBVector Preds(Header->pred_begin(), Header->pred_end()); 1877249423Sdim SmallVector<MachineOperand,2> Tmp1; 1878276479Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 1879249423Sdim 1880309124Sdim if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) 1881276479Sdim return nullptr; 1882249423Sdim 1883249423Sdim for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 1884249423Sdim MachineBasicBlock *PB = *I; 1885309124Sdim bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); 1886288943Sdim if (NotAnalyzed) 1887288943Sdim return nullptr; 1888249423Sdim } 1889249423Sdim 1890249423Sdim MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); 1891296417Sdim MF->insert(Header->getIterator(), NewPH); 1892249423Sdim 1893249423Sdim if (Header->pred_size() > 2) { 1894249423Sdim // Ensure that the header has only two predecessors: the preheader and 1895249423Sdim // the loop latch. Any additional predecessors of the header should 1896288943Sdim // join at the newly created preheader. Inspect all PHI nodes from the 1897249423Sdim // header and create appropriate corresponding PHI nodes in the preheader. 1898249423Sdim 1899249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1900249423Sdim I != E && I->isPHI(); ++I) { 1901249423Sdim MachineInstr *PN = &*I; 1902249423Sdim 1903249423Sdim const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); 1904249423Sdim MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); 1905249423Sdim NewPH->insert(NewPH->end(), NewPN); 1906249423Sdim 1907360784Sdim Register PR = PN->getOperand(0).getReg(); 1908249423Sdim const TargetRegisterClass *RC = MRI->getRegClass(PR); 1909360784Sdim Register NewPR = MRI->createVirtualRegister(RC); 1910249423Sdim NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); 1911249423Sdim 1912249423Sdim // Copy all non-latch operands of a header's PHI node to the newly 1913249423Sdim // created PHI node in the preheader. 1914249423Sdim for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 1915360784Sdim Register PredR = PN->getOperand(i).getReg(); 1916288943Sdim unsigned PredRSub = PN->getOperand(i).getSubReg(); 1917249423Sdim MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 1918249423Sdim if (PredB == Latch) 1919249423Sdim continue; 1920249423Sdim 1921288943Sdim MachineOperand MO = MachineOperand::CreateReg(PredR, false); 1922288943Sdim MO.setSubReg(PredRSub); 1923288943Sdim NewPN->addOperand(MO); 1924249423Sdim NewPN->addOperand(MachineOperand::CreateMBB(PredB)); 1925249423Sdim } 1926249423Sdim 1927249423Sdim // Remove copied operands from the old PHI node and add the value 1928249423Sdim // coming from the preheader's PHI. 1929249423Sdim for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 1930249423Sdim MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 1931249423Sdim if (PredB != Latch) { 1932249423Sdim PN->RemoveOperand(i+1); 1933249423Sdim PN->RemoveOperand(i); 1934249423Sdim } 1935249423Sdim } 1936249423Sdim PN->addOperand(MachineOperand::CreateReg(NewPR, false)); 1937249423Sdim PN->addOperand(MachineOperand::CreateMBB(NewPH)); 1938249423Sdim } 1939234285Sdim } else { 1940249423Sdim assert(Header->pred_size() == 2); 1941249423Sdim 1942249423Sdim // The header has only two predecessors, but the non-latch predecessor 1943249423Sdim // is not a preheader (e.g. it has other successors, etc.) 1944249423Sdim // In such a case we don't need any extra PHI nodes in the new preheader, 1945249423Sdim // all we need is to adjust existing PHIs in the header to now refer to 1946249423Sdim // the new preheader. 1947249423Sdim for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 1948249423Sdim I != E && I->isPHI(); ++I) { 1949249423Sdim MachineInstr *PN = &*I; 1950249423Sdim for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 1951249423Sdim MachineOperand &MO = PN->getOperand(i+1); 1952249423Sdim if (MO.getMBB() != Latch) 1953249423Sdim MO.setMBB(NewPH); 1954249423Sdim } 1955249423Sdim } 1956234285Sdim } 1957249423Sdim 1958249423Sdim // "Reroute" the CFG edges to link in the new preheader. 1959249423Sdim // If any of the predecessors falls through to the header, insert a branch 1960249423Sdim // to the new preheader in that place. 1961249423Sdim SmallVector<MachineOperand,1> Tmp2; 1962249423Sdim SmallVector<MachineOperand,1> EmptyCond; 1963249423Sdim 1964276479Sdim TB = FB = nullptr; 1965249423Sdim 1966249423Sdim for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 1967249423Sdim MachineBasicBlock *PB = *I; 1968249423Sdim if (PB != Latch) { 1969249423Sdim Tmp2.clear(); 1970309124Sdim bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); 1971276479Sdim (void)NotAnalyzed; // suppress compiler warning 1972249423Sdim assert (!NotAnalyzed && "Should be analyzable!"); 1973249423Sdim if (TB != Header && (Tmp2.empty() || FB != Header)) 1974314564Sdim TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); 1975249423Sdim PB->ReplaceUsesOfBlockWith(Header, NewPH); 1976249423Sdim } 1977249423Sdim } 1978249423Sdim 1979249423Sdim // It can happen that the latch block will fall through into the header. 1980249423Sdim // Insert an unconditional branch to the header. 1981276479Sdim TB = FB = nullptr; 1982309124Sdim bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); 1983276479Sdim (void)LatchNotAnalyzed; // suppress compiler warning 1984249423Sdim assert (!LatchNotAnalyzed && "Should be analyzable!"); 1985249423Sdim if (!TB && !FB) 1986314564Sdim TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); 1987249423Sdim 1988249423Sdim // Finally, the branch from the preheader to the header. 1989314564Sdim TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); 1990249423Sdim NewPH->addSuccessor(Header); 1991249423Sdim 1992288943Sdim MachineLoop *ParentLoop = L->getParentLoop(); 1993288943Sdim if (ParentLoop) 1994288943Sdim ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); 1995288943Sdim 1996288943Sdim // Update the dominator information with the new preheader. 1997288943Sdim if (MDT) { 1998314564Sdim if (MachineDomTreeNode *HN = MDT->getNode(Header)) { 1999314564Sdim if (MachineDomTreeNode *DHN = HN->getIDom()) { 2000314564Sdim MDT->addNewBlock(NewPH, DHN->getBlock()); 2001314564Sdim MDT->changeImmediateDominator(Header, NewPH); 2002314564Sdim } 2003314564Sdim } 2004288943Sdim } 2005288943Sdim 2006249423Sdim return NewPH; 2007234285Sdim} 2008