1327952Sdim//===- HexagonSplitDouble.cpp ---------------------------------------------===// 2292915Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6292915Sdim// 7292915Sdim//===----------------------------------------------------------------------===// 8292915Sdim 9292915Sdim#define DEBUG_TYPE "hsdr" 10292915Sdim 11314564Sdim#include "HexagonInstrInfo.h" 12292915Sdim#include "HexagonRegisterInfo.h" 13314564Sdim#include "HexagonSubtarget.h" 14314564Sdim#include "llvm/ADT/BitVector.h" 15321369Sdim#include "llvm/ADT/STLExtras.h" 16314564Sdim#include "llvm/ADT/SmallVector.h" 17314564Sdim#include "llvm/ADT/StringRef.h" 18314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 19292915Sdim#include "llvm/CodeGen/MachineFunction.h" 20292915Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 21314564Sdim#include "llvm/CodeGen/MachineInstr.h" 22292915Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 23292915Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 24314564Sdim#include "llvm/CodeGen/MachineMemOperand.h" 25314564Sdim#include "llvm/CodeGen/MachineOperand.h" 26292915Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 27327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 28341825Sdim#include "llvm/Config/llvm-config.h" 29314564Sdim#include "llvm/IR/DebugLoc.h" 30309124Sdim#include "llvm/Pass.h" 31292915Sdim#include "llvm/Support/CommandLine.h" 32314564Sdim#include "llvm/Support/Compiler.h" 33292915Sdim#include "llvm/Support/Debug.h" 34314564Sdim#include "llvm/Support/ErrorHandling.h" 35292915Sdim#include "llvm/Support/raw_ostream.h" 36314564Sdim#include <algorithm> 37314564Sdim#include <cassert> 38314564Sdim#include <cstdint> 39314564Sdim#include <limits> 40292915Sdim#include <map> 41292915Sdim#include <set> 42314564Sdim#include <utility> 43292915Sdim#include <vector> 44292915Sdim 45292915Sdimusing namespace llvm; 46292915Sdim 47292915Sdimnamespace llvm { 48314564Sdim 49292915Sdim FunctionPass *createHexagonSplitDoubleRegs(); 50292915Sdim void initializeHexagonSplitDoubleRegsPass(PassRegistry&); 51292915Sdim 52314564Sdim} // end namespace llvm 53314564Sdim 54327952Sdimstatic cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), 55327952Sdim cl::desc("Maximum number of split partitions")); 56327952Sdimstatic cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), 57327952Sdim cl::desc("Do not split loads or stores")); 58341825Sdim static cl::opt<bool> SplitAll("hsdr-split-all", cl::Hidden, cl::init(false), 59341825Sdim cl::desc("Split all partitions")); 60327952Sdim 61292915Sdimnamespace { 62314564Sdim 63292915Sdim class HexagonSplitDoubleRegs : public MachineFunctionPass { 64292915Sdim public: 65292915Sdim static char ID; 66314564Sdim 67341825Sdim HexagonSplitDoubleRegs() : MachineFunctionPass(ID) {} 68314564Sdim 69314564Sdim StringRef getPassName() const override { 70292915Sdim return "Hexagon Split Double Registers"; 71292915Sdim } 72314564Sdim 73292915Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 74292915Sdim AU.addRequired<MachineLoopInfo>(); 75292915Sdim AU.addPreserved<MachineLoopInfo>(); 76292915Sdim MachineFunctionPass::getAnalysisUsage(AU); 77292915Sdim } 78314564Sdim 79292915Sdim bool runOnMachineFunction(MachineFunction &MF) override; 80292915Sdim 81292915Sdim private: 82292915Sdim static const TargetRegisterClass *const DoubleRC; 83292915Sdim 84327952Sdim const HexagonRegisterInfo *TRI = nullptr; 85327952Sdim const HexagonInstrInfo *TII = nullptr; 86292915Sdim const MachineLoopInfo *MLI; 87292915Sdim MachineRegisterInfo *MRI; 88292915Sdim 89327952Sdim using USet = std::set<unsigned>; 90327952Sdim using UUSetMap = std::map<unsigned, USet>; 91327952Sdim using UUPair = std::pair<unsigned, unsigned>; 92327952Sdim using UUPairMap = std::map<unsigned, UUPair>; 93327952Sdim using LoopRegMap = std::map<const MachineLoop *, USet>; 94292915Sdim 95292915Sdim bool isInduction(unsigned Reg, LoopRegMap &IRM) const; 96292915Sdim bool isVolatileInstr(const MachineInstr *MI) const; 97292915Sdim bool isFixedInstr(const MachineInstr *MI) const; 98292915Sdim void partitionRegisters(UUSetMap &P2Rs); 99292915Sdim int32_t profit(const MachineInstr *MI) const; 100341825Sdim int32_t profit(unsigned Reg) const; 101292915Sdim bool isProfitable(const USet &Part, LoopRegMap &IRM) const; 102292915Sdim 103292915Sdim void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); 104292915Sdim void collectIndRegs(LoopRegMap &IRM); 105292915Sdim 106292915Sdim void createHalfInstr(unsigned Opc, MachineInstr *MI, 107292915Sdim const UUPairMap &PairMap, unsigned SubR); 108292915Sdim void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); 109292915Sdim void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); 110292915Sdim void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); 111292915Sdim void splitExt(MachineInstr *MI, const UUPairMap &PairMap); 112292915Sdim void splitShift(MachineInstr *MI, const UUPairMap &PairMap); 113292915Sdim void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); 114292915Sdim bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); 115292915Sdim void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); 116292915Sdim void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); 117292915Sdim bool splitPartition(const USet &Part); 118292915Sdim 119292915Sdim static int Counter; 120327952Sdim 121292915Sdim static void dump_partition(raw_ostream&, const USet&, 122292915Sdim const TargetRegisterInfo&); 123292915Sdim }; 124314564Sdim 125314564Sdim} // end anonymous namespace 126314564Sdim 127327952Sdimchar HexagonSplitDoubleRegs::ID; 128327952Sdimint HexagonSplitDoubleRegs::Counter = 0; 129327952Sdimconst TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC = 130327952Sdim &Hexagon::DoubleRegsRegClass; 131327952Sdim 132292915SdimINITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", 133292915Sdim "Hexagon Split Double Registers", false, false) 134292915Sdim 135321369Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 136321369SdimLLVM_DUMP_METHOD void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, 137292915Sdim const USet &Part, const TargetRegisterInfo &TRI) { 138292915Sdim dbgs() << '{'; 139292915Sdim for (auto I : Part) 140327952Sdim dbgs() << ' ' << printReg(I, &TRI); 141292915Sdim dbgs() << " }"; 142292915Sdim} 143321369Sdim#endif 144292915Sdim 145292915Sdimbool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { 146292915Sdim for (auto I : IRM) { 147292915Sdim const USet &Rs = I.second; 148292915Sdim if (Rs.find(Reg) != Rs.end()) 149292915Sdim return true; 150292915Sdim } 151292915Sdim return false; 152292915Sdim} 153292915Sdim 154292915Sdimbool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { 155353358Sdim for (auto &MO : MI->memoperands()) 156353358Sdim if (MO->isVolatile() || MO->isAtomic()) 157292915Sdim return true; 158292915Sdim return false; 159292915Sdim} 160292915Sdim 161292915Sdimbool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { 162360784Sdim if (MI->mayLoadOrStore()) 163292915Sdim if (MemRefsFixed || isVolatileInstr(MI)) 164292915Sdim return true; 165341825Sdim if (MI->isDebugInstr()) 166292915Sdim return false; 167292915Sdim 168292915Sdim unsigned Opc = MI->getOpcode(); 169292915Sdim switch (Opc) { 170292915Sdim default: 171292915Sdim return true; 172292915Sdim 173292915Sdim case TargetOpcode::PHI: 174292915Sdim case TargetOpcode::COPY: 175292915Sdim break; 176292915Sdim 177292915Sdim case Hexagon::L2_loadrd_io: 178292915Sdim // Not handling stack stores (only reg-based addresses). 179292915Sdim if (MI->getOperand(1).isReg()) 180292915Sdim break; 181292915Sdim return true; 182292915Sdim case Hexagon::S2_storerd_io: 183292915Sdim // Not handling stack stores (only reg-based addresses). 184292915Sdim if (MI->getOperand(0).isReg()) 185292915Sdim break; 186292915Sdim return true; 187292915Sdim case Hexagon::L2_loadrd_pi: 188292915Sdim case Hexagon::S2_storerd_pi: 189292915Sdim 190292915Sdim case Hexagon::A2_tfrpi: 191292915Sdim case Hexagon::A2_combineii: 192292915Sdim case Hexagon::A4_combineir: 193292915Sdim case Hexagon::A4_combineii: 194292915Sdim case Hexagon::A4_combineri: 195292915Sdim case Hexagon::A2_combinew: 196314564Sdim case Hexagon::CONST64: 197292915Sdim 198292915Sdim case Hexagon::A2_sxtw: 199292915Sdim 200292915Sdim case Hexagon::A2_andp: 201292915Sdim case Hexagon::A2_orp: 202292915Sdim case Hexagon::A2_xorp: 203292915Sdim case Hexagon::S2_asl_i_p_or: 204292915Sdim case Hexagon::S2_asl_i_p: 205292915Sdim case Hexagon::S2_asr_i_p: 206292915Sdim case Hexagon::S2_lsr_i_p: 207292915Sdim break; 208292915Sdim } 209292915Sdim 210292915Sdim for (auto &Op : MI->operands()) { 211292915Sdim if (!Op.isReg()) 212292915Sdim continue; 213360784Sdim Register R = Op.getReg(); 214360784Sdim if (!Register::isVirtualRegister(R)) 215292915Sdim return true; 216292915Sdim } 217292915Sdim return false; 218292915Sdim} 219292915Sdim 220292915Sdimvoid HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { 221327952Sdim using UUMap = std::map<unsigned, unsigned>; 222327952Sdim using UVect = std::vector<unsigned>; 223292915Sdim 224292915Sdim unsigned NumRegs = MRI->getNumVirtRegs(); 225292915Sdim BitVector DoubleRegs(NumRegs); 226292915Sdim for (unsigned i = 0; i < NumRegs; ++i) { 227360784Sdim unsigned R = Register::index2VirtReg(i); 228292915Sdim if (MRI->getRegClass(R) == DoubleRC) 229292915Sdim DoubleRegs.set(i); 230292915Sdim } 231292915Sdim 232292915Sdim BitVector FixedRegs(NumRegs); 233292915Sdim for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { 234360784Sdim unsigned R = Register::index2VirtReg(x); 235292915Sdim MachineInstr *DefI = MRI->getVRegDef(R); 236292915Sdim // In some cases a register may exist, but never be defined or used. 237292915Sdim // It should never appear anywhere, but mark it as "fixed", just to be 238292915Sdim // safe. 239292915Sdim if (!DefI || isFixedInstr(DefI)) 240292915Sdim FixedRegs.set(x); 241292915Sdim } 242292915Sdim 243292915Sdim UUSetMap AssocMap; 244292915Sdim for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { 245292915Sdim if (FixedRegs[x]) 246292915Sdim continue; 247360784Sdim unsigned R = Register::index2VirtReg(x); 248341825Sdim LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~"); 249292915Sdim USet &Asc = AssocMap[R]; 250292915Sdim for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); 251292915Sdim U != Z; ++U) { 252292915Sdim MachineOperand &Op = *U; 253292915Sdim MachineInstr *UseI = Op.getParent(); 254292915Sdim if (isFixedInstr(UseI)) 255292915Sdim continue; 256292915Sdim for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { 257292915Sdim MachineOperand &MO = UseI->getOperand(i); 258292915Sdim // Skip non-registers or registers with subregisters. 259292915Sdim if (&MO == &Op || !MO.isReg() || MO.getSubReg()) 260292915Sdim continue; 261360784Sdim Register T = MO.getReg(); 262360784Sdim if (!Register::isVirtualRegister(T)) { 263292915Sdim FixedRegs.set(x); 264292915Sdim continue; 265292915Sdim } 266292915Sdim if (MRI->getRegClass(T) != DoubleRC) 267292915Sdim continue; 268360784Sdim unsigned u = Register::virtReg2Index(T); 269292915Sdim if (FixedRegs[u]) 270292915Sdim continue; 271341825Sdim LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI)); 272292915Sdim Asc.insert(T); 273292915Sdim // Make it symmetric. 274292915Sdim AssocMap[T].insert(R); 275292915Sdim } 276292915Sdim } 277341825Sdim LLVM_DEBUG(dbgs() << '\n'); 278292915Sdim } 279292915Sdim 280292915Sdim UUMap R2P; 281292915Sdim unsigned NextP = 1; 282292915Sdim USet Visited; 283292915Sdim for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { 284360784Sdim unsigned R = Register::index2VirtReg(x); 285292915Sdim if (Visited.count(R)) 286292915Sdim continue; 287292915Sdim // Create a new partition for R. 288292915Sdim unsigned ThisP = FixedRegs[x] ? 0 : NextP++; 289292915Sdim UVect WorkQ; 290292915Sdim WorkQ.push_back(R); 291292915Sdim for (unsigned i = 0; i < WorkQ.size(); ++i) { 292292915Sdim unsigned T = WorkQ[i]; 293292915Sdim if (Visited.count(T)) 294292915Sdim continue; 295292915Sdim R2P[T] = ThisP; 296292915Sdim Visited.insert(T); 297292915Sdim // Add all registers associated with T. 298292915Sdim USet &Asc = AssocMap[T]; 299292915Sdim for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) 300292915Sdim WorkQ.push_back(*J); 301292915Sdim } 302292915Sdim } 303292915Sdim 304292915Sdim for (auto I : R2P) 305292915Sdim P2Rs[I.second].insert(I.first); 306292915Sdim} 307292915Sdim 308341825Sdimstatic inline int32_t profitImm(unsigned Imm) { 309292915Sdim int32_t P = 0; 310341825Sdim if (Imm == 0 || Imm == 0xFFFFFFFF) 311341825Sdim P += 10; 312292915Sdim return P; 313292915Sdim} 314292915Sdim 315292915Sdimint32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { 316292915Sdim unsigned ImmX = 0; 317292915Sdim unsigned Opc = MI->getOpcode(); 318292915Sdim switch (Opc) { 319292915Sdim case TargetOpcode::PHI: 320292915Sdim for (const auto &Op : MI->operands()) 321292915Sdim if (!Op.getSubReg()) 322292915Sdim return 0; 323292915Sdim return 10; 324292915Sdim case TargetOpcode::COPY: 325292915Sdim if (MI->getOperand(1).getSubReg() != 0) 326292915Sdim return 10; 327292915Sdim return 0; 328292915Sdim 329292915Sdim case Hexagon::L2_loadrd_io: 330292915Sdim case Hexagon::S2_storerd_io: 331292915Sdim return -1; 332292915Sdim case Hexagon::L2_loadrd_pi: 333292915Sdim case Hexagon::S2_storerd_pi: 334292915Sdim return 2; 335292915Sdim 336292915Sdim case Hexagon::A2_tfrpi: 337314564Sdim case Hexagon::CONST64: { 338292915Sdim uint64_t D = MI->getOperand(1).getImm(); 339292915Sdim unsigned Lo = D & 0xFFFFFFFFULL; 340292915Sdim unsigned Hi = D >> 32; 341341825Sdim return profitImm(Lo) + profitImm(Hi); 342292915Sdim } 343292915Sdim case Hexagon::A2_combineii: 344341825Sdim case Hexagon::A4_combineii: { 345341825Sdim const MachineOperand &Op1 = MI->getOperand(1); 346341825Sdim const MachineOperand &Op2 = MI->getOperand(2); 347341825Sdim int32_t Prof1 = Op1.isImm() ? profitImm(Op1.getImm()) : 0; 348341825Sdim int32_t Prof2 = Op2.isImm() ? profitImm(Op2.getImm()) : 0; 349341825Sdim return Prof1 + Prof2; 350341825Sdim } 351292915Sdim case Hexagon::A4_combineri: 352292915Sdim ImmX++; 353321369Sdim // Fall through into A4_combineir. 354321369Sdim LLVM_FALLTHROUGH; 355292915Sdim case Hexagon::A4_combineir: { 356292915Sdim ImmX++; 357341825Sdim const MachineOperand &OpX = MI->getOperand(ImmX); 358341825Sdim if (OpX.isImm()) { 359341825Sdim int64_t V = OpX.getImm(); 360341825Sdim if (V == 0 || V == -1) 361341825Sdim return 10; 362341825Sdim } 363292915Sdim // Fall through into A2_combinew. 364314564Sdim LLVM_FALLTHROUGH; 365292915Sdim } 366292915Sdim case Hexagon::A2_combinew: 367292915Sdim return 2; 368292915Sdim 369292915Sdim case Hexagon::A2_sxtw: 370292915Sdim return 3; 371292915Sdim 372292915Sdim case Hexagon::A2_andp: 373292915Sdim case Hexagon::A2_orp: 374341825Sdim case Hexagon::A2_xorp: { 375360784Sdim Register Rs = MI->getOperand(1).getReg(); 376360784Sdim Register Rt = MI->getOperand(2).getReg(); 377341825Sdim return profit(Rs) + profit(Rt); 378341825Sdim } 379292915Sdim 380292915Sdim case Hexagon::S2_asl_i_p_or: { 381292915Sdim unsigned S = MI->getOperand(3).getImm(); 382292915Sdim if (S == 0 || S == 32) 383292915Sdim return 10; 384292915Sdim return -1; 385292915Sdim } 386292915Sdim case Hexagon::S2_asl_i_p: 387292915Sdim case Hexagon::S2_asr_i_p: 388292915Sdim case Hexagon::S2_lsr_i_p: 389292915Sdim unsigned S = MI->getOperand(2).getImm(); 390292915Sdim if (S == 0 || S == 32) 391292915Sdim return 10; 392292915Sdim if (S == 16) 393292915Sdim return 5; 394292915Sdim if (S == 48) 395292915Sdim return 7; 396292915Sdim return -10; 397292915Sdim } 398292915Sdim 399292915Sdim return 0; 400292915Sdim} 401292915Sdim 402341825Sdimint32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const { 403360784Sdim assert(Register::isVirtualRegister(Reg)); 404341825Sdim 405341825Sdim const MachineInstr *DefI = MRI->getVRegDef(Reg); 406341825Sdim switch (DefI->getOpcode()) { 407341825Sdim case Hexagon::A2_tfrpi: 408341825Sdim case Hexagon::CONST64: 409341825Sdim case Hexagon::A2_combineii: 410341825Sdim case Hexagon::A4_combineii: 411341825Sdim case Hexagon::A4_combineri: 412341825Sdim case Hexagon::A4_combineir: 413341825Sdim case Hexagon::A2_combinew: 414341825Sdim return profit(DefI); 415341825Sdim default: 416341825Sdim break; 417341825Sdim } 418341825Sdim return 0; 419341825Sdim} 420341825Sdim 421292915Sdimbool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) 422292915Sdim const { 423321369Sdim unsigned FixedNum = 0, LoopPhiNum = 0; 424292915Sdim int32_t TotalP = 0; 425292915Sdim 426292915Sdim for (unsigned DR : Part) { 427292915Sdim MachineInstr *DefI = MRI->getVRegDef(DR); 428292915Sdim int32_t P = profit(DefI); 429314564Sdim if (P == std::numeric_limits<int>::min()) 430292915Sdim return false; 431292915Sdim TotalP += P; 432292915Sdim // Reduce the profitability of splitting induction registers. 433292915Sdim if (isInduction(DR, IRM)) 434292915Sdim TotalP -= 30; 435292915Sdim 436292915Sdim for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); 437292915Sdim U != W; ++U) { 438292915Sdim MachineInstr *UseI = U->getParent(); 439292915Sdim if (isFixedInstr(UseI)) { 440292915Sdim FixedNum++; 441292915Sdim // Calculate the cost of generating REG_SEQUENCE instructions. 442292915Sdim for (auto &Op : UseI->operands()) { 443292915Sdim if (Op.isReg() && Part.count(Op.getReg())) 444292915Sdim if (Op.getSubReg()) 445292915Sdim TotalP -= 2; 446292915Sdim } 447292915Sdim continue; 448292915Sdim } 449292915Sdim // If a register from this partition is used in a fixed instruction, 450292915Sdim // and there is also a register in this partition that is used in 451292915Sdim // a loop phi node, then decrease the splitting profit as this can 452292915Sdim // confuse the modulo scheduler. 453292915Sdim if (UseI->isPHI()) { 454292915Sdim const MachineBasicBlock *PB = UseI->getParent(); 455292915Sdim const MachineLoop *L = MLI->getLoopFor(PB); 456292915Sdim if (L && L->getHeader() == PB) 457292915Sdim LoopPhiNum++; 458292915Sdim } 459292915Sdim // Splittable instruction. 460292915Sdim int32_t P = profit(UseI); 461314564Sdim if (P == std::numeric_limits<int>::min()) 462292915Sdim return false; 463292915Sdim TotalP += P; 464292915Sdim } 465292915Sdim } 466292915Sdim 467292915Sdim if (FixedNum > 0 && LoopPhiNum > 0) 468292915Sdim TotalP -= 20*LoopPhiNum; 469292915Sdim 470341825Sdim LLVM_DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); 471341825Sdim if (SplitAll) 472341825Sdim return true; 473292915Sdim return TotalP > 0; 474292915Sdim} 475292915Sdim 476292915Sdimvoid HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, 477292915Sdim USet &Rs) { 478292915Sdim const MachineBasicBlock *HB = L->getHeader(); 479292915Sdim const MachineBasicBlock *LB = L->getLoopLatch(); 480292915Sdim if (!HB || !LB) 481292915Sdim return; 482292915Sdim 483292915Sdim // Examine the latch branch. Expect it to be a conditional branch to 484292915Sdim // the header (either "br-cond header" or "br-cond exit; br header"). 485314564Sdim MachineBasicBlock *TB = nullptr, *FB = nullptr; 486292915Sdim MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB); 487292915Sdim SmallVector<MachineOperand,2> Cond; 488309124Sdim bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false); 489314564Sdim // Only analyzable conditional branches. HII::analyzeBranch will put 490292915Sdim // the branch opcode as the first element of Cond, and the predicate 491292915Sdim // operand as the second. 492292915Sdim if (BadLB || Cond.size() != 2) 493292915Sdim return; 494292915Sdim // Only simple jump-conditional (with or without negation). 495292915Sdim if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) 496292915Sdim return; 497292915Sdim // Must go to the header. 498292915Sdim if (TB != HB && FB != HB) 499292915Sdim return; 500314564Sdim assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch"); 501292915Sdim // Expect a predicate register. 502360784Sdim Register PR = Cond[1].getReg(); 503292915Sdim assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); 504292915Sdim 505292915Sdim // Get the registers on which the loop controlling compare instruction 506292915Sdim // depends. 507292915Sdim unsigned CmpR1 = 0, CmpR2 = 0; 508292915Sdim const MachineInstr *CmpI = MRI->getVRegDef(PR); 509292915Sdim while (CmpI->getOpcode() == Hexagon::C2_not) 510292915Sdim CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); 511292915Sdim 512292915Sdim int Mask = 0, Val = 0; 513309124Sdim bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val); 514292915Sdim if (!OkCI) 515292915Sdim return; 516292915Sdim // Eliminate non-double input registers. 517292915Sdim if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) 518292915Sdim CmpR1 = 0; 519292915Sdim if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) 520292915Sdim CmpR2 = 0; 521292915Sdim if (!CmpR1 && !CmpR2) 522292915Sdim return; 523292915Sdim 524292915Sdim // Now examine the top of the loop: the phi nodes that could poten- 525292915Sdim // tially define loop induction registers. The registers defined by 526292915Sdim // such a phi node would be used in a 64-bit add, which then would 527292915Sdim // be used in the loop compare instruction. 528292915Sdim 529292915Sdim // Get the set of all double registers defined by phi nodes in the 530292915Sdim // loop header. 531327952Sdim using UVect = std::vector<unsigned>; 532327952Sdim 533292915Sdim UVect DP; 534292915Sdim for (auto &MI : *HB) { 535292915Sdim if (!MI.isPHI()) 536292915Sdim break; 537292915Sdim const MachineOperand &MD = MI.getOperand(0); 538360784Sdim Register R = MD.getReg(); 539292915Sdim if (MRI->getRegClass(R) == DoubleRC) 540292915Sdim DP.push_back(R); 541292915Sdim } 542292915Sdim if (DP.empty()) 543292915Sdim return; 544292915Sdim 545292915Sdim auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { 546292915Sdim for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); 547292915Sdim I != E; ++I) { 548292915Sdim const MachineInstr *UseI = I->getParent(); 549292915Sdim if (UseI->getOpcode() != Hexagon::A2_addp) 550292915Sdim continue; 551292915Sdim // Get the output from the add. If it is one of the inputs to the 552292915Sdim // loop-controlling compare instruction, then R is likely an induc- 553292915Sdim // tion register. 554360784Sdim Register T = UseI->getOperand(0).getReg(); 555292915Sdim if (T == CmpR1 || T == CmpR2) 556292915Sdim return false; 557292915Sdim } 558292915Sdim return true; 559292915Sdim }; 560314564Sdim UVect::iterator End = llvm::remove_if(DP, NoIndOp); 561292915Sdim Rs.insert(DP.begin(), End); 562292915Sdim Rs.insert(CmpR1); 563292915Sdim Rs.insert(CmpR2); 564292915Sdim 565341825Sdim LLVM_DEBUG({ 566327952Sdim dbgs() << "For loop at " << printMBBReference(*HB) << " ind regs: "; 567292915Sdim dump_partition(dbgs(), Rs, *TRI); 568292915Sdim dbgs() << '\n'; 569292915Sdim }); 570292915Sdim} 571292915Sdim 572292915Sdimvoid HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { 573327952Sdim using LoopVector = std::vector<MachineLoop *>; 574327952Sdim 575292915Sdim LoopVector WorkQ; 576292915Sdim 577292915Sdim for (auto I : *MLI) 578292915Sdim WorkQ.push_back(I); 579292915Sdim for (unsigned i = 0; i < WorkQ.size(); ++i) { 580292915Sdim for (auto I : *WorkQ[i]) 581292915Sdim WorkQ.push_back(I); 582292915Sdim } 583292915Sdim 584292915Sdim USet Rs; 585292915Sdim for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { 586292915Sdim MachineLoop *L = WorkQ[i]; 587292915Sdim Rs.clear(); 588292915Sdim collectIndRegsForLoop(L, Rs); 589292915Sdim if (!Rs.empty()) 590292915Sdim IRM.insert(std::make_pair(L, Rs)); 591292915Sdim } 592292915Sdim} 593292915Sdim 594292915Sdimvoid HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, 595292915Sdim const UUPairMap &PairMap, unsigned SubR) { 596292915Sdim MachineBasicBlock &B = *MI->getParent(); 597292915Sdim DebugLoc DL = MI->getDebugLoc(); 598292915Sdim MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); 599292915Sdim 600292915Sdim for (auto &Op : MI->operands()) { 601292915Sdim if (!Op.isReg()) { 602292915Sdim NewI->addOperand(Op); 603292915Sdim continue; 604292915Sdim } 605292915Sdim // For register operands, set the subregister. 606360784Sdim Register R = Op.getReg(); 607292915Sdim unsigned SR = Op.getSubReg(); 608360784Sdim bool isVirtReg = Register::isVirtualRegister(R); 609292915Sdim bool isKill = Op.isKill(); 610292915Sdim if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { 611292915Sdim isKill = false; 612292915Sdim UUPairMap::const_iterator F = PairMap.find(R); 613292915Sdim if (F == PairMap.end()) { 614292915Sdim SR = SubR; 615292915Sdim } else { 616292915Sdim const UUPair &P = F->second; 617314564Sdim R = (SubR == Hexagon::isub_lo) ? P.first : P.second; 618292915Sdim SR = 0; 619292915Sdim } 620292915Sdim } 621292915Sdim auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, 622292915Sdim Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), 623292915Sdim Op.isInternalRead()); 624292915Sdim NewI->addOperand(CO); 625292915Sdim } 626292915Sdim} 627292915Sdim 628292915Sdimvoid HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, 629292915Sdim const UUPairMap &PairMap) { 630292915Sdim bool Load = MI->mayLoad(); 631292915Sdim unsigned OrigOpc = MI->getOpcode(); 632292915Sdim bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || 633292915Sdim OrigOpc == Hexagon::S2_storerd_pi); 634292915Sdim MachineInstr *LowI, *HighI; 635292915Sdim MachineBasicBlock &B = *MI->getParent(); 636292915Sdim DebugLoc DL = MI->getDebugLoc(); 637292915Sdim 638292915Sdim // Index of the base-address-register operand. 639292915Sdim unsigned AdrX = PostInc ? (Load ? 2 : 1) 640292915Sdim : (Load ? 1 : 0); 641292915Sdim MachineOperand &AdrOp = MI->getOperand(AdrX); 642292915Sdim unsigned RSA = getRegState(AdrOp); 643292915Sdim MachineOperand &ValOp = Load ? MI->getOperand(0) 644292915Sdim : (PostInc ? MI->getOperand(3) 645292915Sdim : MI->getOperand(2)); 646292915Sdim UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); 647292915Sdim assert(F != PairMap.end()); 648292915Sdim 649292915Sdim if (Load) { 650292915Sdim const UUPair &P = F->second; 651292915Sdim int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); 652292915Sdim LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) 653292915Sdim .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) 654292915Sdim .addImm(Off); 655292915Sdim HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) 656292915Sdim .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) 657292915Sdim .addImm(Off+4); 658292915Sdim } else { 659292915Sdim const UUPair &P = F->second; 660292915Sdim int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm(); 661292915Sdim LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) 662292915Sdim .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) 663292915Sdim .addImm(Off) 664292915Sdim .addReg(P.first); 665292915Sdim HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) 666292915Sdim .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) 667292915Sdim .addImm(Off+4) 668292915Sdim .addReg(P.second); 669292915Sdim } 670292915Sdim 671292915Sdim if (PostInc) { 672292915Sdim // Create the increment of the address register. 673292915Sdim int64_t Inc = Load ? MI->getOperand(3).getImm() 674292915Sdim : MI->getOperand(2).getImm(); 675292915Sdim MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); 676292915Sdim const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); 677360784Sdim Register NewR = MRI->createVirtualRegister(RC); 678292915Sdim assert(!UpdOp.getSubReg() && "Def operand with subreg"); 679292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) 680292915Sdim .addReg(AdrOp.getReg(), RSA) 681292915Sdim .addImm(Inc); 682292915Sdim MRI->replaceRegWith(UpdOp.getReg(), NewR); 683292915Sdim // The original instruction will be deleted later. 684292915Sdim } 685292915Sdim 686292915Sdim // Generate a new pair of memory-operands. 687292915Sdim MachineFunction &MF = *B.getParent(); 688292915Sdim for (auto &MO : MI->memoperands()) { 689292915Sdim const MachinePointerInfo &Ptr = MO->getPointerInfo(); 690309124Sdim MachineMemOperand::Flags F = MO->getFlags(); 691292915Sdim int A = MO->getAlignment(); 692292915Sdim 693292915Sdim auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); 694292915Sdim LowI->addMemOperand(MF, Tmp1); 695292915Sdim auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); 696292915Sdim HighI->addMemOperand(MF, Tmp2); 697292915Sdim } 698292915Sdim} 699292915Sdim 700292915Sdimvoid HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, 701292915Sdim const UUPairMap &PairMap) { 702292915Sdim MachineOperand &Op0 = MI->getOperand(0); 703292915Sdim MachineOperand &Op1 = MI->getOperand(1); 704292915Sdim assert(Op0.isReg() && Op1.isImm()); 705292915Sdim uint64_t V = Op1.getImm(); 706292915Sdim 707292915Sdim MachineBasicBlock &B = *MI->getParent(); 708292915Sdim DebugLoc DL = MI->getDebugLoc(); 709292915Sdim UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); 710292915Sdim assert(F != PairMap.end()); 711292915Sdim const UUPair &P = F->second; 712292915Sdim 713292915Sdim // The operand to A2_tfrsi can only have 32 significant bits. Immediate 714292915Sdim // values in MachineOperand are stored as 64-bit integers, and so the 715292915Sdim // value -1 may be represented either as 64-bit -1, or 4294967295. Both 716292915Sdim // will have the 32 higher bits truncated in the end, but -1 will remain 717292915Sdim // as -1, while the latter may appear to be a large unsigned value 718292915Sdim // requiring a constant extender. The casting to int32_t will select the 719292915Sdim // former representation. (The same reasoning applies to all 32-bit 720292915Sdim // values.) 721292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) 722292915Sdim .addImm(int32_t(V & 0xFFFFFFFFULL)); 723292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) 724292915Sdim .addImm(int32_t(V >> 32)); 725292915Sdim} 726292915Sdim 727292915Sdimvoid HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, 728292915Sdim const UUPairMap &PairMap) { 729292915Sdim MachineOperand &Op0 = MI->getOperand(0); 730292915Sdim MachineOperand &Op1 = MI->getOperand(1); 731292915Sdim MachineOperand &Op2 = MI->getOperand(2); 732292915Sdim assert(Op0.isReg()); 733292915Sdim 734292915Sdim MachineBasicBlock &B = *MI->getParent(); 735292915Sdim DebugLoc DL = MI->getDebugLoc(); 736292915Sdim UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); 737292915Sdim assert(F != PairMap.end()); 738292915Sdim const UUPair &P = F->second; 739292915Sdim 740341825Sdim if (!Op1.isReg()) { 741292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) 742341825Sdim .add(Op1); 743341825Sdim } else { 744292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) 745292915Sdim .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); 746341825Sdim } 747292915Sdim 748341825Sdim if (!Op2.isReg()) { 749292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) 750341825Sdim .add(Op2); 751341825Sdim } else { 752292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) 753292915Sdim .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); 754341825Sdim } 755292915Sdim} 756292915Sdim 757292915Sdimvoid HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, 758292915Sdim const UUPairMap &PairMap) { 759292915Sdim MachineOperand &Op0 = MI->getOperand(0); 760292915Sdim MachineOperand &Op1 = MI->getOperand(1); 761292915Sdim assert(Op0.isReg() && Op1.isReg()); 762292915Sdim 763292915Sdim MachineBasicBlock &B = *MI->getParent(); 764292915Sdim DebugLoc DL = MI->getDebugLoc(); 765292915Sdim UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); 766292915Sdim assert(F != PairMap.end()); 767292915Sdim const UUPair &P = F->second; 768292915Sdim unsigned RS = getRegState(Op1); 769292915Sdim 770292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) 771292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); 772292915Sdim BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) 773292915Sdim .addReg(Op1.getReg(), RS, Op1.getSubReg()) 774292915Sdim .addImm(31); 775292915Sdim} 776292915Sdim 777292915Sdimvoid HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, 778292915Sdim const UUPairMap &PairMap) { 779314564Sdim using namespace Hexagon; 780314564Sdim 781292915Sdim MachineOperand &Op0 = MI->getOperand(0); 782292915Sdim MachineOperand &Op1 = MI->getOperand(1); 783292915Sdim MachineOperand &Op2 = MI->getOperand(2); 784292915Sdim assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); 785292915Sdim int64_t Sh64 = Op2.getImm(); 786292915Sdim assert(Sh64 >= 0 && Sh64 < 64); 787292915Sdim unsigned S = Sh64; 788292915Sdim 789292915Sdim UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); 790292915Sdim assert(F != PairMap.end()); 791292915Sdim const UUPair &P = F->second; 792360784Sdim Register LoR = P.first; 793360784Sdim Register HiR = P.second; 794292915Sdim 795292915Sdim unsigned Opc = MI->getOpcode(); 796292915Sdim bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); 797292915Sdim bool Left = !Right; 798292915Sdim bool Signed = (Opc == S2_asr_i_p); 799292915Sdim 800292915Sdim MachineBasicBlock &B = *MI->getParent(); 801292915Sdim DebugLoc DL = MI->getDebugLoc(); 802292915Sdim unsigned RS = getRegState(Op1); 803292915Sdim unsigned ShiftOpc = Left ? S2_asl_i_r 804292915Sdim : (Signed ? S2_asr_i_r : S2_lsr_i_r); 805314564Sdim unsigned LoSR = isub_lo; 806314564Sdim unsigned HiSR = isub_hi; 807292915Sdim 808292915Sdim if (S == 0) { 809292915Sdim // No shift, subregister copy. 810292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) 811292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); 812292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) 813292915Sdim .addReg(Op1.getReg(), RS, HiSR); 814292915Sdim } else if (S < 32) { 815292915Sdim const TargetRegisterClass *IntRC = &IntRegsRegClass; 816360784Sdim Register TmpR = MRI->createVirtualRegister(IntRC); 817292915Sdim // Expansion: 818292915Sdim // Shift left: DR = shl R, #s 819292915Sdim // LoR = shl R.lo, #s 820292915Sdim // TmpR = extractu R.lo, #s, #32-s 821292915Sdim // HiR = or (TmpR, asl(R.hi, #s)) 822292915Sdim // Shift right: DR = shr R, #s 823292915Sdim // HiR = shr R.hi, #s 824292915Sdim // TmpR = shr R.lo, #s 825292915Sdim // LoR = insert TmpR, R.hi, #s, #32-s 826292915Sdim 827292915Sdim // Shift left: 828292915Sdim // LoR = shl R.lo, #s 829292915Sdim // Shift right: 830292915Sdim // TmpR = shr R.lo, #s 831292915Sdim 832292915Sdim // Make a special case for A2_aslh and A2_asrh (they are predicable as 833292915Sdim // opposed to S2_asl_i_r/S2_asr_i_r). 834292915Sdim if (S == 16 && Left) 835292915Sdim BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) 836292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); 837292915Sdim else if (S == 16 && Signed) 838292915Sdim BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) 839292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); 840292915Sdim else 841292915Sdim BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) 842292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) 843292915Sdim .addImm(S); 844292915Sdim 845292915Sdim if (Left) { 846292915Sdim // TmpR = extractu R.lo, #s, #32-s 847292915Sdim BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) 848292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) 849292915Sdim .addImm(S) 850292915Sdim .addImm(32-S); 851292915Sdim // HiR = or (TmpR, asl(R.hi, #s)) 852292915Sdim BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) 853292915Sdim .addReg(TmpR) 854292915Sdim .addReg(Op1.getReg(), RS, HiSR) 855292915Sdim .addImm(S); 856292915Sdim } else { 857292915Sdim // HiR = shr R.hi, #s 858292915Sdim BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) 859292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) 860292915Sdim .addImm(S); 861292915Sdim // LoR = insert TmpR, R.hi, #s, #32-s 862292915Sdim BuildMI(B, MI, DL, TII->get(S2_insert), LoR) 863292915Sdim .addReg(TmpR) 864292915Sdim .addReg(Op1.getReg(), RS, HiSR) 865292915Sdim .addImm(S) 866292915Sdim .addImm(32-S); 867292915Sdim } 868292915Sdim } else if (S == 32) { 869292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) 870292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); 871292915Sdim if (!Signed) 872292915Sdim BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) 873292915Sdim .addImm(0); 874292915Sdim else // Must be right shift. 875292915Sdim BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) 876292915Sdim .addReg(Op1.getReg(), RS, HiSR) 877292915Sdim .addImm(31); 878292915Sdim } else if (S < 64) { 879292915Sdim S -= 32; 880292915Sdim if (S == 16 && Left) 881292915Sdim BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) 882292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); 883292915Sdim else if (S == 16 && Signed) 884292915Sdim BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) 885292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); 886292915Sdim else 887292915Sdim BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) 888292915Sdim .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) 889292915Sdim .addImm(S); 890292915Sdim 891292915Sdim if (Signed) 892292915Sdim BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) 893292915Sdim .addReg(Op1.getReg(), RS, HiSR) 894292915Sdim .addImm(31); 895292915Sdim else 896292915Sdim BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) 897292915Sdim .addImm(0); 898292915Sdim } 899292915Sdim} 900292915Sdim 901292915Sdimvoid HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, 902292915Sdim const UUPairMap &PairMap) { 903314564Sdim using namespace Hexagon; 904314564Sdim 905292915Sdim MachineOperand &Op0 = MI->getOperand(0); 906292915Sdim MachineOperand &Op1 = MI->getOperand(1); 907292915Sdim MachineOperand &Op2 = MI->getOperand(2); 908292915Sdim MachineOperand &Op3 = MI->getOperand(3); 909292915Sdim assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); 910292915Sdim int64_t Sh64 = Op3.getImm(); 911292915Sdim assert(Sh64 >= 0 && Sh64 < 64); 912292915Sdim unsigned S = Sh64; 913292915Sdim 914292915Sdim UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); 915292915Sdim assert(F != PairMap.end()); 916292915Sdim const UUPair &P = F->second; 917292915Sdim unsigned LoR = P.first; 918292915Sdim unsigned HiR = P.second; 919292915Sdim 920292915Sdim MachineBasicBlock &B = *MI->getParent(); 921292915Sdim DebugLoc DL = MI->getDebugLoc(); 922292915Sdim unsigned RS1 = getRegState(Op1); 923292915Sdim unsigned RS2 = getRegState(Op2); 924292915Sdim const TargetRegisterClass *IntRC = &IntRegsRegClass; 925292915Sdim 926314564Sdim unsigned LoSR = isub_lo; 927314564Sdim unsigned HiSR = isub_hi; 928292915Sdim 929292915Sdim // Op0 = S2_asl_i_p_or Op1, Op2, Op3 930292915Sdim // means: Op0 = or (Op1, asl(Op2, Op3)) 931292915Sdim 932292915Sdim // Expansion of 933292915Sdim // DR = or (R1, asl(R2, #s)) 934292915Sdim // 935292915Sdim // LoR = or (R1.lo, asl(R2.lo, #s)) 936292915Sdim // Tmp1 = extractu R2.lo, #s, #32-s 937292915Sdim // Tmp2 = or R1.hi, Tmp1 938292915Sdim // HiR = or (Tmp2, asl(R2.hi, #s)) 939292915Sdim 940292915Sdim if (S == 0) { 941292915Sdim // DR = or (R1, asl(R2, #0)) 942292915Sdim // -> or (R1, R2) 943292915Sdim // i.e. LoR = or R1.lo, R2.lo 944292915Sdim // HiR = or R1.hi, R2.hi 945292915Sdim BuildMI(B, MI, DL, TII->get(A2_or), LoR) 946292915Sdim .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) 947292915Sdim .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); 948292915Sdim BuildMI(B, MI, DL, TII->get(A2_or), HiR) 949292915Sdim .addReg(Op1.getReg(), RS1, HiSR) 950292915Sdim .addReg(Op2.getReg(), RS2, HiSR); 951292915Sdim } else if (S < 32) { 952292915Sdim BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) 953292915Sdim .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) 954292915Sdim .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) 955292915Sdim .addImm(S); 956360784Sdim Register TmpR1 = MRI->createVirtualRegister(IntRC); 957292915Sdim BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) 958292915Sdim .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) 959292915Sdim .addImm(S) 960292915Sdim .addImm(32-S); 961360784Sdim Register TmpR2 = MRI->createVirtualRegister(IntRC); 962292915Sdim BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) 963292915Sdim .addReg(Op1.getReg(), RS1, HiSR) 964292915Sdim .addReg(TmpR1); 965292915Sdim BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) 966292915Sdim .addReg(TmpR2) 967292915Sdim .addReg(Op2.getReg(), RS2, HiSR) 968292915Sdim .addImm(S); 969292915Sdim } else if (S == 32) { 970292915Sdim // DR = or (R1, asl(R2, #32)) 971292915Sdim // -> or R1, R2.lo 972292915Sdim // LoR = R1.lo 973292915Sdim // HiR = or R1.hi, R2.lo 974292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) 975292915Sdim .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); 976292915Sdim BuildMI(B, MI, DL, TII->get(A2_or), HiR) 977292915Sdim .addReg(Op1.getReg(), RS1, HiSR) 978292915Sdim .addReg(Op2.getReg(), RS2, LoSR); 979292915Sdim } else if (S < 64) { 980292915Sdim // DR = or (R1, asl(R2, #s)) 981292915Sdim // 982292915Sdim // LoR = R1:lo 983292915Sdim // HiR = or (R1:hi, asl(R2:lo, #s-32)) 984292915Sdim S -= 32; 985292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) 986292915Sdim .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); 987292915Sdim BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) 988292915Sdim .addReg(Op1.getReg(), RS1, HiSR) 989292915Sdim .addReg(Op2.getReg(), RS2, LoSR) 990292915Sdim .addImm(S); 991292915Sdim } 992292915Sdim} 993292915Sdim 994292915Sdimbool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, 995292915Sdim const UUPairMap &PairMap) { 996314564Sdim using namespace Hexagon; 997314564Sdim 998341825Sdim LLVM_DEBUG(dbgs() << "Splitting: " << *MI); 999292915Sdim bool Split = false; 1000292915Sdim unsigned Opc = MI->getOpcode(); 1001292915Sdim 1002292915Sdim switch (Opc) { 1003292915Sdim case TargetOpcode::PHI: 1004292915Sdim case TargetOpcode::COPY: { 1005360784Sdim Register DstR = MI->getOperand(0).getReg(); 1006292915Sdim if (MRI->getRegClass(DstR) == DoubleRC) { 1007314564Sdim createHalfInstr(Opc, MI, PairMap, isub_lo); 1008314564Sdim createHalfInstr(Opc, MI, PairMap, isub_hi); 1009292915Sdim Split = true; 1010292915Sdim } 1011292915Sdim break; 1012292915Sdim } 1013292915Sdim case A2_andp: 1014314564Sdim createHalfInstr(A2_and, MI, PairMap, isub_lo); 1015314564Sdim createHalfInstr(A2_and, MI, PairMap, isub_hi); 1016292915Sdim Split = true; 1017292915Sdim break; 1018292915Sdim case A2_orp: 1019314564Sdim createHalfInstr(A2_or, MI, PairMap, isub_lo); 1020314564Sdim createHalfInstr(A2_or, MI, PairMap, isub_hi); 1021292915Sdim Split = true; 1022292915Sdim break; 1023292915Sdim case A2_xorp: 1024314564Sdim createHalfInstr(A2_xor, MI, PairMap, isub_lo); 1025314564Sdim createHalfInstr(A2_xor, MI, PairMap, isub_hi); 1026292915Sdim Split = true; 1027292915Sdim break; 1028292915Sdim 1029292915Sdim case L2_loadrd_io: 1030292915Sdim case L2_loadrd_pi: 1031292915Sdim case S2_storerd_io: 1032292915Sdim case S2_storerd_pi: 1033292915Sdim splitMemRef(MI, PairMap); 1034292915Sdim Split = true; 1035292915Sdim break; 1036292915Sdim 1037292915Sdim case A2_tfrpi: 1038314564Sdim case CONST64: 1039292915Sdim splitImmediate(MI, PairMap); 1040292915Sdim Split = true; 1041292915Sdim break; 1042292915Sdim 1043292915Sdim case A2_combineii: 1044292915Sdim case A4_combineir: 1045292915Sdim case A4_combineii: 1046292915Sdim case A4_combineri: 1047292915Sdim case A2_combinew: 1048292915Sdim splitCombine(MI, PairMap); 1049292915Sdim Split = true; 1050292915Sdim break; 1051292915Sdim 1052292915Sdim case A2_sxtw: 1053292915Sdim splitExt(MI, PairMap); 1054292915Sdim Split = true; 1055292915Sdim break; 1056292915Sdim 1057292915Sdim case S2_asl_i_p: 1058292915Sdim case S2_asr_i_p: 1059292915Sdim case S2_lsr_i_p: 1060292915Sdim splitShift(MI, PairMap); 1061292915Sdim Split = true; 1062292915Sdim break; 1063292915Sdim 1064292915Sdim case S2_asl_i_p_or: 1065292915Sdim splitAslOr(MI, PairMap); 1066292915Sdim Split = true; 1067292915Sdim break; 1068292915Sdim 1069292915Sdim default: 1070292915Sdim llvm_unreachable("Instruction not splitable"); 1071292915Sdim return false; 1072292915Sdim } 1073292915Sdim 1074292915Sdim return Split; 1075292915Sdim} 1076292915Sdim 1077292915Sdimvoid HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, 1078292915Sdim const UUPairMap &PairMap) { 1079292915Sdim for (auto &Op : MI->operands()) { 1080292915Sdim if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) 1081292915Sdim continue; 1082360784Sdim Register R = Op.getReg(); 1083292915Sdim UUPairMap::const_iterator F = PairMap.find(R); 1084292915Sdim if (F == PairMap.end()) 1085292915Sdim continue; 1086292915Sdim const UUPair &P = F->second; 1087292915Sdim switch (Op.getSubReg()) { 1088314564Sdim case Hexagon::isub_lo: 1089292915Sdim Op.setReg(P.first); 1090292915Sdim break; 1091314564Sdim case Hexagon::isub_hi: 1092292915Sdim Op.setReg(P.second); 1093292915Sdim break; 1094292915Sdim } 1095292915Sdim Op.setSubReg(0); 1096292915Sdim } 1097292915Sdim} 1098292915Sdim 1099292915Sdimvoid HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, 1100292915Sdim const UUPairMap &PairMap) { 1101292915Sdim MachineBasicBlock &B = *MI->getParent(); 1102292915Sdim DebugLoc DL = MI->getDebugLoc(); 1103292915Sdim 1104292915Sdim for (auto &Op : MI->operands()) { 1105292915Sdim if (!Op.isReg() || !Op.isUse()) 1106292915Sdim continue; 1107360784Sdim Register R = Op.getReg(); 1108360784Sdim if (!Register::isVirtualRegister(R)) 1109292915Sdim continue; 1110292915Sdim if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) 1111292915Sdim continue; 1112292915Sdim UUPairMap::const_iterator F = PairMap.find(R); 1113292915Sdim if (F == PairMap.end()) 1114292915Sdim continue; 1115292915Sdim const UUPair &Pr = F->second; 1116360784Sdim Register NewDR = MRI->createVirtualRegister(DoubleRC); 1117292915Sdim BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) 1118292915Sdim .addReg(Pr.first) 1119314564Sdim .addImm(Hexagon::isub_lo) 1120292915Sdim .addReg(Pr.second) 1121314564Sdim .addImm(Hexagon::isub_hi); 1122292915Sdim Op.setReg(NewDR); 1123292915Sdim } 1124292915Sdim} 1125292915Sdim 1126292915Sdimbool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { 1127327952Sdim using MISet = std::set<MachineInstr *>; 1128327952Sdim 1129292915Sdim const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; 1130292915Sdim bool Changed = false; 1131292915Sdim 1132341825Sdim LLVM_DEBUG(dbgs() << "Splitting partition: "; 1133341825Sdim dump_partition(dbgs(), Part, *TRI); dbgs() << '\n'); 1134292915Sdim 1135292915Sdim UUPairMap PairMap; 1136292915Sdim 1137292915Sdim MISet SplitIns; 1138292915Sdim for (unsigned DR : Part) { 1139292915Sdim MachineInstr *DefI = MRI->getVRegDef(DR); 1140292915Sdim SplitIns.insert(DefI); 1141292915Sdim 1142292915Sdim // Collect all instructions, including fixed ones. We won't split them, 1143292915Sdim // but we need to visit them again to insert the REG_SEQUENCE instructions. 1144292915Sdim for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); 1145292915Sdim U != W; ++U) 1146292915Sdim SplitIns.insert(U->getParent()); 1147292915Sdim 1148360784Sdim Register LoR = MRI->createVirtualRegister(IntRC); 1149360784Sdim Register HiR = MRI->createVirtualRegister(IntRC); 1150341825Sdim LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> " 1151341825Sdim << printReg(HiR, TRI) << ':' << printReg(LoR, TRI) 1152341825Sdim << '\n'); 1153292915Sdim PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); 1154292915Sdim } 1155292915Sdim 1156292915Sdim MISet Erase; 1157292915Sdim for (auto MI : SplitIns) { 1158292915Sdim if (isFixedInstr(MI)) { 1159292915Sdim collapseRegPairs(MI, PairMap); 1160292915Sdim } else { 1161292915Sdim bool Done = splitInstr(MI, PairMap); 1162292915Sdim if (Done) 1163292915Sdim Erase.insert(MI); 1164292915Sdim Changed |= Done; 1165292915Sdim } 1166292915Sdim } 1167292915Sdim 1168292915Sdim for (unsigned DR : Part) { 1169292915Sdim // Before erasing "double" instructions, revisit all uses of the double 1170292915Sdim // registers in this partition, and replace all uses of them with subre- 1171292915Sdim // gisters, with the corresponding single registers. 1172292915Sdim MISet Uses; 1173292915Sdim for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); 1174292915Sdim U != W; ++U) 1175292915Sdim Uses.insert(U->getParent()); 1176292915Sdim for (auto M : Uses) 1177292915Sdim replaceSubregUses(M, PairMap); 1178292915Sdim } 1179292915Sdim 1180292915Sdim for (auto MI : Erase) { 1181292915Sdim MachineBasicBlock *B = MI->getParent(); 1182292915Sdim B->erase(MI); 1183292915Sdim } 1184292915Sdim 1185292915Sdim return Changed; 1186292915Sdim} 1187292915Sdim 1188292915Sdimbool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { 1189327952Sdim if (skipFunction(MF.getFunction())) 1190309124Sdim return false; 1191309124Sdim 1192341825Sdim LLVM_DEBUG(dbgs() << "Splitting double registers in function: " 1193341825Sdim << MF.getName() << '\n'); 1194341825Sdim 1195292915Sdim auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1196292915Sdim TRI = ST.getRegisterInfo(); 1197292915Sdim TII = ST.getInstrInfo(); 1198292915Sdim MRI = &MF.getRegInfo(); 1199292915Sdim MLI = &getAnalysis<MachineLoopInfo>(); 1200292915Sdim 1201292915Sdim UUSetMap P2Rs; 1202292915Sdim LoopRegMap IRM; 1203292915Sdim 1204292915Sdim collectIndRegs(IRM); 1205292915Sdim partitionRegisters(P2Rs); 1206292915Sdim 1207341825Sdim LLVM_DEBUG({ 1208292915Sdim dbgs() << "Register partitioning: (partition #0 is fixed)\n"; 1209292915Sdim for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { 1210292915Sdim dbgs() << '#' << I->first << " -> "; 1211292915Sdim dump_partition(dbgs(), I->second, *TRI); 1212292915Sdim dbgs() << '\n'; 1213292915Sdim } 1214292915Sdim }); 1215292915Sdim 1216292915Sdim bool Changed = false; 1217292915Sdim int Limit = MaxHSDR; 1218292915Sdim 1219292915Sdim for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { 1220292915Sdim if (I->first == 0) 1221292915Sdim continue; 1222292915Sdim if (Limit >= 0 && Counter >= Limit) 1223292915Sdim break; 1224292915Sdim USet &Part = I->second; 1225341825Sdim LLVM_DEBUG(dbgs() << "Calculating profit for partition #" << I->first 1226341825Sdim << '\n'); 1227292915Sdim if (!isProfitable(Part, IRM)) 1228292915Sdim continue; 1229292915Sdim Counter++; 1230292915Sdim Changed |= splitPartition(Part); 1231292915Sdim } 1232292915Sdim 1233292915Sdim return Changed; 1234292915Sdim} 1235292915Sdim 1236292915SdimFunctionPass *llvm::createHexagonSplitDoubleRegs() { 1237292915Sdim return new HexagonSplitDoubleRegs(); 1238292915Sdim} 1239