1327952Sdim//===- HexagonSplitDouble.cpp ---------------------------------------------===//
2292915Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6292915Sdim//
7292915Sdim//===----------------------------------------------------------------------===//
8292915Sdim
9292915Sdim#define DEBUG_TYPE "hsdr"
10292915Sdim
11314564Sdim#include "HexagonInstrInfo.h"
12292915Sdim#include "HexagonRegisterInfo.h"
13314564Sdim#include "HexagonSubtarget.h"
14314564Sdim#include "llvm/ADT/BitVector.h"
15321369Sdim#include "llvm/ADT/STLExtras.h"
16314564Sdim#include "llvm/ADT/SmallVector.h"
17314564Sdim#include "llvm/ADT/StringRef.h"
18314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
19292915Sdim#include "llvm/CodeGen/MachineFunction.h"
20292915Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
21314564Sdim#include "llvm/CodeGen/MachineInstr.h"
22292915Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
23292915Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
24314564Sdim#include "llvm/CodeGen/MachineMemOperand.h"
25314564Sdim#include "llvm/CodeGen/MachineOperand.h"
26292915Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
27327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h"
28341825Sdim#include "llvm/Config/llvm-config.h"
29314564Sdim#include "llvm/IR/DebugLoc.h"
30309124Sdim#include "llvm/Pass.h"
31292915Sdim#include "llvm/Support/CommandLine.h"
32314564Sdim#include "llvm/Support/Compiler.h"
33292915Sdim#include "llvm/Support/Debug.h"
34314564Sdim#include "llvm/Support/ErrorHandling.h"
35292915Sdim#include "llvm/Support/raw_ostream.h"
36314564Sdim#include <algorithm>
37314564Sdim#include <cassert>
38314564Sdim#include <cstdint>
39314564Sdim#include <limits>
40292915Sdim#include <map>
41292915Sdim#include <set>
42314564Sdim#include <utility>
43292915Sdim#include <vector>
44292915Sdim
45292915Sdimusing namespace llvm;
46292915Sdim
47292915Sdimnamespace llvm {
48314564Sdim
49292915Sdim  FunctionPass *createHexagonSplitDoubleRegs();
50292915Sdim  void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
51292915Sdim
52314564Sdim} // end namespace llvm
53314564Sdim
54327952Sdimstatic cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
55327952Sdim    cl::desc("Maximum number of split partitions"));
56327952Sdimstatic cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
57327952Sdim    cl::desc("Do not split loads or stores"));
58341825Sdim  static cl::opt<bool> SplitAll("hsdr-split-all", cl::Hidden, cl::init(false),
59341825Sdim      cl::desc("Split all partitions"));
60327952Sdim
61292915Sdimnamespace {
62314564Sdim
63292915Sdim  class HexagonSplitDoubleRegs : public MachineFunctionPass {
64292915Sdim  public:
65292915Sdim    static char ID;
66314564Sdim
67341825Sdim    HexagonSplitDoubleRegs() : MachineFunctionPass(ID) {}
68314564Sdim
69314564Sdim    StringRef getPassName() const override {
70292915Sdim      return "Hexagon Split Double Registers";
71292915Sdim    }
72314564Sdim
73292915Sdim    void getAnalysisUsage(AnalysisUsage &AU) const override {
74292915Sdim      AU.addRequired<MachineLoopInfo>();
75292915Sdim      AU.addPreserved<MachineLoopInfo>();
76292915Sdim      MachineFunctionPass::getAnalysisUsage(AU);
77292915Sdim    }
78314564Sdim
79292915Sdim    bool runOnMachineFunction(MachineFunction &MF) override;
80292915Sdim
81292915Sdim  private:
82292915Sdim    static const TargetRegisterClass *const DoubleRC;
83292915Sdim
84327952Sdim    const HexagonRegisterInfo *TRI = nullptr;
85327952Sdim    const HexagonInstrInfo *TII = nullptr;
86292915Sdim    const MachineLoopInfo *MLI;
87292915Sdim    MachineRegisterInfo *MRI;
88292915Sdim
89327952Sdim    using USet = std::set<unsigned>;
90327952Sdim    using UUSetMap = std::map<unsigned, USet>;
91327952Sdim    using UUPair = std::pair<unsigned, unsigned>;
92327952Sdim    using UUPairMap = std::map<unsigned, UUPair>;
93327952Sdim    using LoopRegMap = std::map<const MachineLoop *, USet>;
94292915Sdim
95292915Sdim    bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
96292915Sdim    bool isVolatileInstr(const MachineInstr *MI) const;
97292915Sdim    bool isFixedInstr(const MachineInstr *MI) const;
98292915Sdim    void partitionRegisters(UUSetMap &P2Rs);
99292915Sdim    int32_t profit(const MachineInstr *MI) const;
100341825Sdim    int32_t profit(unsigned Reg) const;
101292915Sdim    bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
102292915Sdim
103292915Sdim    void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
104292915Sdim    void collectIndRegs(LoopRegMap &IRM);
105292915Sdim
106292915Sdim    void createHalfInstr(unsigned Opc, MachineInstr *MI,
107292915Sdim        const UUPairMap &PairMap, unsigned SubR);
108292915Sdim    void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
109292915Sdim    void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
110292915Sdim    void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
111292915Sdim    void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
112292915Sdim    void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
113292915Sdim    void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
114292915Sdim    bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
115292915Sdim    void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
116292915Sdim    void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
117292915Sdim    bool splitPartition(const USet &Part);
118292915Sdim
119292915Sdim    static int Counter;
120327952Sdim
121292915Sdim    static void dump_partition(raw_ostream&, const USet&,
122292915Sdim       const TargetRegisterInfo&);
123292915Sdim  };
124314564Sdim
125314564Sdim} // end anonymous namespace
126314564Sdim
127327952Sdimchar HexagonSplitDoubleRegs::ID;
128327952Sdimint HexagonSplitDoubleRegs::Counter = 0;
129327952Sdimconst TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC =
130327952Sdim    &Hexagon::DoubleRegsRegClass;
131327952Sdim
132292915SdimINITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
133292915Sdim  "Hexagon Split Double Registers", false, false)
134292915Sdim
135321369Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
136321369SdimLLVM_DUMP_METHOD void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
137292915Sdim      const USet &Part, const TargetRegisterInfo &TRI) {
138292915Sdim  dbgs() << '{';
139292915Sdim  for (auto I : Part)
140327952Sdim    dbgs() << ' ' << printReg(I, &TRI);
141292915Sdim  dbgs() << " }";
142292915Sdim}
143321369Sdim#endif
144292915Sdim
145292915Sdimbool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
146292915Sdim  for (auto I : IRM) {
147292915Sdim    const USet &Rs = I.second;
148292915Sdim    if (Rs.find(Reg) != Rs.end())
149292915Sdim      return true;
150292915Sdim  }
151292915Sdim  return false;
152292915Sdim}
153292915Sdim
154292915Sdimbool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
155353358Sdim  for (auto &MO : MI->memoperands())
156353358Sdim    if (MO->isVolatile() || MO->isAtomic())
157292915Sdim      return true;
158292915Sdim  return false;
159292915Sdim}
160292915Sdim
161292915Sdimbool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
162360784Sdim  if (MI->mayLoadOrStore())
163292915Sdim    if (MemRefsFixed || isVolatileInstr(MI))
164292915Sdim      return true;
165341825Sdim  if (MI->isDebugInstr())
166292915Sdim    return false;
167292915Sdim
168292915Sdim  unsigned Opc = MI->getOpcode();
169292915Sdim  switch (Opc) {
170292915Sdim    default:
171292915Sdim      return true;
172292915Sdim
173292915Sdim    case TargetOpcode::PHI:
174292915Sdim    case TargetOpcode::COPY:
175292915Sdim      break;
176292915Sdim
177292915Sdim    case Hexagon::L2_loadrd_io:
178292915Sdim      // Not handling stack stores (only reg-based addresses).
179292915Sdim      if (MI->getOperand(1).isReg())
180292915Sdim        break;
181292915Sdim      return true;
182292915Sdim    case Hexagon::S2_storerd_io:
183292915Sdim      // Not handling stack stores (only reg-based addresses).
184292915Sdim      if (MI->getOperand(0).isReg())
185292915Sdim        break;
186292915Sdim      return true;
187292915Sdim    case Hexagon::L2_loadrd_pi:
188292915Sdim    case Hexagon::S2_storerd_pi:
189292915Sdim
190292915Sdim    case Hexagon::A2_tfrpi:
191292915Sdim    case Hexagon::A2_combineii:
192292915Sdim    case Hexagon::A4_combineir:
193292915Sdim    case Hexagon::A4_combineii:
194292915Sdim    case Hexagon::A4_combineri:
195292915Sdim    case Hexagon::A2_combinew:
196314564Sdim    case Hexagon::CONST64:
197292915Sdim
198292915Sdim    case Hexagon::A2_sxtw:
199292915Sdim
200292915Sdim    case Hexagon::A2_andp:
201292915Sdim    case Hexagon::A2_orp:
202292915Sdim    case Hexagon::A2_xorp:
203292915Sdim    case Hexagon::S2_asl_i_p_or:
204292915Sdim    case Hexagon::S2_asl_i_p:
205292915Sdim    case Hexagon::S2_asr_i_p:
206292915Sdim    case Hexagon::S2_lsr_i_p:
207292915Sdim      break;
208292915Sdim  }
209292915Sdim
210292915Sdim  for (auto &Op : MI->operands()) {
211292915Sdim    if (!Op.isReg())
212292915Sdim      continue;
213360784Sdim    Register R = Op.getReg();
214360784Sdim    if (!Register::isVirtualRegister(R))
215292915Sdim      return true;
216292915Sdim  }
217292915Sdim  return false;
218292915Sdim}
219292915Sdim
220292915Sdimvoid HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
221327952Sdim  using UUMap = std::map<unsigned, unsigned>;
222327952Sdim  using UVect = std::vector<unsigned>;
223292915Sdim
224292915Sdim  unsigned NumRegs = MRI->getNumVirtRegs();
225292915Sdim  BitVector DoubleRegs(NumRegs);
226292915Sdim  for (unsigned i = 0; i < NumRegs; ++i) {
227360784Sdim    unsigned R = Register::index2VirtReg(i);
228292915Sdim    if (MRI->getRegClass(R) == DoubleRC)
229292915Sdim      DoubleRegs.set(i);
230292915Sdim  }
231292915Sdim
232292915Sdim  BitVector FixedRegs(NumRegs);
233292915Sdim  for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
234360784Sdim    unsigned R = Register::index2VirtReg(x);
235292915Sdim    MachineInstr *DefI = MRI->getVRegDef(R);
236292915Sdim    // In some cases a register may exist, but never be defined or used.
237292915Sdim    // It should never appear anywhere, but mark it as "fixed", just to be
238292915Sdim    // safe.
239292915Sdim    if (!DefI || isFixedInstr(DefI))
240292915Sdim      FixedRegs.set(x);
241292915Sdim  }
242292915Sdim
243292915Sdim  UUSetMap AssocMap;
244292915Sdim  for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
245292915Sdim    if (FixedRegs[x])
246292915Sdim      continue;
247360784Sdim    unsigned R = Register::index2VirtReg(x);
248341825Sdim    LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~");
249292915Sdim    USet &Asc = AssocMap[R];
250292915Sdim    for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
251292915Sdim         U != Z; ++U) {
252292915Sdim      MachineOperand &Op = *U;
253292915Sdim      MachineInstr *UseI = Op.getParent();
254292915Sdim      if (isFixedInstr(UseI))
255292915Sdim        continue;
256292915Sdim      for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
257292915Sdim        MachineOperand &MO = UseI->getOperand(i);
258292915Sdim        // Skip non-registers or registers with subregisters.
259292915Sdim        if (&MO == &Op || !MO.isReg() || MO.getSubReg())
260292915Sdim          continue;
261360784Sdim        Register T = MO.getReg();
262360784Sdim        if (!Register::isVirtualRegister(T)) {
263292915Sdim          FixedRegs.set(x);
264292915Sdim          continue;
265292915Sdim        }
266292915Sdim        if (MRI->getRegClass(T) != DoubleRC)
267292915Sdim          continue;
268360784Sdim        unsigned u = Register::virtReg2Index(T);
269292915Sdim        if (FixedRegs[u])
270292915Sdim          continue;
271341825Sdim        LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI));
272292915Sdim        Asc.insert(T);
273292915Sdim        // Make it symmetric.
274292915Sdim        AssocMap[T].insert(R);
275292915Sdim      }
276292915Sdim    }
277341825Sdim    LLVM_DEBUG(dbgs() << '\n');
278292915Sdim  }
279292915Sdim
280292915Sdim  UUMap R2P;
281292915Sdim  unsigned NextP = 1;
282292915Sdim  USet Visited;
283292915Sdim  for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
284360784Sdim    unsigned R = Register::index2VirtReg(x);
285292915Sdim    if (Visited.count(R))
286292915Sdim      continue;
287292915Sdim    // Create a new partition for R.
288292915Sdim    unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
289292915Sdim    UVect WorkQ;
290292915Sdim    WorkQ.push_back(R);
291292915Sdim    for (unsigned i = 0; i < WorkQ.size(); ++i) {
292292915Sdim      unsigned T = WorkQ[i];
293292915Sdim      if (Visited.count(T))
294292915Sdim        continue;
295292915Sdim      R2P[T] = ThisP;
296292915Sdim      Visited.insert(T);
297292915Sdim      // Add all registers associated with T.
298292915Sdim      USet &Asc = AssocMap[T];
299292915Sdim      for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
300292915Sdim        WorkQ.push_back(*J);
301292915Sdim    }
302292915Sdim  }
303292915Sdim
304292915Sdim  for (auto I : R2P)
305292915Sdim    P2Rs[I.second].insert(I.first);
306292915Sdim}
307292915Sdim
/// Profit of materializing the 32-bit immediate \p Imm as one half of a
/// split value: 10 when all bits are zero or all bits are one, 0 otherwise.
static inline int32_t profitImm(unsigned Imm) {
  const bool AllZerosOrAllOnes = (Imm == 0) || (Imm == 0xFFFFFFFF);
  return AllZerosOrAllOnes ? 10 : 0;
}
314292915Sdim
315292915Sdimint32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
316292915Sdim  unsigned ImmX = 0;
317292915Sdim  unsigned Opc = MI->getOpcode();
318292915Sdim  switch (Opc) {
319292915Sdim    case TargetOpcode::PHI:
320292915Sdim      for (const auto &Op : MI->operands())
321292915Sdim        if (!Op.getSubReg())
322292915Sdim          return 0;
323292915Sdim      return 10;
324292915Sdim    case TargetOpcode::COPY:
325292915Sdim      if (MI->getOperand(1).getSubReg() != 0)
326292915Sdim        return 10;
327292915Sdim      return 0;
328292915Sdim
329292915Sdim    case Hexagon::L2_loadrd_io:
330292915Sdim    case Hexagon::S2_storerd_io:
331292915Sdim      return -1;
332292915Sdim    case Hexagon::L2_loadrd_pi:
333292915Sdim    case Hexagon::S2_storerd_pi:
334292915Sdim      return 2;
335292915Sdim
336292915Sdim    case Hexagon::A2_tfrpi:
337314564Sdim    case Hexagon::CONST64: {
338292915Sdim      uint64_t D = MI->getOperand(1).getImm();
339292915Sdim      unsigned Lo = D & 0xFFFFFFFFULL;
340292915Sdim      unsigned Hi = D >> 32;
341341825Sdim      return profitImm(Lo) + profitImm(Hi);
342292915Sdim    }
343292915Sdim    case Hexagon::A2_combineii:
344341825Sdim    case Hexagon::A4_combineii: {
345341825Sdim      const MachineOperand &Op1 = MI->getOperand(1);
346341825Sdim      const MachineOperand &Op2 = MI->getOperand(2);
347341825Sdim      int32_t Prof1 = Op1.isImm() ? profitImm(Op1.getImm()) : 0;
348341825Sdim      int32_t Prof2 = Op2.isImm() ? profitImm(Op2.getImm()) : 0;
349341825Sdim      return Prof1 + Prof2;
350341825Sdim    }
351292915Sdim    case Hexagon::A4_combineri:
352292915Sdim      ImmX++;
353321369Sdim      // Fall through into A4_combineir.
354321369Sdim      LLVM_FALLTHROUGH;
355292915Sdim    case Hexagon::A4_combineir: {
356292915Sdim      ImmX++;
357341825Sdim      const MachineOperand &OpX = MI->getOperand(ImmX);
358341825Sdim      if (OpX.isImm()) {
359341825Sdim        int64_t V = OpX.getImm();
360341825Sdim        if (V == 0 || V == -1)
361341825Sdim          return 10;
362341825Sdim      }
363292915Sdim      // Fall through into A2_combinew.
364314564Sdim      LLVM_FALLTHROUGH;
365292915Sdim    }
366292915Sdim    case Hexagon::A2_combinew:
367292915Sdim      return 2;
368292915Sdim
369292915Sdim    case Hexagon::A2_sxtw:
370292915Sdim      return 3;
371292915Sdim
372292915Sdim    case Hexagon::A2_andp:
373292915Sdim    case Hexagon::A2_orp:
374341825Sdim    case Hexagon::A2_xorp: {
375360784Sdim      Register Rs = MI->getOperand(1).getReg();
376360784Sdim      Register Rt = MI->getOperand(2).getReg();
377341825Sdim      return profit(Rs) + profit(Rt);
378341825Sdim    }
379292915Sdim
380292915Sdim    case Hexagon::S2_asl_i_p_or: {
381292915Sdim      unsigned S = MI->getOperand(3).getImm();
382292915Sdim      if (S == 0 || S == 32)
383292915Sdim        return 10;
384292915Sdim      return -1;
385292915Sdim    }
386292915Sdim    case Hexagon::S2_asl_i_p:
387292915Sdim    case Hexagon::S2_asr_i_p:
388292915Sdim    case Hexagon::S2_lsr_i_p:
389292915Sdim      unsigned S = MI->getOperand(2).getImm();
390292915Sdim      if (S == 0 || S == 32)
391292915Sdim        return 10;
392292915Sdim      if (S == 16)
393292915Sdim        return 5;
394292915Sdim      if (S == 48)
395292915Sdim        return 7;
396292915Sdim      return -10;
397292915Sdim  }
398292915Sdim
399292915Sdim  return 0;
400292915Sdim}
401292915Sdim
402341825Sdimint32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const {
403360784Sdim  assert(Register::isVirtualRegister(Reg));
404341825Sdim
405341825Sdim  const MachineInstr *DefI = MRI->getVRegDef(Reg);
406341825Sdim  switch (DefI->getOpcode()) {
407341825Sdim    case Hexagon::A2_tfrpi:
408341825Sdim    case Hexagon::CONST64:
409341825Sdim    case Hexagon::A2_combineii:
410341825Sdim    case Hexagon::A4_combineii:
411341825Sdim    case Hexagon::A4_combineri:
412341825Sdim    case Hexagon::A4_combineir:
413341825Sdim    case Hexagon::A2_combinew:
414341825Sdim      return profit(DefI);
415341825Sdim    default:
416341825Sdim      break;
417341825Sdim  }
418341825Sdim  return 0;
419341825Sdim}
420341825Sdim
421292915Sdimbool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
422292915Sdim      const {
423321369Sdim  unsigned FixedNum = 0, LoopPhiNum = 0;
424292915Sdim  int32_t TotalP = 0;
425292915Sdim
426292915Sdim  for (unsigned DR : Part) {
427292915Sdim    MachineInstr *DefI = MRI->getVRegDef(DR);
428292915Sdim    int32_t P = profit(DefI);
429314564Sdim    if (P == std::numeric_limits<int>::min())
430292915Sdim      return false;
431292915Sdim    TotalP += P;
432292915Sdim    // Reduce the profitability of splitting induction registers.
433292915Sdim    if (isInduction(DR, IRM))
434292915Sdim      TotalP -= 30;
435292915Sdim
436292915Sdim    for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
437292915Sdim         U != W; ++U) {
438292915Sdim      MachineInstr *UseI = U->getParent();
439292915Sdim      if (isFixedInstr(UseI)) {
440292915Sdim        FixedNum++;
441292915Sdim        // Calculate the cost of generating REG_SEQUENCE instructions.
442292915Sdim        for (auto &Op : UseI->operands()) {
443292915Sdim          if (Op.isReg() && Part.count(Op.getReg()))
444292915Sdim            if (Op.getSubReg())
445292915Sdim              TotalP -= 2;
446292915Sdim        }
447292915Sdim        continue;
448292915Sdim      }
449292915Sdim      // If a register from this partition is used in a fixed instruction,
450292915Sdim      // and there is also a register in this partition that is used in
451292915Sdim      // a loop phi node, then decrease the splitting profit as this can
452292915Sdim      // confuse the modulo scheduler.
453292915Sdim      if (UseI->isPHI()) {
454292915Sdim        const MachineBasicBlock *PB = UseI->getParent();
455292915Sdim        const MachineLoop *L = MLI->getLoopFor(PB);
456292915Sdim        if (L && L->getHeader() == PB)
457292915Sdim          LoopPhiNum++;
458292915Sdim      }
459292915Sdim      // Splittable instruction.
460292915Sdim      int32_t P = profit(UseI);
461314564Sdim      if (P == std::numeric_limits<int>::min())
462292915Sdim        return false;
463292915Sdim      TotalP += P;
464292915Sdim    }
465292915Sdim  }
466292915Sdim
467292915Sdim  if (FixedNum > 0 && LoopPhiNum > 0)
468292915Sdim    TotalP -= 20*LoopPhiNum;
469292915Sdim
470341825Sdim  LLVM_DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
471341825Sdim  if (SplitAll)
472341825Sdim    return true;
473292915Sdim  return TotalP > 0;
474292915Sdim}
475292915Sdim
476292915Sdimvoid HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
477292915Sdim      USet &Rs) {
478292915Sdim  const MachineBasicBlock *HB = L->getHeader();
479292915Sdim  const MachineBasicBlock *LB = L->getLoopLatch();
480292915Sdim  if (!HB || !LB)
481292915Sdim    return;
482292915Sdim
483292915Sdim  // Examine the latch branch. Expect it to be a conditional branch to
484292915Sdim  // the header (either "br-cond header" or "br-cond exit; br header").
485314564Sdim  MachineBasicBlock *TB = nullptr, *FB = nullptr;
486292915Sdim  MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
487292915Sdim  SmallVector<MachineOperand,2> Cond;
488309124Sdim  bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false);
489314564Sdim  // Only analyzable conditional branches. HII::analyzeBranch will put
490292915Sdim  // the branch opcode as the first element of Cond, and the predicate
491292915Sdim  // operand as the second.
492292915Sdim  if (BadLB || Cond.size() != 2)
493292915Sdim    return;
494292915Sdim  // Only simple jump-conditional (with or without negation).
495292915Sdim  if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
496292915Sdim    return;
497292915Sdim  // Must go to the header.
498292915Sdim  if (TB != HB && FB != HB)
499292915Sdim    return;
500314564Sdim  assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch");
501292915Sdim  // Expect a predicate register.
502360784Sdim  Register PR = Cond[1].getReg();
503292915Sdim  assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
504292915Sdim
505292915Sdim  // Get the registers on which the loop controlling compare instruction
506292915Sdim  // depends.
507292915Sdim  unsigned CmpR1 = 0, CmpR2 = 0;
508292915Sdim  const MachineInstr *CmpI = MRI->getVRegDef(PR);
509292915Sdim  while (CmpI->getOpcode() == Hexagon::C2_not)
510292915Sdim    CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
511292915Sdim
512292915Sdim  int Mask = 0, Val = 0;
513309124Sdim  bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val);
514292915Sdim  if (!OkCI)
515292915Sdim    return;
516292915Sdim  // Eliminate non-double input registers.
517292915Sdim  if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
518292915Sdim    CmpR1 = 0;
519292915Sdim  if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
520292915Sdim    CmpR2 = 0;
521292915Sdim  if (!CmpR1 && !CmpR2)
522292915Sdim    return;
523292915Sdim
524292915Sdim  // Now examine the top of the loop: the phi nodes that could poten-
525292915Sdim  // tially define loop induction registers. The registers defined by
526292915Sdim  // such a phi node would be used in a 64-bit add, which then would
527292915Sdim  // be used in the loop compare instruction.
528292915Sdim
529292915Sdim  // Get the set of all double registers defined by phi nodes in the
530292915Sdim  // loop header.
531327952Sdim  using UVect = std::vector<unsigned>;
532327952Sdim
533292915Sdim  UVect DP;
534292915Sdim  for (auto &MI : *HB) {
535292915Sdim    if (!MI.isPHI())
536292915Sdim      break;
537292915Sdim    const MachineOperand &MD = MI.getOperand(0);
538360784Sdim    Register R = MD.getReg();
539292915Sdim    if (MRI->getRegClass(R) == DoubleRC)
540292915Sdim      DP.push_back(R);
541292915Sdim  }
542292915Sdim  if (DP.empty())
543292915Sdim    return;
544292915Sdim
545292915Sdim  auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
546292915Sdim    for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
547292915Sdim         I != E; ++I) {
548292915Sdim      const MachineInstr *UseI = I->getParent();
549292915Sdim      if (UseI->getOpcode() != Hexagon::A2_addp)
550292915Sdim        continue;
551292915Sdim      // Get the output from the add. If it is one of the inputs to the
552292915Sdim      // loop-controlling compare instruction, then R is likely an induc-
553292915Sdim      // tion register.
554360784Sdim      Register T = UseI->getOperand(0).getReg();
555292915Sdim      if (T == CmpR1 || T == CmpR2)
556292915Sdim        return false;
557292915Sdim    }
558292915Sdim    return true;
559292915Sdim  };
560314564Sdim  UVect::iterator End = llvm::remove_if(DP, NoIndOp);
561292915Sdim  Rs.insert(DP.begin(), End);
562292915Sdim  Rs.insert(CmpR1);
563292915Sdim  Rs.insert(CmpR2);
564292915Sdim
565341825Sdim  LLVM_DEBUG({
566327952Sdim    dbgs() << "For loop at " << printMBBReference(*HB) << " ind regs: ";
567292915Sdim    dump_partition(dbgs(), Rs, *TRI);
568292915Sdim    dbgs() << '\n';
569292915Sdim  });
570292915Sdim}
571292915Sdim
572292915Sdimvoid HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
573327952Sdim  using LoopVector = std::vector<MachineLoop *>;
574327952Sdim
575292915Sdim  LoopVector WorkQ;
576292915Sdim
577292915Sdim  for (auto I : *MLI)
578292915Sdim    WorkQ.push_back(I);
579292915Sdim  for (unsigned i = 0; i < WorkQ.size(); ++i) {
580292915Sdim    for (auto I : *WorkQ[i])
581292915Sdim      WorkQ.push_back(I);
582292915Sdim  }
583292915Sdim
584292915Sdim  USet Rs;
585292915Sdim  for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
586292915Sdim    MachineLoop *L = WorkQ[i];
587292915Sdim    Rs.clear();
588292915Sdim    collectIndRegsForLoop(L, Rs);
589292915Sdim    if (!Rs.empty())
590292915Sdim      IRM.insert(std::make_pair(L, Rs));
591292915Sdim  }
592292915Sdim}
593292915Sdim
594292915Sdimvoid HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
595292915Sdim      const UUPairMap &PairMap, unsigned SubR) {
596292915Sdim  MachineBasicBlock &B = *MI->getParent();
597292915Sdim  DebugLoc DL = MI->getDebugLoc();
598292915Sdim  MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
599292915Sdim
600292915Sdim  for (auto &Op : MI->operands()) {
601292915Sdim    if (!Op.isReg()) {
602292915Sdim      NewI->addOperand(Op);
603292915Sdim      continue;
604292915Sdim    }
605292915Sdim    // For register operands, set the subregister.
606360784Sdim    Register R = Op.getReg();
607292915Sdim    unsigned SR = Op.getSubReg();
608360784Sdim    bool isVirtReg = Register::isVirtualRegister(R);
609292915Sdim    bool isKill = Op.isKill();
610292915Sdim    if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
611292915Sdim      isKill = false;
612292915Sdim      UUPairMap::const_iterator F = PairMap.find(R);
613292915Sdim      if (F == PairMap.end()) {
614292915Sdim        SR = SubR;
615292915Sdim      } else {
616292915Sdim        const UUPair &P = F->second;
617314564Sdim        R = (SubR == Hexagon::isub_lo) ? P.first : P.second;
618292915Sdim        SR = 0;
619292915Sdim      }
620292915Sdim    }
621292915Sdim    auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
622292915Sdim          Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
623292915Sdim          Op.isInternalRead());
624292915Sdim    NewI->addOperand(CO);
625292915Sdim  }
626292915Sdim}
627292915Sdim
628292915Sdimvoid HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
629292915Sdim      const UUPairMap &PairMap) {
630292915Sdim  bool Load = MI->mayLoad();
631292915Sdim  unsigned OrigOpc = MI->getOpcode();
632292915Sdim  bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
633292915Sdim                  OrigOpc == Hexagon::S2_storerd_pi);
634292915Sdim  MachineInstr *LowI, *HighI;
635292915Sdim  MachineBasicBlock &B = *MI->getParent();
636292915Sdim  DebugLoc DL = MI->getDebugLoc();
637292915Sdim
638292915Sdim  // Index of the base-address-register operand.
639292915Sdim  unsigned AdrX = PostInc ? (Load ? 2 : 1)
640292915Sdim                          : (Load ? 1 : 0);
641292915Sdim  MachineOperand &AdrOp = MI->getOperand(AdrX);
642292915Sdim  unsigned RSA = getRegState(AdrOp);
643292915Sdim  MachineOperand &ValOp = Load ? MI->getOperand(0)
644292915Sdim                               : (PostInc ? MI->getOperand(3)
645292915Sdim                                          : MI->getOperand(2));
646292915Sdim  UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
647292915Sdim  assert(F != PairMap.end());
648292915Sdim
649292915Sdim  if (Load) {
650292915Sdim    const UUPair &P = F->second;
651292915Sdim    int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
652292915Sdim    LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
653292915Sdim             .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
654292915Sdim             .addImm(Off);
655292915Sdim    HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
656292915Sdim              .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
657292915Sdim              .addImm(Off+4);
658292915Sdim  } else {
659292915Sdim    const UUPair &P = F->second;
660292915Sdim    int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
661292915Sdim    LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
662292915Sdim             .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
663292915Sdim             .addImm(Off)
664292915Sdim             .addReg(P.first);
665292915Sdim    HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
666292915Sdim              .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
667292915Sdim              .addImm(Off+4)
668292915Sdim              .addReg(P.second);
669292915Sdim  }
670292915Sdim
671292915Sdim  if (PostInc) {
672292915Sdim    // Create the increment of the address register.
673292915Sdim    int64_t Inc = Load ? MI->getOperand(3).getImm()
674292915Sdim                       : MI->getOperand(2).getImm();
675292915Sdim    MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
676292915Sdim    const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
677360784Sdim    Register NewR = MRI->createVirtualRegister(RC);
678292915Sdim    assert(!UpdOp.getSubReg() && "Def operand with subreg");
679292915Sdim    BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
680292915Sdim      .addReg(AdrOp.getReg(), RSA)
681292915Sdim      .addImm(Inc);
682292915Sdim    MRI->replaceRegWith(UpdOp.getReg(), NewR);
683292915Sdim    // The original instruction will be deleted later.
684292915Sdim  }
685292915Sdim
686292915Sdim  // Generate a new pair of memory-operands.
687292915Sdim  MachineFunction &MF = *B.getParent();
688292915Sdim  for (auto &MO : MI->memoperands()) {
689292915Sdim    const MachinePointerInfo &Ptr = MO->getPointerInfo();
690309124Sdim    MachineMemOperand::Flags F = MO->getFlags();
691292915Sdim    int A = MO->getAlignment();
692292915Sdim
693292915Sdim    auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
694292915Sdim    LowI->addMemOperand(MF, Tmp1);
695292915Sdim    auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
696292915Sdim    HighI->addMemOperand(MF, Tmp2);
697292915Sdim  }
698292915Sdim}
699292915Sdim
700292915Sdimvoid HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
701292915Sdim      const UUPairMap &PairMap) {
702292915Sdim  MachineOperand &Op0 = MI->getOperand(0);
703292915Sdim  MachineOperand &Op1 = MI->getOperand(1);
704292915Sdim  assert(Op0.isReg() && Op1.isImm());
705292915Sdim  uint64_t V = Op1.getImm();
706292915Sdim
707292915Sdim  MachineBasicBlock &B = *MI->getParent();
708292915Sdim  DebugLoc DL = MI->getDebugLoc();
709292915Sdim  UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
710292915Sdim  assert(F != PairMap.end());
711292915Sdim  const UUPair &P = F->second;
712292915Sdim
713292915Sdim  // The operand to A2_tfrsi can only have 32 significant bits. Immediate
714292915Sdim  // values in MachineOperand are stored as 64-bit integers, and so the
715292915Sdim  // value -1 may be represented either as 64-bit -1, or 4294967295. Both
716292915Sdim  // will have the 32 higher bits truncated in the end, but -1 will remain
717292915Sdim  // as -1, while the latter may appear to be a large unsigned value
718292915Sdim  // requiring a constant extender. The casting to int32_t will select the
719292915Sdim  // former representation. (The same reasoning applies to all 32-bit
720292915Sdim  // values.)
721292915Sdim  BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
722292915Sdim    .addImm(int32_t(V & 0xFFFFFFFFULL));
723292915Sdim  BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
724292915Sdim    .addImm(int32_t(V >> 32));
725292915Sdim}
726292915Sdim
727292915Sdimvoid HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
728292915Sdim      const UUPairMap &PairMap) {
729292915Sdim  MachineOperand &Op0 = MI->getOperand(0);
730292915Sdim  MachineOperand &Op1 = MI->getOperand(1);
731292915Sdim  MachineOperand &Op2 = MI->getOperand(2);
732292915Sdim  assert(Op0.isReg());
733292915Sdim
734292915Sdim  MachineBasicBlock &B = *MI->getParent();
735292915Sdim  DebugLoc DL = MI->getDebugLoc();
736292915Sdim  UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
737292915Sdim  assert(F != PairMap.end());
738292915Sdim  const UUPair &P = F->second;
739292915Sdim
740341825Sdim  if (!Op1.isReg()) {
741292915Sdim    BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
742341825Sdim      .add(Op1);
743341825Sdim  } else {
744292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
745292915Sdim      .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
746341825Sdim  }
747292915Sdim
748341825Sdim  if (!Op2.isReg()) {
749292915Sdim    BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
750341825Sdim      .add(Op2);
751341825Sdim  } else {
752292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
753292915Sdim      .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
754341825Sdim  }
755292915Sdim}
756292915Sdim
757292915Sdimvoid HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
758292915Sdim      const UUPairMap &PairMap) {
759292915Sdim  MachineOperand &Op0 = MI->getOperand(0);
760292915Sdim  MachineOperand &Op1 = MI->getOperand(1);
761292915Sdim  assert(Op0.isReg() && Op1.isReg());
762292915Sdim
763292915Sdim  MachineBasicBlock &B = *MI->getParent();
764292915Sdim  DebugLoc DL = MI->getDebugLoc();
765292915Sdim  UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
766292915Sdim  assert(F != PairMap.end());
767292915Sdim  const UUPair &P = F->second;
768292915Sdim  unsigned RS = getRegState(Op1);
769292915Sdim
770292915Sdim  BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
771292915Sdim    .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
772292915Sdim  BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
773292915Sdim    .addReg(Op1.getReg(), RS, Op1.getSubReg())
774292915Sdim    .addImm(31);
775292915Sdim}
776292915Sdim
777292915Sdimvoid HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
778292915Sdim      const UUPairMap &PairMap) {
779314564Sdim  using namespace Hexagon;
780314564Sdim
781292915Sdim  MachineOperand &Op0 = MI->getOperand(0);
782292915Sdim  MachineOperand &Op1 = MI->getOperand(1);
783292915Sdim  MachineOperand &Op2 = MI->getOperand(2);
784292915Sdim  assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
785292915Sdim  int64_t Sh64 = Op2.getImm();
786292915Sdim  assert(Sh64 >= 0 && Sh64 < 64);
787292915Sdim  unsigned S = Sh64;
788292915Sdim
789292915Sdim  UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
790292915Sdim  assert(F != PairMap.end());
791292915Sdim  const UUPair &P = F->second;
792360784Sdim  Register LoR = P.first;
793360784Sdim  Register HiR = P.second;
794292915Sdim
795292915Sdim  unsigned Opc = MI->getOpcode();
796292915Sdim  bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
797292915Sdim  bool Left = !Right;
798292915Sdim  bool Signed = (Opc == S2_asr_i_p);
799292915Sdim
800292915Sdim  MachineBasicBlock &B = *MI->getParent();
801292915Sdim  DebugLoc DL = MI->getDebugLoc();
802292915Sdim  unsigned RS = getRegState(Op1);
803292915Sdim  unsigned ShiftOpc = Left ? S2_asl_i_r
804292915Sdim                           : (Signed ? S2_asr_i_r : S2_lsr_i_r);
805314564Sdim  unsigned LoSR = isub_lo;
806314564Sdim  unsigned HiSR = isub_hi;
807292915Sdim
808292915Sdim  if (S == 0) {
809292915Sdim    // No shift, subregister copy.
810292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
811292915Sdim      .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
812292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
813292915Sdim      .addReg(Op1.getReg(), RS, HiSR);
814292915Sdim  } else if (S < 32) {
815292915Sdim    const TargetRegisterClass *IntRC = &IntRegsRegClass;
816360784Sdim    Register TmpR = MRI->createVirtualRegister(IntRC);
817292915Sdim    // Expansion:
818292915Sdim    // Shift left:    DR = shl R, #s
819292915Sdim    //   LoR  = shl R.lo, #s
820292915Sdim    //   TmpR = extractu R.lo, #s, #32-s
821292915Sdim    //   HiR  = or (TmpR, asl(R.hi, #s))
822292915Sdim    // Shift right:   DR = shr R, #s
823292915Sdim    //   HiR  = shr R.hi, #s
824292915Sdim    //   TmpR = shr R.lo, #s
825292915Sdim    //   LoR  = insert TmpR, R.hi, #s, #32-s
826292915Sdim
827292915Sdim    // Shift left:
828292915Sdim    //   LoR  = shl R.lo, #s
829292915Sdim    // Shift right:
830292915Sdim    //   TmpR = shr R.lo, #s
831292915Sdim
832292915Sdim    // Make a special case for A2_aslh and A2_asrh (they are predicable as
833292915Sdim    // opposed to S2_asl_i_r/S2_asr_i_r).
834292915Sdim    if (S == 16 && Left)
835292915Sdim      BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
836292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
837292915Sdim    else if (S == 16 && Signed)
838292915Sdim      BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
839292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
840292915Sdim    else
841292915Sdim      BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
842292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
843292915Sdim        .addImm(S);
844292915Sdim
845292915Sdim    if (Left) {
846292915Sdim      // TmpR = extractu R.lo, #s, #32-s
847292915Sdim      BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
848292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
849292915Sdim        .addImm(S)
850292915Sdim        .addImm(32-S);
851292915Sdim      // HiR  = or (TmpR, asl(R.hi, #s))
852292915Sdim      BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
853292915Sdim        .addReg(TmpR)
854292915Sdim        .addReg(Op1.getReg(), RS, HiSR)
855292915Sdim        .addImm(S);
856292915Sdim    } else {
857292915Sdim      // HiR  = shr R.hi, #s
858292915Sdim      BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
859292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
860292915Sdim        .addImm(S);
861292915Sdim      // LoR  = insert TmpR, R.hi, #s, #32-s
862292915Sdim      BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
863292915Sdim        .addReg(TmpR)
864292915Sdim        .addReg(Op1.getReg(), RS, HiSR)
865292915Sdim        .addImm(S)
866292915Sdim        .addImm(32-S);
867292915Sdim    }
868292915Sdim  } else if (S == 32) {
869292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
870292915Sdim      .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
871292915Sdim    if (!Signed)
872292915Sdim      BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
873292915Sdim        .addImm(0);
874292915Sdim    else  // Must be right shift.
875292915Sdim      BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
876292915Sdim        .addReg(Op1.getReg(), RS, HiSR)
877292915Sdim        .addImm(31);
878292915Sdim  } else if (S < 64) {
879292915Sdim    S -= 32;
880292915Sdim    if (S == 16 && Left)
881292915Sdim      BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
882292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
883292915Sdim    else if (S == 16 && Signed)
884292915Sdim      BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
885292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
886292915Sdim    else
887292915Sdim      BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
888292915Sdim        .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
889292915Sdim        .addImm(S);
890292915Sdim
891292915Sdim    if (Signed)
892292915Sdim      BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
893292915Sdim        .addReg(Op1.getReg(), RS, HiSR)
894292915Sdim        .addImm(31);
895292915Sdim    else
896292915Sdim      BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
897292915Sdim        .addImm(0);
898292915Sdim  }
899292915Sdim}
900292915Sdim
901292915Sdimvoid HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
902292915Sdim      const UUPairMap &PairMap) {
903314564Sdim  using namespace Hexagon;
904314564Sdim
905292915Sdim  MachineOperand &Op0 = MI->getOperand(0);
906292915Sdim  MachineOperand &Op1 = MI->getOperand(1);
907292915Sdim  MachineOperand &Op2 = MI->getOperand(2);
908292915Sdim  MachineOperand &Op3 = MI->getOperand(3);
909292915Sdim  assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
910292915Sdim  int64_t Sh64 = Op3.getImm();
911292915Sdim  assert(Sh64 >= 0 && Sh64 < 64);
912292915Sdim  unsigned S = Sh64;
913292915Sdim
914292915Sdim  UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
915292915Sdim  assert(F != PairMap.end());
916292915Sdim  const UUPair &P = F->second;
917292915Sdim  unsigned LoR = P.first;
918292915Sdim  unsigned HiR = P.second;
919292915Sdim
920292915Sdim  MachineBasicBlock &B = *MI->getParent();
921292915Sdim  DebugLoc DL = MI->getDebugLoc();
922292915Sdim  unsigned RS1 = getRegState(Op1);
923292915Sdim  unsigned RS2 = getRegState(Op2);
924292915Sdim  const TargetRegisterClass *IntRC = &IntRegsRegClass;
925292915Sdim
926314564Sdim  unsigned LoSR = isub_lo;
927314564Sdim  unsigned HiSR = isub_hi;
928292915Sdim
929292915Sdim  // Op0 = S2_asl_i_p_or Op1, Op2, Op3
930292915Sdim  // means:  Op0 = or (Op1, asl(Op2, Op3))
931292915Sdim
932292915Sdim  // Expansion of
933292915Sdim  //   DR = or (R1, asl(R2, #s))
934292915Sdim  //
935292915Sdim  //   LoR  = or (R1.lo, asl(R2.lo, #s))
936292915Sdim  //   Tmp1 = extractu R2.lo, #s, #32-s
937292915Sdim  //   Tmp2 = or R1.hi, Tmp1
938292915Sdim  //   HiR  = or (Tmp2, asl(R2.hi, #s))
939292915Sdim
940292915Sdim  if (S == 0) {
941292915Sdim    // DR  = or (R1, asl(R2, #0))
942292915Sdim    //    -> or (R1, R2)
943292915Sdim    // i.e. LoR = or R1.lo, R2.lo
944292915Sdim    //      HiR = or R1.hi, R2.hi
945292915Sdim    BuildMI(B, MI, DL, TII->get(A2_or), LoR)
946292915Sdim      .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
947292915Sdim      .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
948292915Sdim    BuildMI(B, MI, DL, TII->get(A2_or), HiR)
949292915Sdim      .addReg(Op1.getReg(), RS1, HiSR)
950292915Sdim      .addReg(Op2.getReg(), RS2, HiSR);
951292915Sdim  } else if (S < 32) {
952292915Sdim    BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
953292915Sdim      .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
954292915Sdim      .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
955292915Sdim      .addImm(S);
956360784Sdim    Register TmpR1 = MRI->createVirtualRegister(IntRC);
957292915Sdim    BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
958292915Sdim      .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
959292915Sdim      .addImm(S)
960292915Sdim      .addImm(32-S);
961360784Sdim    Register TmpR2 = MRI->createVirtualRegister(IntRC);
962292915Sdim    BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
963292915Sdim      .addReg(Op1.getReg(), RS1, HiSR)
964292915Sdim      .addReg(TmpR1);
965292915Sdim    BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
966292915Sdim      .addReg(TmpR2)
967292915Sdim      .addReg(Op2.getReg(), RS2, HiSR)
968292915Sdim      .addImm(S);
969292915Sdim  } else if (S == 32) {
970292915Sdim    // DR  = or (R1, asl(R2, #32))
971292915Sdim    //    -> or R1, R2.lo
972292915Sdim    // LoR = R1.lo
973292915Sdim    // HiR = or R1.hi, R2.lo
974292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
975292915Sdim      .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
976292915Sdim    BuildMI(B, MI, DL, TII->get(A2_or), HiR)
977292915Sdim      .addReg(Op1.getReg(), RS1, HiSR)
978292915Sdim      .addReg(Op2.getReg(), RS2, LoSR);
979292915Sdim  } else if (S < 64) {
980292915Sdim    // DR  = or (R1, asl(R2, #s))
981292915Sdim    //
982292915Sdim    // LoR = R1:lo
983292915Sdim    // HiR = or (R1:hi, asl(R2:lo, #s-32))
984292915Sdim    S -= 32;
985292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
986292915Sdim      .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
987292915Sdim    BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
988292915Sdim      .addReg(Op1.getReg(), RS1, HiSR)
989292915Sdim      .addReg(Op2.getReg(), RS2, LoSR)
990292915Sdim      .addImm(S);
991292915Sdim  }
992292915Sdim}
993292915Sdim
994292915Sdimbool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
995292915Sdim      const UUPairMap &PairMap) {
996314564Sdim  using namespace Hexagon;
997314564Sdim
998341825Sdim  LLVM_DEBUG(dbgs() << "Splitting: " << *MI);
999292915Sdim  bool Split = false;
1000292915Sdim  unsigned Opc = MI->getOpcode();
1001292915Sdim
1002292915Sdim  switch (Opc) {
1003292915Sdim    case TargetOpcode::PHI:
1004292915Sdim    case TargetOpcode::COPY: {
1005360784Sdim      Register DstR = MI->getOperand(0).getReg();
1006292915Sdim      if (MRI->getRegClass(DstR) == DoubleRC) {
1007314564Sdim        createHalfInstr(Opc, MI, PairMap, isub_lo);
1008314564Sdim        createHalfInstr(Opc, MI, PairMap, isub_hi);
1009292915Sdim        Split = true;
1010292915Sdim      }
1011292915Sdim      break;
1012292915Sdim    }
1013292915Sdim    case A2_andp:
1014314564Sdim      createHalfInstr(A2_and, MI, PairMap, isub_lo);
1015314564Sdim      createHalfInstr(A2_and, MI, PairMap, isub_hi);
1016292915Sdim      Split = true;
1017292915Sdim      break;
1018292915Sdim    case A2_orp:
1019314564Sdim      createHalfInstr(A2_or, MI, PairMap, isub_lo);
1020314564Sdim      createHalfInstr(A2_or, MI, PairMap, isub_hi);
1021292915Sdim      Split = true;
1022292915Sdim      break;
1023292915Sdim    case A2_xorp:
1024314564Sdim      createHalfInstr(A2_xor, MI, PairMap, isub_lo);
1025314564Sdim      createHalfInstr(A2_xor, MI, PairMap, isub_hi);
1026292915Sdim      Split = true;
1027292915Sdim      break;
1028292915Sdim
1029292915Sdim    case L2_loadrd_io:
1030292915Sdim    case L2_loadrd_pi:
1031292915Sdim    case S2_storerd_io:
1032292915Sdim    case S2_storerd_pi:
1033292915Sdim      splitMemRef(MI, PairMap);
1034292915Sdim      Split = true;
1035292915Sdim      break;
1036292915Sdim
1037292915Sdim    case A2_tfrpi:
1038314564Sdim    case CONST64:
1039292915Sdim      splitImmediate(MI, PairMap);
1040292915Sdim      Split = true;
1041292915Sdim      break;
1042292915Sdim
1043292915Sdim    case A2_combineii:
1044292915Sdim    case A4_combineir:
1045292915Sdim    case A4_combineii:
1046292915Sdim    case A4_combineri:
1047292915Sdim    case A2_combinew:
1048292915Sdim      splitCombine(MI, PairMap);
1049292915Sdim      Split = true;
1050292915Sdim      break;
1051292915Sdim
1052292915Sdim    case A2_sxtw:
1053292915Sdim      splitExt(MI, PairMap);
1054292915Sdim      Split = true;
1055292915Sdim      break;
1056292915Sdim
1057292915Sdim    case S2_asl_i_p:
1058292915Sdim    case S2_asr_i_p:
1059292915Sdim    case S2_lsr_i_p:
1060292915Sdim      splitShift(MI, PairMap);
1061292915Sdim      Split = true;
1062292915Sdim      break;
1063292915Sdim
1064292915Sdim    case S2_asl_i_p_or:
1065292915Sdim      splitAslOr(MI, PairMap);
1066292915Sdim      Split = true;
1067292915Sdim      break;
1068292915Sdim
1069292915Sdim    default:
1070292915Sdim      llvm_unreachable("Instruction not splitable");
1071292915Sdim      return false;
1072292915Sdim  }
1073292915Sdim
1074292915Sdim  return Split;
1075292915Sdim}
1076292915Sdim
1077292915Sdimvoid HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
1078292915Sdim      const UUPairMap &PairMap) {
1079292915Sdim  for (auto &Op : MI->operands()) {
1080292915Sdim    if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
1081292915Sdim      continue;
1082360784Sdim    Register R = Op.getReg();
1083292915Sdim    UUPairMap::const_iterator F = PairMap.find(R);
1084292915Sdim    if (F == PairMap.end())
1085292915Sdim      continue;
1086292915Sdim    const UUPair &P = F->second;
1087292915Sdim    switch (Op.getSubReg()) {
1088314564Sdim      case Hexagon::isub_lo:
1089292915Sdim        Op.setReg(P.first);
1090292915Sdim        break;
1091314564Sdim      case Hexagon::isub_hi:
1092292915Sdim        Op.setReg(P.second);
1093292915Sdim        break;
1094292915Sdim    }
1095292915Sdim    Op.setSubReg(0);
1096292915Sdim  }
1097292915Sdim}
1098292915Sdim
1099292915Sdimvoid HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
1100292915Sdim      const UUPairMap &PairMap) {
1101292915Sdim  MachineBasicBlock &B = *MI->getParent();
1102292915Sdim  DebugLoc DL = MI->getDebugLoc();
1103292915Sdim
1104292915Sdim  for (auto &Op : MI->operands()) {
1105292915Sdim    if (!Op.isReg() || !Op.isUse())
1106292915Sdim      continue;
1107360784Sdim    Register R = Op.getReg();
1108360784Sdim    if (!Register::isVirtualRegister(R))
1109292915Sdim      continue;
1110292915Sdim    if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
1111292915Sdim      continue;
1112292915Sdim    UUPairMap::const_iterator F = PairMap.find(R);
1113292915Sdim    if (F == PairMap.end())
1114292915Sdim      continue;
1115292915Sdim    const UUPair &Pr = F->second;
1116360784Sdim    Register NewDR = MRI->createVirtualRegister(DoubleRC);
1117292915Sdim    BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
1118292915Sdim      .addReg(Pr.first)
1119314564Sdim      .addImm(Hexagon::isub_lo)
1120292915Sdim      .addReg(Pr.second)
1121314564Sdim      .addImm(Hexagon::isub_hi);
1122292915Sdim    Op.setReg(NewDR);
1123292915Sdim  }
1124292915Sdim}
1125292915Sdim
1126292915Sdimbool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
1127327952Sdim  using MISet = std::set<MachineInstr *>;
1128327952Sdim
1129292915Sdim  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
1130292915Sdim  bool Changed = false;
1131292915Sdim
1132341825Sdim  LLVM_DEBUG(dbgs() << "Splitting partition: ";
1133341825Sdim             dump_partition(dbgs(), Part, *TRI); dbgs() << '\n');
1134292915Sdim
1135292915Sdim  UUPairMap PairMap;
1136292915Sdim
1137292915Sdim  MISet SplitIns;
1138292915Sdim  for (unsigned DR : Part) {
1139292915Sdim    MachineInstr *DefI = MRI->getVRegDef(DR);
1140292915Sdim    SplitIns.insert(DefI);
1141292915Sdim
1142292915Sdim    // Collect all instructions, including fixed ones.  We won't split them,
1143292915Sdim    // but we need to visit them again to insert the REG_SEQUENCE instructions.
1144292915Sdim    for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
1145292915Sdim         U != W; ++U)
1146292915Sdim      SplitIns.insert(U->getParent());
1147292915Sdim
1148360784Sdim    Register LoR = MRI->createVirtualRegister(IntRC);
1149360784Sdim    Register HiR = MRI->createVirtualRegister(IntRC);
1150341825Sdim    LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> "
1151341825Sdim                      << printReg(HiR, TRI) << ':' << printReg(LoR, TRI)
1152341825Sdim                      << '\n');
1153292915Sdim    PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
1154292915Sdim  }
1155292915Sdim
1156292915Sdim  MISet Erase;
1157292915Sdim  for (auto MI : SplitIns) {
1158292915Sdim    if (isFixedInstr(MI)) {
1159292915Sdim      collapseRegPairs(MI, PairMap);
1160292915Sdim    } else {
1161292915Sdim      bool Done = splitInstr(MI, PairMap);
1162292915Sdim      if (Done)
1163292915Sdim        Erase.insert(MI);
1164292915Sdim      Changed |= Done;
1165292915Sdim    }
1166292915Sdim  }
1167292915Sdim
1168292915Sdim  for (unsigned DR : Part) {
1169292915Sdim    // Before erasing "double" instructions, revisit all uses of the double
1170292915Sdim    // registers in this partition, and replace all uses of them with subre-
1171292915Sdim    // gisters, with the corresponding single registers.
1172292915Sdim    MISet Uses;
1173292915Sdim    for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
1174292915Sdim         U != W; ++U)
1175292915Sdim      Uses.insert(U->getParent());
1176292915Sdim    for (auto M : Uses)
1177292915Sdim      replaceSubregUses(M, PairMap);
1178292915Sdim  }
1179292915Sdim
1180292915Sdim  for (auto MI : Erase) {
1181292915Sdim    MachineBasicBlock *B = MI->getParent();
1182292915Sdim    B->erase(MI);
1183292915Sdim  }
1184292915Sdim
1185292915Sdim  return Changed;
1186292915Sdim}
1187292915Sdim
1188292915Sdimbool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
1189327952Sdim  if (skipFunction(MF.getFunction()))
1190309124Sdim    return false;
1191309124Sdim
1192341825Sdim  LLVM_DEBUG(dbgs() << "Splitting double registers in function: "
1193341825Sdim                    << MF.getName() << '\n');
1194341825Sdim
1195292915Sdim  auto &ST = MF.getSubtarget<HexagonSubtarget>();
1196292915Sdim  TRI = ST.getRegisterInfo();
1197292915Sdim  TII = ST.getInstrInfo();
1198292915Sdim  MRI = &MF.getRegInfo();
1199292915Sdim  MLI = &getAnalysis<MachineLoopInfo>();
1200292915Sdim
1201292915Sdim  UUSetMap P2Rs;
1202292915Sdim  LoopRegMap IRM;
1203292915Sdim
1204292915Sdim  collectIndRegs(IRM);
1205292915Sdim  partitionRegisters(P2Rs);
1206292915Sdim
1207341825Sdim  LLVM_DEBUG({
1208292915Sdim    dbgs() << "Register partitioning: (partition #0 is fixed)\n";
1209292915Sdim    for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
1210292915Sdim      dbgs() << '#' << I->first << " -> ";
1211292915Sdim      dump_partition(dbgs(), I->second, *TRI);
1212292915Sdim      dbgs() << '\n';
1213292915Sdim    }
1214292915Sdim  });
1215292915Sdim
1216292915Sdim  bool Changed = false;
1217292915Sdim  int Limit = MaxHSDR;
1218292915Sdim
1219292915Sdim  for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
1220292915Sdim    if (I->first == 0)
1221292915Sdim      continue;
1222292915Sdim    if (Limit >= 0 && Counter >= Limit)
1223292915Sdim      break;
1224292915Sdim    USet &Part = I->second;
1225341825Sdim    LLVM_DEBUG(dbgs() << "Calculating profit for partition #" << I->first
1226341825Sdim                      << '\n');
1227292915Sdim    if (!isProfitable(Part, IRM))
1228292915Sdim      continue;
1229292915Sdim    Counter++;
1230292915Sdim    Changed |= splitPartition(Part);
1231292915Sdim  }
1232292915Sdim
1233292915Sdim  return Changed;
1234292915Sdim}
1235292915Sdim
1236292915SdimFunctionPass *llvm::createHexagonSplitDoubleRegs() {
1237292915Sdim  return new HexagonSplitDoubleRegs();
1238292915Sdim}
1239