1//===- R600MergeVectorRegisters.cpp ---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using the instructions'
/// swizzle abilities.
12///
13/// For instance let's consider the following pseudo code :
14/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
15/// ...
16/// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
17/// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
18///
19/// is turned into :
20/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
21/// ...
22/// %7 = INSERT_SUBREG %4, sub3
23/// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
24///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
27//===----------------------------------------------------------------------===//
28
29#include "AMDGPU.h"
30#include "AMDGPUSubtarget.h"
31#include "R600Defines.h"
32#include "R600InstrInfo.h"
33#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
34#include "llvm/ADT/DenseMap.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/CodeGen/MachineBasicBlock.h"
38#include "llvm/CodeGen/MachineDominators.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineFunctionPass.h"
41#include "llvm/CodeGen/MachineInstr.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachineLoopInfo.h"
44#include "llvm/CodeGen/MachineOperand.h"
45#include "llvm/CodeGen/MachineRegisterInfo.h"
46#include "llvm/IR/DebugLoc.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/raw_ostream.h"
51#include <cassert>
52#include <utility>
53#include <vector>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "vec-merger"
58
59static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) {
60  if (Reg.isPhysical())
61    return false;
62  const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
63  return MI && MI->isImplicitDef();
64}
65
66namespace {
67
68class RegSeqInfo {
69public:
70  MachineInstr *Instr;
71  DenseMap<Register, unsigned> RegToChan;
72  std::vector<Register> UndefReg;
73
74  RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
75    assert(MI->getOpcode() == R600::REG_SEQUENCE);
76    for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
77      MachineOperand &MO = Instr->getOperand(i);
78      unsigned Chan = Instr->getOperand(i + 1).getImm();
79      if (isImplicitlyDef(MRI, MO.getReg()))
80        UndefReg.push_back(Chan);
81      else
82        RegToChan[MO.getReg()] = Chan;
83    }
84  }
85
86  RegSeqInfo() = default;
87
88  bool operator==(const RegSeqInfo &RSI) const {
89    return RSI.Instr == Instr;
90  }
91};
92
93class R600VectorRegMerger : public MachineFunctionPass {
94private:
95  using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
96
97  MachineRegisterInfo *MRI;
98  const R600InstrInfo *TII = nullptr;
99  DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
100  InstructionSetMap PreviousRegSeqByReg;
101  InstructionSetMap PreviousRegSeqByUndefCount;
102
103  bool canSwizzle(const MachineInstr &MI) const;
104  bool areAllUsesSwizzeable(Register Reg) const;
105  void SwizzleInput(MachineInstr &,
106      const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
107  bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
108      std::vector<std::pair<unsigned, unsigned>> &Remap) const;
109  bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
110      std::vector<std::pair<unsigned, unsigned>> &RemapChan);
111  bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
112      std::vector<std::pair<unsigned, unsigned>> &RemapChan);
113  MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
114      const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
115  void RemoveMI(MachineInstr *);
116  void trackRSI(const RegSeqInfo &RSI);
117
118public:
119  static char ID;
120
121  R600VectorRegMerger() : MachineFunctionPass(ID) {}
122
123  void getAnalysisUsage(AnalysisUsage &AU) const override {
124    AU.setPreservesCFG();
125    AU.addRequired<MachineDominatorTree>();
126    AU.addPreserved<MachineDominatorTree>();
127    AU.addRequired<MachineLoopInfo>();
128    AU.addPreserved<MachineLoopInfo>();
129    MachineFunctionPass::getAnalysisUsage(AU);
130  }
131
132  MachineFunctionProperties getRequiredProperties() const override {
133    return MachineFunctionProperties()
134      .set(MachineFunctionProperties::Property::IsSSA);
135  }
136
137  StringRef getPassName() const override {
138    return "R600 Vector Registers Merge Pass";
139  }
140
141  bool runOnMachineFunction(MachineFunction &Fn) override;
142};
143
144} // end anonymous namespace
145
146INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
147                     "R600 Vector Reg Merger", false, false)
148INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
149                    "R600 Vector Reg Merger", false, false)
150
151char R600VectorRegMerger::ID = 0;
152
153char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
154
155bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
156    const {
157  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
158    return true;
159  switch (MI.getOpcode()) {
160  case R600::R600_ExportSwz:
161  case R600::EG_ExportSwz:
162    return true;
163  default:
164    return false;
165  }
166}
167
168bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
169    RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
170    const {
171  unsigned CurrentUndexIdx = 0;
172  for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(),
173      E = ToMerge->RegToChan.end(); It != E; ++It) {
174    DenseMap<Register, unsigned>::const_iterator PosInUntouched =
175        Untouched->RegToChan.find((*It).first);
176    if (PosInUntouched != Untouched->RegToChan.end()) {
177      Remap.push_back(std::pair<unsigned, unsigned>
178          ((*It).second, (*PosInUntouched).second));
179      continue;
180    }
181    if (CurrentUndexIdx >= Untouched->UndefReg.size())
182      return false;
183    Remap.push_back(std::pair<unsigned, unsigned>
184        ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
185  }
186
187  return true;
188}
189
190static
191unsigned getReassignedChan(
192    const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
193    unsigned Chan) {
194  for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
195    if (RemapChan[j].first == Chan)
196      return RemapChan[j].second;
197  }
198  llvm_unreachable("Chan wasn't reassigned");
199}
200
201MachineInstr *R600VectorRegMerger::RebuildVector(
202    RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
203    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
204  Register Reg = RSI->Instr->getOperand(0).getReg();
205  MachineBasicBlock::iterator Pos = RSI->Instr;
206  MachineBasicBlock &MBB = *Pos->getParent();
207  DebugLoc DL = Pos->getDebugLoc();
208
209  Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
210  DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
211  std::vector<Register> UpdatedUndef = BaseRSI->UndefReg;
212  for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(),
213      E = RSI->RegToChan.end(); It != E; ++It) {
214    Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
215    unsigned SubReg = (*It).first;
216    unsigned Swizzle = (*It).second;
217    unsigned Chan = getReassignedChan(RemapChan, Swizzle);
218
219    MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
220        DstReg)
221        .addReg(SrcVec)
222        .addReg(SubReg)
223        .addImm(Chan);
224    UpdatedRegToChan[SubReg] = Chan;
225    std::vector<Register>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
226    if (ChanPos != UpdatedUndef.end())
227      UpdatedUndef.erase(ChanPos);
228    assert(!is_contained(UpdatedUndef, Chan) &&
229           "UpdatedUndef shouldn't contain Chan more than once!");
230    LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
231    (void)Tmp;
232    SrcVec = DstReg;
233  }
234  MachineInstr *NewMI =
235      BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
236  LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););
237
238  LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
239  for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
240      E = MRI->use_instr_end(); It != E; ++It) {
241    LLVM_DEBUG(dbgs() << "    "; (*It).dump(); dbgs() << "    ->");
242    SwizzleInput(*It, RemapChan);
243    LLVM_DEBUG((*It).dump());
244  }
245  RSI->Instr->eraseFromParent();
246
247  // Update RSI
248  RSI->Instr = NewMI;
249  RSI->RegToChan = UpdatedRegToChan;
250  RSI->UndefReg = UpdatedUndef;
251
252  return NewMI;
253}
254
255void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
256  for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
257      E = PreviousRegSeqByReg.end(); It != E; ++It) {
258    std::vector<MachineInstr *> &MIs = (*It).second;
259    MIs.erase(llvm::find(MIs, MI), MIs.end());
260  }
261  for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
262      E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
263    std::vector<MachineInstr *> &MIs = (*It).second;
264    MIs.erase(llvm::find(MIs, MI), MIs.end());
265  }
266}
267
268void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
269    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
270  unsigned Offset;
271  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
272    Offset = 2;
273  else
274    Offset = 3;
275  for (unsigned i = 0; i < 4; i++) {
276    unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
277    for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
278      if (RemapChan[j].first == Swizzle) {
279        MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
280        break;
281      }
282    }
283  }
284}
285
286bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const {
287  for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
288      E = MRI->use_instr_end(); It != E; ++It) {
289    if (!canSwizzle(*It))
290      return false;
291  }
292  return true;
293}
294
295bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
296    RegSeqInfo &CompatibleRSI,
297    std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
298  for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
299      MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
300    if (!MOp->isReg())
301      continue;
302    if (PreviousRegSeqByReg[MOp->getReg()].empty())
303      continue;
304    for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
305      CompatibleRSI = PreviousRegSeq[MI];
306      if (RSI == CompatibleRSI)
307        continue;
308      if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
309        return true;
310    }
311  }
312  return false;
313}
314
315bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
316    RegSeqInfo &CompatibleRSI,
317    std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
318  unsigned NeededUndefs = 4 - RSI.UndefReg.size();
319  if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
320    return false;
321  std::vector<MachineInstr *> &MIs =
322      PreviousRegSeqByUndefCount[NeededUndefs];
323  CompatibleRSI = PreviousRegSeq[MIs.back()];
324  tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
325  return true;
326}
327
328void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
329  for (DenseMap<Register, unsigned>::const_iterator
330  It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
331    PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
332  }
333  PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
334  PreviousRegSeq[RSI.Instr] = RSI;
335}
336
337bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
338  if (skipFunction(Fn.getFunction()))
339    return false;
340
341  const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
342  TII = ST.getInstrInfo();
343  MRI = &Fn.getRegInfo();
344
345  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
346       MBB != MBBe; ++MBB) {
347    MachineBasicBlock *MB = &*MBB;
348    PreviousRegSeq.clear();
349    PreviousRegSeqByReg.clear();
350    PreviousRegSeqByUndefCount.clear();
351
352    for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
353         MII != MIIE; ++MII) {
354      MachineInstr &MI = *MII;
355      if (MI.getOpcode() != R600::REG_SEQUENCE) {
356        if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
357          Register Reg = MI.getOperand(1).getReg();
358          for (MachineRegisterInfo::def_instr_iterator
359               It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
360               It != E; ++It) {
361            RemoveMI(&(*It));
362          }
363        }
364        continue;
365      }
366
367      RegSeqInfo RSI(*MRI, &MI);
368
369      // All uses of MI are swizzeable ?
370      Register Reg = MI.getOperand(0).getReg();
371      if (!areAllUsesSwizzeable(Reg))
372        continue;
373
374      LLVM_DEBUG({
375        dbgs() << "Trying to optimize ";
376        MI.dump();
377      });
378
379      RegSeqInfo CandidateRSI;
380      std::vector<std::pair<unsigned, unsigned>> RemapChan;
381      LLVM_DEBUG(dbgs() << "Using common slots...\n";);
382      if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
383        // Remove CandidateRSI mapping
384        RemoveMI(CandidateRSI.Instr);
385        MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
386        trackRSI(RSI);
387        continue;
388      }
389      LLVM_DEBUG(dbgs() << "Using free slots...\n";);
390      RemapChan.clear();
391      if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
392        RemoveMI(CandidateRSI.Instr);
393        MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
394        trackRSI(RSI);
395        continue;
396      }
397      //Failed to merge
398      trackRSI(RSI);
399    }
400  }
401  return false;
402}
403
/// Factory for the pass: returns a newly heap-allocated R600VectorRegMerger.
/// The function itself does not keep the pointer.
llvm::FunctionPass *llvm::createR600VectorRegMerger() {
  return new R600VectorRegMerger();
}
407