//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single
/// pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
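/// (sub0 supplies the low 32 bits and sub1 the high 32 bits, so the packed
/// immediate is (1ULL << 32) | 2 == 0x100000002.)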
///
/// This allows the value to be rematerialized instead of spilled. The pass is
/// meant to run after the register coalescer, so the coalescer can do its job
/// first, and before actual register allocation, so rematerialization is
/// still possible.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same should be possible for other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

namespace {

class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
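    // LiveIntervals is required up front; the pass updates it in place (see
    // processReg), so all analyses can be declared preserved.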
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}

bool GCNPreRAOptimizations::processReg(Register Reg) {
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  uint64_t Init = 0;
  bool Changed = false;
  SmallSet<Register, 32> ModifiedRegs;
  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));

  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    switch (I.getOpcode()) {
    default:
      return false;
    case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
      break;
    case AMDGPU::COPY: {
      // Some subtargets cannot do an AGPR-to-AGPR copy directly and need an
      // intermediate temporary VGPR. Try to find the defining accvgpr_write
      // and forward its source to avoid the temporary register.
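      //
      // For example (illustrative MIR):
      //   %src:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %val:vgpr_32
      //   %dst:agpr_32 = COPY %src
      // is rewritten so the COPY reads %val directly, which the post-RA
      // pseudo expansion can then lower without a temporary VGPR.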

      if (!IsAGPRDst)
        return false;

      Register SrcReg = I.getOperand(1).getReg();

      if (!SrcReg.isVirtual())
        break;

      // Check if the source of the copy is another AGPR.
      bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
      if (!IsAGPRSrc)
        break;

      // def_instructions() does not look at subregs, so it may return an
      // instruction that defines the same vreg but a different subreg; check
      // the subreg manually.
      Register SrcSubReg = I.getOperand(1).getSubReg();
      for (auto &Def : MRI->def_instructions(SrcReg)) {
        if (SrcSubReg != Def.getOperand(0).getSubReg())
          continue;

        if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
          MachineOperand DefSrcMO = Def.getOperand(1);

          // Immediates are not an issue and can be propagated in the
          // postrapseudos pass. Only handle cases where the defining
          // accvgpr_write source is a vreg.
          if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
            // Propagate the source reg of the accvgpr_write to this copy.
            I.getOperand(1).setReg(DefSrcMO.getReg());
            I.getOperand(1).setSubReg(DefSrcMO.getSubReg());

            // Register uses changed; collect the unique set of registers
            // whose live intervals must be updated at the end.
            ModifiedRegs.insert(DefSrcMO.getReg());
            ModifiedRegs.insert(SrcReg);

            Changed = true;
          }

          // Found the defining accvgpr_write; stop looking any further.
          break;
        }
      }
      break;
    }
    case AMDGPU::S_MOV_B32:
      if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
          I.getNumOperands() != 2)
        return false;

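      // Accumulate the packed 64-bit immediate: sub0 supplies the low 32
      // bits, sub1 the high 32 bits. Bail out on a repeated subreg def or an
      // unexpected subreg index.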
      switch (I.getOperand(0).getSubReg()) {
      default:
        return false;
      case AMDGPU::sub0:
        if (Def0)
          return false;
        Def0 = &I;
        Init |= I.getOperand(1).getImm() & 0xffffffff;
        break;
      case AMDGPU::sub1:
        if (Def1)
          return false;
        Def1 = &I;
        Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
        break;
      }
      break;
    }
  }

  // For an AGPR destination, only the live intervals of the modified
  // registers need to be updated.
  if (IsAGPRDst) {
    if (Changed) {
      for (Register RegToUpdate : ModifiedRegs) {
        LIS->removeInterval(RegToUpdate);
        LIS->createAndComputeVirtRegInterval(RegToUpdate);
      }
    }

    return Changed;
  }

  // For an SGPR destination, check whether the two 32-bit defs can be
  // combined into a single 64-bit move.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return Changed;

  LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
                    << "    =>\n");

  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);
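
  // Def0 now points at the earlier of the two moves; insert the combined
  // 64-bit move there and rebuild the live interval for Reg.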
  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << "  " << *NewI);

  return true;
}

bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
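    // Only visit 64-bit SGPRs (S_MOV_B32 pairs) and, on subtargets without
    // GFX90A instructions (which lack direct AGPR-to-AGPR copies), AGPRs.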
    if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
        (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
      continue;

    Changed |= processReg(Reg);
  }

  return Changed;
}