//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///         %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
24/// 25//===----------------------------------------------------------------------===// 26 27#include "AMDGPU.h" 28#include "GCNSubtarget.h" 29#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 30#include "llvm/CodeGen/LiveIntervals.h" 31#include "llvm/CodeGen/MachineFunctionPass.h" 32#include "llvm/InitializePasses.h" 33 34using namespace llvm; 35 36#define DEBUG_TYPE "amdgpu-pre-ra-optimizations" 37 38namespace { 39 40class GCNPreRAOptimizations : public MachineFunctionPass { 41private: 42 const SIInstrInfo *TII; 43 const SIRegisterInfo *TRI; 44 MachineRegisterInfo *MRI; 45 LiveIntervals *LIS; 46 47 bool processReg(Register Reg); 48 49public: 50 static char ID; 51 52 GCNPreRAOptimizations() : MachineFunctionPass(ID) { 53 initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); 54 } 55 56 bool runOnMachineFunction(MachineFunction &MF) override; 57 58 StringRef getPassName() const override { 59 return "AMDGPU Pre-RA optimizations"; 60 } 61 62 void getAnalysisUsage(AnalysisUsage &AU) const override { 63 AU.addRequired<LiveIntervals>(); 64 AU.setPreservesAll(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67}; 68 69} // End anonymous namespace. 
70 71INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, 72 "AMDGPU Pre-RA optimizations", false, false) 73INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 74INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", 75 false, false) 76 77char GCNPreRAOptimizations::ID = 0; 78 79char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; 80 81FunctionPass *llvm::createGCNPreRAOptimizationsPass() { 82 return new GCNPreRAOptimizations(); 83} 84 85bool GCNPreRAOptimizations::processReg(Register Reg) { 86 MachineInstr *Def0 = nullptr; 87 MachineInstr *Def1 = nullptr; 88 uint64_t Init = 0; 89 bool Changed = false; 90 SmallSet<Register, 32> ModifiedRegs; 91 bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg)); 92 93 for (MachineInstr &I : MRI->def_instructions(Reg)) { 94 switch (I.getOpcode()) { 95 default: 96 return false; 97 case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 98 break; 99 case AMDGPU::COPY: { 100 // Some subtargets cannot do an AGPR to AGPR copy directly, and need an 101 // intermdiate temporary VGPR register. Try to find the defining 102 // accvgpr_write to avoid temporary registers. 103 104 if (!IsAGPRDst) 105 return false; 106 107 Register SrcReg = I.getOperand(1).getReg(); 108 109 if (!SrcReg.isVirtual()) 110 break; 111 112 // Check if source of copy is from another AGPR. 113 bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg)); 114 if (!IsAGPRSrc) 115 break; 116 117 // def_instructions() does not look at subregs so it may give us a 118 // different instruction that defines the same vreg but different subreg 119 // so we have to manually check subreg. 120 Register SrcSubReg = I.getOperand(1).getSubReg(); 121 for (auto &Def : MRI->def_instructions(SrcReg)) { 122 if (SrcSubReg != Def.getOperand(0).getSubReg()) 123 continue; 124 125 if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 126 MachineOperand DefSrcMO = Def.getOperand(1); 127 128 // Immediates are not an issue and can be propagated in 129 // postrapseudos pass. 
Only handle cases where defining 130 // accvgpr_write source is a vreg. 131 if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) { 132 // Propagate source reg of accvgpr write to this copy instruction 133 I.getOperand(1).setReg(DefSrcMO.getReg()); 134 I.getOperand(1).setSubReg(DefSrcMO.getSubReg()); 135 136 // Reg uses were changed, collect unique set of registers to update 137 // live intervals at the end. 138 ModifiedRegs.insert(DefSrcMO.getReg()); 139 ModifiedRegs.insert(SrcReg); 140 141 Changed = true; 142 } 143 144 // Found the defining accvgpr_write, stop looking any further. 145 break; 146 } 147 } 148 break; 149 } 150 case AMDGPU::S_MOV_B32: 151 if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() || 152 I.getNumOperands() != 2) 153 return false; 154 155 switch (I.getOperand(0).getSubReg()) { 156 default: 157 return false; 158 case AMDGPU::sub0: 159 if (Def0) 160 return false; 161 Def0 = &I; 162 Init |= I.getOperand(1).getImm() & 0xffffffff; 163 break; 164 case AMDGPU::sub1: 165 if (Def1) 166 return false; 167 Def1 = &I; 168 Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32; 169 break; 170 } 171 break; 172 } 173 } 174 175 // For AGPR reg, check if live intervals need to be updated. 176 if (IsAGPRDst) { 177 if (Changed) { 178 for (Register RegToUpdate : ModifiedRegs) { 179 LIS->removeInterval(RegToUpdate); 180 LIS->createAndComputeVirtRegInterval(RegToUpdate); 181 } 182 } 183 184 return Changed; 185 } 186 187 // For SGPR reg, check if we can combine instructions. 
188 if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) 189 return Changed; 190 191 LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 192 << " =>\n"); 193 194 if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), 195 LIS->getInstructionIndex(*Def0))) 196 std::swap(Def0, Def1); 197 198 LIS->RemoveMachineInstrFromMaps(*Def0); 199 LIS->RemoveMachineInstrFromMaps(*Def1); 200 auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), 201 TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) 202 .addImm(Init); 203 204 Def0->eraseFromParent(); 205 Def1->eraseFromParent(); 206 LIS->InsertMachineInstrInMaps(*NewI); 207 LIS->removeInterval(Reg); 208 LIS->createAndComputeVirtRegInterval(Reg); 209 210 LLVM_DEBUG(dbgs() << " " << *NewI); 211 212 return true; 213} 214 215bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { 216 if (skipFunction(MF.getFunction())) 217 return false; 218 219 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 220 TII = ST.getInstrInfo(); 221 MRI = &MF.getRegInfo(); 222 LIS = &getAnalysis<LiveIntervals>(); 223 TRI = ST.getRegisterInfo(); 224 225 bool Changed = false; 226 227 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { 228 Register Reg = Register::index2VirtReg(I); 229 if (!LIS->hasInterval(Reg)) 230 continue; 231 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 232 if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) && 233 (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC))) 234 continue; 235 236 Changed |= processReg(Reg); 237 } 238 239 return Changed; 240} 241