SIAddIMGInit.cpp revision 343171
//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure
/// The required code is also added to tie this init code to the result of the
/// img instruction
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-img-init"

using namespace llvm;

namespace {

/// Machine-function pass that inserts the result-register initialization
/// required by MIMG loads with the tfe or lwe bit set (see the \file comment
/// above). Registered via INITIALIZE_PASS with DEBUG_TYPE "si-img-init".
class SIAddIMGInit : public MachineFunctionPass {
public:
  static char ID;

public:
  // Registers the pass with the global PassRegistry on construction.
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  }

  /// Scans every instruction in \p MF and adds init code for MIMG loads
  /// using tfe/lwe. Returns true if any instructions were added.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // The pass only inserts instructions inside existing blocks, so the CFG
  // is left intact.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
54 55INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) 56 57char SIAddIMGInit::ID = 0; 58 59char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; 60 61FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } 62 63bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { 64 MachineRegisterInfo &MRI = MF.getRegInfo(); 65 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 66 const SIInstrInfo *TII = ST.getInstrInfo(); 67 const SIRegisterInfo *RI = ST.getRegisterInfo(); 68 bool Changed = false; 69 70 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; 71 ++BI) { 72 MachineBasicBlock &MBB = *BI; 73 MachineBasicBlock::iterator I, Next; 74 for (I = MBB.begin(); I != MBB.end(); I = Next) { 75 Next = std::next(I); 76 MachineInstr &MI = *I; 77 78 auto Opcode = MI.getOpcode(); 79 if (TII->isMIMG(Opcode) && !MI.mayStore()) { 80 MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); 81 MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); 82 MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); 83 84 // Check for instructions that don't have tfe or lwe fields 85 // There shouldn't be any at this point. 86 assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); 87 88 unsigned TFEVal = TFE->getImm(); 89 unsigned LWEVal = LWE->getImm(); 90 unsigned D16Val = D16 ? D16->getImm() : 0; 91 92 if (TFEVal || LWEVal) { 93 // At least one of TFE or LWE are non-zero 94 // We have to insert a suitable initialization of the result value and 95 // tie this to the dest of the image instruction. 96 97 const DebugLoc &DL = MI.getDebugLoc(); 98 99 int DstIdx = 100 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); 101 102 // Calculate which dword we have to initialize to 0. 103 MachineOperand *MO_Dmask = 104 TII->getNamedOperand(MI, AMDGPU::OpName::dmask); 105 106 // check that dmask operand is found. 
107 assert(MO_Dmask && "Expected dmask operand in instruction"); 108 109 unsigned dmask = MO_Dmask->getImm(); 110 // Determine the number of active lanes taking into account the 111 // Gather4 special case 112 unsigned ActiveLanes = 113 TII->isGather4(Opcode) ? 4 : countPopulation(dmask); 114 115 // Subreg indices are counted from 1 116 // When D16 then we want next whole VGPR after write data. 117 static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); 118 119 bool Packed = !ST.hasUnpackedD16VMem(); 120 121 unsigned InitIdx = 122 D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; 123 124 // Abandon attempt if the dst size isn't large enough 125 // - this is in fact an error but this is picked up elsewhere and 126 // reported correctly. 127 uint32_t DstSize = 128 RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; 129 if (DstSize < InitIdx) 130 continue; 131 132 // Create a register for the intialization value. 133 unsigned PrevDst = 134 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 135 unsigned NewDst = 0; // Final initialized value will be in here 136 137 // If PRTStrictNull feature is enabled (the default) then initialize 138 // all the result registers to 0, otherwise just the error indication 139 // register (VGPRn+1) 140 unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; 141 unsigned CurrIdx = ST.usePRTStrictNull() ? 
1 : InitIdx; 142 143 if (DstSize == 1) { 144 // In this case we can just initialize the result directly 145 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) 146 .addImm(0); 147 NewDst = PrevDst; 148 } else { 149 BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); 150 for (; SizeLeft; SizeLeft--, CurrIdx++) { 151 NewDst = 152 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 153 // Initialize dword 154 unsigned SubReg = 155 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 156 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) 157 .addImm(0); 158 // Insert into the super-reg 159 BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) 160 .addReg(PrevDst) 161 .addReg(SubReg) 162 .addImm(CurrIdx); 163 164 PrevDst = NewDst; 165 } 166 } 167 168 // Add as an implicit operand 169 MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); 170 171 // Tie the just added implicit operand to the dst 172 MI.tieOperands(DstIdx, MI.getNumOperands() - 1); 173 174 Changed = true; 175 } 176 } 177 } 178 } 179 180 return Changed; 181} 182