//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-img-init"

using namespace llvm;

namespace {

class SIAddIMGInit : public MachineFunctionPass {
public:
  static char ID;

public:
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }

bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields.
        // There shouldn't be any at this point.
        assert(TFE && LWE && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE is non-zero.
          // We have to insert a suitable initialization of the result value
          // and tie this to the dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();

          int DstIdx =
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
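          // For example, an image load with TFE = 1 and dmask = 0xf has four
          // active lanes (countPopulation(0xf) == 4), so the InitIdx computed
          // below is 5: the error dword to clear is the one after the four
          // data dwords. With packed D16 the four halves fit in two VGPRs,
          // giving InitIdx = 3.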
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // Check that the dmask operand was found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case.
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1.
          // With D16, we want the next whole VGPR after the write data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5,
                        "Subreg indices different from expected");

          bool Packed = !ST.hasUnpackedD16VMem();

          unsigned InitIdx =
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon the attempt if the dst size isn't large enough - this is
          // in fact an error, but it is picked up elsewhere and reported
          // correctly.
          uint32_t DstSize =
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
            continue;

          // Create a register for the initialization value.
          Register PrevDst =
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here.

          // If the PRTStrictNull feature is enabled (the default) then
          // initialize all the result registers to 0, otherwise just the
          // error indication register (VGPRn+1).
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

          if (DstSize == 1) {
            // In this case we can just initialize the result directly.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
                .addImm(0);
            NewDst = PrevDst;
          } else {
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
              NewDst =
                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
              // Initialize dword.
              Register SubReg =
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
                  .addImm(0);
              // Insert into the super-reg.
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
                  .addReg(PrevDst)
                  .addReg(SubReg)
                  .addImm(CurrIdx);

              PrevDst = NewDst;
            }
          }

          // Add as an implicit operand.
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just-added implicit operand to the dst.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

          Changed = true;
        }
      }
    }
  }

  return Changed;
}