1//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9/// \file 10/// Any MIMG instructions that use tfe or lwe require an initialization of the 11/// result register that will be written in the case of a memory access failure 12/// The required code is also added to tie this init code to the result of the 13/// img instruction 14/// 15//===----------------------------------------------------------------------===// 16// 17 18#include "AMDGPU.h" 19#include "AMDGPUSubtarget.h" 20#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21#include "SIInstrInfo.h" 22#include "llvm/CodeGen/MachineFunctionPass.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25#include "llvm/IR/Function.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Target/TargetMachine.h" 28 29#define DEBUG_TYPE "si-img-init" 30 31using namespace llvm; 32 33namespace { 34 35class SIAddIMGInit : public MachineFunctionPass { 36public: 37 static char ID; 38 39public: 40 SIAddIMGInit() : MachineFunctionPass(ID) { 41 initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); 42 } 43 44 bool runOnMachineFunction(MachineFunction &MF) override; 45 46 void getAnalysisUsage(AnalysisUsage &AU) const override { 47 AU.setPreservesCFG(); 48 MachineFunctionPass::getAnalysisUsage(AU); 49 } 50}; 51 52} // End anonymous namespace. 53 54INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) 55 56char SIAddIMGInit::ID = 0; 57 58char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; 59 60FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } 61 62bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { 63 MachineRegisterInfo &MRI = MF.getRegInfo(); 64 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 65 const SIInstrInfo *TII = ST.getInstrInfo(); 66 const SIRegisterInfo *RI = ST.getRegisterInfo(); 67 bool Changed = false; 68 69 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; 70 ++BI) { 71 MachineBasicBlock &MBB = *BI; 72 MachineBasicBlock::iterator I, Next; 73 for (I = MBB.begin(); I != MBB.end(); I = Next) { 74 Next = std::next(I); 75 MachineInstr &MI = *I; 76 77 auto Opcode = MI.getOpcode(); 78 if (TII->isMIMG(Opcode) && !MI.mayStore()) { 79 MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); 80 MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); 81 MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); 82 83 // Check for instructions that don't have tfe or lwe fields 84 // There shouldn't be any at this point. 85 assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); 86 87 unsigned TFEVal = TFE->getImm(); 88 unsigned LWEVal = LWE->getImm(); 89 unsigned D16Val = D16 ? D16->getImm() : 0; 90 91 if (TFEVal || LWEVal) { 92 // At least one of TFE or LWE are non-zero 93 // We have to insert a suitable initialization of the result value and 94 // tie this to the dest of the image instruction. 95 96 const DebugLoc &DL = MI.getDebugLoc(); 97 98 int DstIdx = 99 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); 100 101 // Calculate which dword we have to initialize to 0. 102 MachineOperand *MO_Dmask = 103 TII->getNamedOperand(MI, AMDGPU::OpName::dmask); 104 105 // check that dmask operand is found. 106 assert(MO_Dmask && "Expected dmask operand in instruction"); 107 108 unsigned dmask = MO_Dmask->getImm(); 109 // Determine the number of active lanes taking into account the 110 // Gather4 special case 111 unsigned ActiveLanes = 112 TII->isGather4(Opcode) ? 4 : countPopulation(dmask); 113 114 bool Packed = !ST.hasUnpackedD16VMem(); 115 116 unsigned InitIdx = 117 D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; 118 119 // Abandon attempt if the dst size isn't large enough 120 // - this is in fact an error but this is picked up elsewhere and 121 // reported correctly. 122 uint32_t DstSize = 123 RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; 124 if (DstSize < InitIdx) 125 continue; 126 127 // Create a register for the intialization value. 128 Register PrevDst = 129 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 130 unsigned NewDst = 0; // Final initialized value will be in here 131 132 // If PRTStrictNull feature is enabled (the default) then initialize 133 // all the result registers to 0, otherwise just the error indication 134 // register (VGPRn+1) 135 unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; 136 unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1); 137 138 if (DstSize == 1) { 139 // In this case we can just initialize the result directly 140 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) 141 .addImm(0); 142 NewDst = PrevDst; 143 } else { 144 BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); 145 for (; SizeLeft; SizeLeft--, CurrIdx++) { 146 NewDst = 147 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 148 // Initialize dword 149 Register SubReg = 150 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 151 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) 152 .addImm(0); 153 // Insert into the super-reg 154 BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) 155 .addReg(PrevDst) 156 .addReg(SubReg) 157 .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx)); 158 159 PrevDst = NewDst; 160 } 161 } 162 163 // Add as an implicit operand 164 MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); 165 166 // Tie the just added implicit operand to the dst 167 MI.tieOperands(DstIdx, MI.getNumOperands() - 1); 168 169 Changed = true; 170 } 171 } 172 } 173 } 174 175 return Changed; 176} 177