1343171Sdim//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// 2343171Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6343171Sdim// 7343171Sdim//===----------------------------------------------------------------------===// 8343171Sdim// 9343171Sdim/// \file 10343171Sdim/// Any MIMG instructions that use tfe or lwe require an initialization of the 11343171Sdim/// result register that will be written in the case of a memory access failure 12343171Sdim/// The required code is also added to tie this init code to the result of the 13343171Sdim/// img instruction 14343171Sdim/// 15343171Sdim//===----------------------------------------------------------------------===// 16343171Sdim// 17343171Sdim 18343171Sdim#include "AMDGPU.h" 19343171Sdim#include "AMDGPUSubtarget.h" 20343171Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21343171Sdim#include "SIInstrInfo.h" 22343171Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 23343171Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 24343171Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25343171Sdim#include "llvm/IR/Function.h" 26343171Sdim#include "llvm/Support/Debug.h" 27343171Sdim#include "llvm/Target/TargetMachine.h" 28343171Sdim 29343171Sdim#define DEBUG_TYPE "si-img-init" 30343171Sdim 31343171Sdimusing namespace llvm; 32343171Sdim 33343171Sdimnamespace { 34343171Sdim 35343171Sdimclass SIAddIMGInit : public MachineFunctionPass { 36343171Sdimpublic: 37343171Sdim static char ID; 38343171Sdim 39343171Sdimpublic: 40343171Sdim SIAddIMGInit() : MachineFunctionPass(ID) { 41343171Sdim initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); 42343171Sdim } 43343171Sdim 44343171Sdim bool runOnMachineFunction(MachineFunction &MF) override; 45343171Sdim 46343171Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 47343171Sdim AU.setPreservesCFG(); 48343171Sdim MachineFunctionPass::getAnalysisUsage(AU); 49343171Sdim } 50343171Sdim}; 51343171Sdim 52343171Sdim} // End anonymous namespace. 53343171Sdim 54343171SdimINITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) 55343171Sdim 56343171Sdimchar SIAddIMGInit::ID = 0; 57343171Sdim 58343171Sdimchar &llvm::SIAddIMGInitID = SIAddIMGInit::ID; 59343171Sdim 60343171SdimFunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } 61343171Sdim 62343171Sdimbool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { 63343171Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 64343171Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 65343171Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 66343171Sdim const SIRegisterInfo *RI = ST.getRegisterInfo(); 67343171Sdim bool Changed = false; 68343171Sdim 69343171Sdim for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; 70343171Sdim ++BI) { 71343171Sdim MachineBasicBlock &MBB = *BI; 72343171Sdim MachineBasicBlock::iterator I, Next; 73343171Sdim for (I = MBB.begin(); I != MBB.end(); I = Next) { 74343171Sdim Next = std::next(I); 75343171Sdim MachineInstr &MI = *I; 76343171Sdim 77343171Sdim auto Opcode = MI.getOpcode(); 78343171Sdim if (TII->isMIMG(Opcode) && !MI.mayStore()) { 79343171Sdim MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); 80343171Sdim MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); 81343171Sdim MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); 82343171Sdim 83343171Sdim // Check for instructions that don't have tfe or lwe fields 84343171Sdim // There shouldn't be any at this point. 85343171Sdim assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); 86343171Sdim 87343171Sdim unsigned TFEVal = TFE->getImm(); 88343171Sdim unsigned LWEVal = LWE->getImm(); 89343171Sdim unsigned D16Val = D16 ? D16->getImm() : 0; 90343171Sdim 91343171Sdim if (TFEVal || LWEVal) { 92343171Sdim // At least one of TFE or LWE are non-zero 93343171Sdim // We have to insert a suitable initialization of the result value and 94343171Sdim // tie this to the dest of the image instruction. 95343171Sdim 96343171Sdim const DebugLoc &DL = MI.getDebugLoc(); 97343171Sdim 98343171Sdim int DstIdx = 99343171Sdim AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); 100343171Sdim 101343171Sdim // Calculate which dword we have to initialize to 0. 102343171Sdim MachineOperand *MO_Dmask = 103343171Sdim TII->getNamedOperand(MI, AMDGPU::OpName::dmask); 104343171Sdim 105343171Sdim // check that dmask operand is found. 106343171Sdim assert(MO_Dmask && "Expected dmask operand in instruction"); 107343171Sdim 108343171Sdim unsigned dmask = MO_Dmask->getImm(); 109343171Sdim // Determine the number of active lanes taking into account the 110343171Sdim // Gather4 special case 111343171Sdim unsigned ActiveLanes = 112343171Sdim TII->isGather4(Opcode) ? 4 : countPopulation(dmask); 113343171Sdim 114343171Sdim // Subreg indices are counted from 1 115343171Sdim // When D16 then we want next whole VGPR after write data. 116343171Sdim static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); 117343171Sdim 118343171Sdim bool Packed = !ST.hasUnpackedD16VMem(); 119343171Sdim 120343171Sdim unsigned InitIdx = 121343171Sdim D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; 122343171Sdim 123343171Sdim // Abandon attempt if the dst size isn't large enough 124343171Sdim // - this is in fact an error but this is picked up elsewhere and 125343171Sdim // reported correctly. 126343171Sdim uint32_t DstSize = 127343171Sdim RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; 128343171Sdim if (DstSize < InitIdx) 129343171Sdim continue; 130343171Sdim 131343171Sdim // Create a register for the intialization value. 132360784Sdim Register PrevDst = 133343171Sdim MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 134343171Sdim unsigned NewDst = 0; // Final initialized value will be in here 135343171Sdim 136343171Sdim // If PRTStrictNull feature is enabled (the default) then initialize 137343171Sdim // all the result registers to 0, otherwise just the error indication 138343171Sdim // register (VGPRn+1) 139343171Sdim unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; 140343171Sdim unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; 141343171Sdim 142343171Sdim if (DstSize == 1) { 143343171Sdim // In this case we can just initialize the result directly 144343171Sdim BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) 145343171Sdim .addImm(0); 146343171Sdim NewDst = PrevDst; 147343171Sdim } else { 148343171Sdim BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); 149343171Sdim for (; SizeLeft; SizeLeft--, CurrIdx++) { 150343171Sdim NewDst = 151343171Sdim MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); 152343171Sdim // Initialize dword 153360784Sdim Register SubReg = 154343171Sdim MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 155343171Sdim BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) 156343171Sdim .addImm(0); 157343171Sdim // Insert into the super-reg 158343171Sdim BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) 159343171Sdim .addReg(PrevDst) 160343171Sdim .addReg(SubReg) 161343171Sdim .addImm(CurrIdx); 162343171Sdim 163343171Sdim PrevDst = NewDst; 164343171Sdim } 165343171Sdim } 166343171Sdim 167343171Sdim // Add as an implicit operand 168343171Sdim MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); 169343171Sdim 170343171Sdim // Tie the just added implicit operand to the dst 171343171Sdim MI.tieOperands(DstIdx, MI.getNumOperands() - 1); 172343171Sdim 173343171Sdim Changed = true; 174343171Sdim } 175343171Sdim } 176343171Sdim } 177343171Sdim } 178343171Sdim 179343171Sdim return Changed; 180343171Sdim} 181