1343171Sdim//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
2343171Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6343171Sdim//
7343171Sdim//===----------------------------------------------------------------------===//
8343171Sdim//
9343171Sdim/// \file
/// Any MIMG instruction that uses tfe or lwe requires an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
14343171Sdim///
15343171Sdim//===----------------------------------------------------------------------===//
16343171Sdim//
17343171Sdim
18343171Sdim#include "AMDGPU.h"
19343171Sdim#include "AMDGPUSubtarget.h"
20343171Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21343171Sdim#include "SIInstrInfo.h"
22343171Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
23343171Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
24343171Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
25343171Sdim#include "llvm/IR/Function.h"
26343171Sdim#include "llvm/Support/Debug.h"
27343171Sdim#include "llvm/Target/TargetMachine.h"
28343171Sdim
29343171Sdim#define DEBUG_TYPE "si-img-init"
30343171Sdim
31343171Sdimusing namespace llvm;
32343171Sdim
33343171Sdimnamespace {
34343171Sdim
35343171Sdimclass SIAddIMGInit : public MachineFunctionPass {
36343171Sdimpublic:
37343171Sdim  static char ID;
38343171Sdim
39343171Sdimpublic:
40343171Sdim  SIAddIMGInit() : MachineFunctionPass(ID) {
41343171Sdim    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
42343171Sdim  }
43343171Sdim
44343171Sdim  bool runOnMachineFunction(MachineFunction &MF) override;
45343171Sdim
46343171Sdim  void getAnalysisUsage(AnalysisUsage &AU) const override {
47343171Sdim    AU.setPreservesCFG();
48343171Sdim    MachineFunctionPass::getAnalysisUsage(AU);
49343171Sdim  }
50343171Sdim};
51343171Sdim
52343171Sdim} // End anonymous namespace.
53343171Sdim
54343171SdimINITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
55343171Sdim
56343171Sdimchar SIAddIMGInit::ID = 0;
57343171Sdim
58343171Sdimchar &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
59343171Sdim
60343171SdimFunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
61343171Sdim
62343171Sdimbool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
63343171Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
64343171Sdim  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
65343171Sdim  const SIInstrInfo *TII = ST.getInstrInfo();
66343171Sdim  const SIRegisterInfo *RI = ST.getRegisterInfo();
67343171Sdim  bool Changed = false;
68343171Sdim
69343171Sdim  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
70343171Sdim       ++BI) {
71343171Sdim    MachineBasicBlock &MBB = *BI;
72343171Sdim    MachineBasicBlock::iterator I, Next;
73343171Sdim    for (I = MBB.begin(); I != MBB.end(); I = Next) {
74343171Sdim      Next = std::next(I);
75343171Sdim      MachineInstr &MI = *I;
76343171Sdim
77343171Sdim      auto Opcode = MI.getOpcode();
78343171Sdim      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
79343171Sdim        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
80343171Sdim        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
81343171Sdim        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
82343171Sdim
83343171Sdim        // Check for instructions that don't have tfe or lwe fields
84343171Sdim        // There shouldn't be any at this point.
85343171Sdim        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
86343171Sdim
87343171Sdim        unsigned TFEVal = TFE->getImm();
88343171Sdim        unsigned LWEVal = LWE->getImm();
89343171Sdim        unsigned D16Val = D16 ? D16->getImm() : 0;
90343171Sdim
91343171Sdim        if (TFEVal || LWEVal) {
92343171Sdim          // At least one of TFE or LWE are non-zero
93343171Sdim          // We have to insert a suitable initialization of the result value and
94343171Sdim          // tie this to the dest of the image instruction.
95343171Sdim
96343171Sdim          const DebugLoc &DL = MI.getDebugLoc();
97343171Sdim
98343171Sdim          int DstIdx =
99343171Sdim              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
100343171Sdim
101343171Sdim          // Calculate which dword we have to initialize to 0.
102343171Sdim          MachineOperand *MO_Dmask =
103343171Sdim              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
104343171Sdim
105343171Sdim          // check that dmask operand is found.
106343171Sdim          assert(MO_Dmask && "Expected dmask operand in instruction");
107343171Sdim
108343171Sdim          unsigned dmask = MO_Dmask->getImm();
109343171Sdim          // Determine the number of active lanes taking into account the
110343171Sdim          // Gather4 special case
111343171Sdim          unsigned ActiveLanes =
112343171Sdim              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
113343171Sdim
114343171Sdim          // Subreg indices are counted from 1
115343171Sdim          // When D16 then we want next whole VGPR after write data.
116343171Sdim          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
117343171Sdim
118343171Sdim          bool Packed = !ST.hasUnpackedD16VMem();
119343171Sdim
120343171Sdim          unsigned InitIdx =
121343171Sdim              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
122343171Sdim
123343171Sdim          // Abandon attempt if the dst size isn't large enough
124343171Sdim          // - this is in fact an error but this is picked up elsewhere and
125343171Sdim          // reported correctly.
126343171Sdim          uint32_t DstSize =
127343171Sdim              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
128343171Sdim          if (DstSize < InitIdx)
129343171Sdim            continue;
130343171Sdim
131343171Sdim          // Create a register for the intialization value.
132360784Sdim          Register PrevDst =
133343171Sdim              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
134343171Sdim          unsigned NewDst = 0; // Final initialized value will be in here
135343171Sdim
136343171Sdim          // If PRTStrictNull feature is enabled (the default) then initialize
137343171Sdim          // all the result registers to 0, otherwise just the error indication
138343171Sdim          // register (VGPRn+1)
139343171Sdim          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
140343171Sdim          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
141343171Sdim
142343171Sdim          if (DstSize == 1) {
143343171Sdim            // In this case we can just initialize the result directly
144343171Sdim            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
145343171Sdim                .addImm(0);
146343171Sdim            NewDst = PrevDst;
147343171Sdim          } else {
148343171Sdim            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
149343171Sdim            for (; SizeLeft; SizeLeft--, CurrIdx++) {
150343171Sdim              NewDst =
151343171Sdim                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
152343171Sdim              // Initialize dword
153360784Sdim              Register SubReg =
154343171Sdim                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
155343171Sdim              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
156343171Sdim                  .addImm(0);
157343171Sdim              // Insert into the super-reg
158343171Sdim              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
159343171Sdim                  .addReg(PrevDst)
160343171Sdim                  .addReg(SubReg)
161343171Sdim                  .addImm(CurrIdx);
162343171Sdim
163343171Sdim              PrevDst = NewDst;
164343171Sdim            }
165343171Sdim          }
166343171Sdim
167343171Sdim          // Add as an implicit operand
168343171Sdim          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
169343171Sdim
170343171Sdim          // Tie the just added implicit operand to the dst
171343171Sdim          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
172343171Sdim
173343171Sdim          Changed = true;
174343171Sdim        }
175343171Sdim      }
176343171Sdim    }
177343171Sdim  }
178343171Sdim
179343171Sdim  return Changed;
180343171Sdim}
181