//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
///
//===----------------------------------------------------------------------===//
16//
17
18#include "AMDGPU.h"
19#include "AMDGPUSubtarget.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIInstrInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/IR/Function.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Target/TargetMachine.h"
28
29#define DEBUG_TYPE "si-img-init"
30
31using namespace llvm;
32
33namespace {
34
35class SIAddIMGInit : public MachineFunctionPass {
36public:
37  static char ID;
38
39public:
40  SIAddIMGInit() : MachineFunctionPass(ID) {
41    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
42  }
43
44  bool runOnMachineFunction(MachineFunction &MF) override;
45
46  void getAnalysisUsage(AnalysisUsage &AU) const override {
47    AU.setPreservesCFG();
48    MachineFunctionPass::getAnalysisUsage(AU);
49  }
50};
51
52} // End anonymous namespace.
53
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

// Pass identification; the address of ID serves as the unique pass token.
char SIAddIMGInit::ID = 0;

// Exported reference to the pass ID so other code can identify this pass.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

// Factory function used to instantiate the pass for the pass pipeline.
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
61
// For every non-store MIMG instruction with tfe or lwe set, insert code that
// initializes the destination register(s), and tie the initialized value to
// the instruction's dst so both end up allocated to the same registers.
bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      // Capture the successor up front; new instructions are inserted before
      // MI, so advancing via Next keeps the iteration stable.
      Next = std::next(I);
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields
        // There shouldn't be any at this point.
        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE are non-zero
          // We have to insert a suitable initialization of the result value and
          // tie this to the dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();

          // NOTE(review): DstIdx is assumed valid (>= 0) below — a non-store
          // MIMG should always carry a vdata operand; confirm.
          int DstIdx =
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // check that dmask operand is found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case (which always writes 4 components).
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1
          // When D16 then we want next whole VGPR after write data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");

          bool Packed = !ST.hasUnpackedD16VMem();

          // The error word lands in the VGPR just past the write data; with
          // packed D16, two dmask lanes share one VGPR, hence the halving
          // (rounded up) before adding one for the error VGPR.
          unsigned InitIdx =
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon attempt if the dst size isn't large enough
          // - this is in fact an error but this is picked up elsewhere and
          // reported correctly.
          uint32_t DstSize =
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
            continue;

          // Create a register for the initialization value.
          Register PrevDst =
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here

          // If PRTStrictNull feature is enabled (the default) then initialize
          // all the result registers to 0, otherwise just the error indication
          // register (VGPRn+1)
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

          if (DstSize == 1) {
            // In this case we can just initialize the result directly
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
                .addImm(0);
            NewDst = PrevDst;
          } else {
            // Multi-dword result: start from an IMPLICIT_DEF of the full
            // register and build up a chain of INSERT_SUBREGs, zeroing one
            // dword per iteration. Each iteration produces a fresh vreg so
            // the chain stays in SSA form.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
              NewDst =
                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
              // Initialize dword
              Register SubReg =
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
                  .addImm(0);
              // Insert into the super-reg
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
                  .addReg(PrevDst)
                  .addReg(SubReg)
                  .addImm(CurrIdx);

              PrevDst = NewDst;
            }
          }

          // Add as an implicit operand
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just added implicit operand to the dst, so the register
          // allocator assigns the init value and the result to the same
          // registers.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

          Changed = true;
        }
      }
    }
  }

  return Changed;
}
181