// SIAddIMGInit.cpp — LLVM revision 343171
1//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// Any MIMG instructions that use tfe or lwe require an initialization of the
12/// result register that will be written in the case of a memory access failure
13/// The required code is also added to tie this init code to the result of the
14/// img instruction
15///
16//===----------------------------------------------------------------------===//
17//
18
19#include "AMDGPU.h"
20#include "AMDGPUSubtarget.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIInstrInfo.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/IR/Function.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Target/TargetMachine.h"
29
30#define DEBUG_TYPE "si-img-init"
31
32using namespace llvm;
33
34namespace {
35
36class SIAddIMGInit : public MachineFunctionPass {
37public:
38  static char ID;
39
40public:
41  SIAddIMGInit() : MachineFunctionPass(ID) {
42    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
43  }
44
45  bool runOnMachineFunction(MachineFunction &MF) override;
46
47  void getAnalysisUsage(AnalysisUsage &AU) const override {
48    AU.setPreservesCFG();
49    MachineFunctionPass::getAnalysisUsage(AU);
50  }
51};
52
53} // End anonymous namespace.
54
// Register the pass with the legacy pass manager under DEBUG_TYPE
// ("si-img-init"); it is neither a CFG-only nor an analysis pass.
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

// Unique address used as the pass identifier.
char SIAddIMGInit::ID = 0;

// Exposed pass ID so the AMDGPU target pass config can reference this pass.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

// Factory used by the target machine to instantiate the pass.
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
62
63bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
64  MachineRegisterInfo &MRI = MF.getRegInfo();
65  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
66  const SIInstrInfo *TII = ST.getInstrInfo();
67  const SIRegisterInfo *RI = ST.getRegisterInfo();
68  bool Changed = false;
69
70  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
71       ++BI) {
72    MachineBasicBlock &MBB = *BI;
73    MachineBasicBlock::iterator I, Next;
74    for (I = MBB.begin(); I != MBB.end(); I = Next) {
75      Next = std::next(I);
76      MachineInstr &MI = *I;
77
78      auto Opcode = MI.getOpcode();
79      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
80        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
81        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
82        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
83
84        // Check for instructions that don't have tfe or lwe fields
85        // There shouldn't be any at this point.
86        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
87
88        unsigned TFEVal = TFE->getImm();
89        unsigned LWEVal = LWE->getImm();
90        unsigned D16Val = D16 ? D16->getImm() : 0;
91
92        if (TFEVal || LWEVal) {
93          // At least one of TFE or LWE are non-zero
94          // We have to insert a suitable initialization of the result value and
95          // tie this to the dest of the image instruction.
96
97          const DebugLoc &DL = MI.getDebugLoc();
98
99          int DstIdx =
100              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
101
102          // Calculate which dword we have to initialize to 0.
103          MachineOperand *MO_Dmask =
104              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
105
106          // check that dmask operand is found.
107          assert(MO_Dmask && "Expected dmask operand in instruction");
108
109          unsigned dmask = MO_Dmask->getImm();
110          // Determine the number of active lanes taking into account the
111          // Gather4 special case
112          unsigned ActiveLanes =
113              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
114
115          // Subreg indices are counted from 1
116          // When D16 then we want next whole VGPR after write data.
117          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
118
119          bool Packed = !ST.hasUnpackedD16VMem();
120
121          unsigned InitIdx =
122              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
123
124          // Abandon attempt if the dst size isn't large enough
125          // - this is in fact an error but this is picked up elsewhere and
126          // reported correctly.
127          uint32_t DstSize =
128              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
129          if (DstSize < InitIdx)
130            continue;
131
132          // Create a register for the intialization value.
133          unsigned PrevDst =
134              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
135          unsigned NewDst = 0; // Final initialized value will be in here
136
137          // If PRTStrictNull feature is enabled (the default) then initialize
138          // all the result registers to 0, otherwise just the error indication
139          // register (VGPRn+1)
140          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
141          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
142
143          if (DstSize == 1) {
144            // In this case we can just initialize the result directly
145            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
146                .addImm(0);
147            NewDst = PrevDst;
148          } else {
149            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
150            for (; SizeLeft; SizeLeft--, CurrIdx++) {
151              NewDst =
152                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
153              // Initialize dword
154              unsigned SubReg =
155                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
156              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
157                  .addImm(0);
158              // Insert into the super-reg
159              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
160                  .addReg(PrevDst)
161                  .addReg(SubReg)
162                  .addImm(CurrIdx);
163
164              PrevDst = NewDst;
165            }
166          }
167
168          // Add as an implicit operand
169          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
170
171          // Tie the just added implicit operand to the dst
172          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
173
174          Changed = true;
175        }
176      }
177    }
178  }
179
180  return Changed;
181}
182