1//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
11/// instructions that produce single-use VGPR values. If the value is forwarded
12/// to the consumer instruction prior to VGPR writeback, the hardware can
13/// then skip (kill) the VGPR write.
14//
15//===----------------------------------------------------------------------===//
16
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
39
40namespace {
41class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
42private:
43  const SIInstrInfo *SII;
44
45public:
46  static char ID;
47
48  AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
49
50  void emitSingleUseVDST(MachineInstr &MI) const {
51    // Mark the following instruction as a single-use producer:
52    //   s_singleuse_vdst { supr0: 1 }
53    BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
54        .addImm(0x1);
55  }
56
57  bool runOnMachineFunction(MachineFunction &MF) override {
58    const auto &ST = MF.getSubtarget<GCNSubtarget>();
59    if (!ST.hasVGPRSingleUseHintInsts())
60      return false;
61
62    SII = ST.getInstrInfo();
63    const auto *TRI = &SII->getRegisterInfo();
64    bool InstructionEmitted = false;
65
66    for (MachineBasicBlock &MBB : MF) {
67      DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
68
69      // Handle boundaries at the end of basic block separately to avoid
70      // false positives. If they are live at the end of a basic block then
71      // assume it has more uses later on.
72      for (const auto &Liveouts : MBB.liveouts())
73        RegisterUseCount[Liveouts.PhysReg] = 2;
74
75      for (MachineInstr &MI : reverse(MBB.instrs())) {
76        // All registers in all operands need to be single use for an
77        // instruction to be marked as a single use producer.
78        bool AllProducerOperandsAreSingleUse = true;
79
80        for (const auto &Operand : MI.operands()) {
81          if (!Operand.isReg())
82            continue;
83          const auto Reg = Operand.getReg();
84
85          // Count the number of times each register is read.
86          if (Operand.readsReg())
87            RegisterUseCount[Reg]++;
88
89          // Do not attempt to optimise across exec mask changes.
90          if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
91            for (auto &UsedReg : RegisterUseCount)
92              UsedReg.second = 2;
93          }
94
95          // If we are at the point where the register first became live,
96          // check if the operands are single use.
97          if (!MI.modifiesRegister(Reg, TRI))
98            continue;
99          if (RegisterUseCount[Reg] > 1)
100            AllProducerOperandsAreSingleUse = false;
101          // Reset uses count when a register is no longer live.
102          RegisterUseCount.erase(Reg);
103        }
104        if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
105          // TODO: Replace with candidate logging for instruction grouping
106          // later.
107          emitSingleUseVDST(MI);
108          InstructionEmitted = true;
109        }
110      }
111    }
112    return InstructionEmitted;
113  }
114};
115} // namespace
116
// Out-of-class definition of the pass's static ID member.
char AMDGPUInsertSingleUseVDST::ID = 0;

// Reference to the ID above, defined in the llvm namespace so the pass can be
// named from outside this file's anonymous namespace.
// NOTE(review): the matching extern declaration is presumably in AMDGPU.h.
char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;

// Register the pass under the DEBUG_TYPE string
// ("amdgpu-insert-single-use-vdst") with a human-readable description. The
// two trailing `false` arguments are the macro's CFG-only and is-analysis
// flags.
INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
                "AMDGPU Insert SingleUseVDST", false, false)
123