1//===-- SIFormMemoryClauses.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass extends the live ranges of registers used as pointers in
10/// sequences of adjacent SMEM and VMEM instructions if XNACK is enabled. A
11/// load that would overwrite a pointer would require breaking the soft clause.
12/// Artificially extend the live ranges of the pointer operands by adding
13/// implicit-def early-clobber operands throughout the soft clause.
14///
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPU.h"
18#include "GCNRegPressure.h"
19#include "SIMachineFunctionInfo.h"
20#include "llvm/InitializePasses.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "si-form-memory-clauses"
25
// Clauses longer than 15 instructions would overflow one of the counters
// and stall. They can stall even earlier if there are outstanding counters.
//
// Hidden command-line option, initialized to 15. runOnMachineFunction() passes
// it as the third argument of AMDGPU::getIntegerAttribute() when reading the
// "amdgpu-max-memory-clause" function attribute (presumably as the fallback
// value when the attribute is absent -- confirm against that helper).
static cl::opt<unsigned>
MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
          cl::desc("Maximum length of a memory clause, instructions"));
31
32namespace {
33
34class SIFormMemoryClauses : public MachineFunctionPass {
35  typedef DenseMap<unsigned, std::pair<unsigned, LaneBitmask>> RegUse;
36
37public:
38  static char ID;
39
40public:
41  SIFormMemoryClauses() : MachineFunctionPass(ID) {
42    initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
43  }
44
45  bool runOnMachineFunction(MachineFunction &MF) override;
46
47  StringRef getPassName() const override {
48    return "SI Form memory clauses";
49  }
50
51  void getAnalysisUsage(AnalysisUsage &AU) const override {
52    AU.addRequired<LiveIntervals>();
53    AU.setPreservesAll();
54    MachineFunctionPass::getAnalysisUsage(AU);
55  }
56
57  MachineFunctionProperties getClearedProperties() const override {
58    return MachineFunctionProperties().set(
59        MachineFunctionProperties::Property::IsSSA);
60  }
61
62private:
63  bool canBundle(const MachineInstr &MI, const RegUse &Defs,
64                 const RegUse &Uses) const;
65  bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
66  void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
67  bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
68                      GCNDownwardRPTracker &RPT);
69
70  const GCNSubtarget *ST;
71  const SIRegisterInfo *TRI;
72  const MachineRegisterInfo *MRI;
73  SIMachineFunctionInfo *MFI;
74
75  unsigned LastRecordedOccupancy;
76  unsigned MaxVGPRs;
77  unsigned MaxSGPRs;
78};
79
80} // End anonymous namespace.
81
// Pass initialization boilerplate: names the pass DEBUG_TYPE
// ("si-form-memory-clauses"), gives it the description "SI Form memory
// clauses" and lists LiveIntervals as a dependency. Presumably these macros
// provide the initializeSIFormMemoryClausesPass() function that the
// constructor calls -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
                      "SI Form memory clauses", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
                    "SI Form memory clauses", false, false)


// Definition of the static pass identifier handed to the MachineFunctionPass
// constructor.
char SIFormMemoryClauses::ID = 0;

// Reference to the identifier above, published in the llvm namespace.
char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;
92
93FunctionPass *llvm::createSIFormMemoryClausesPass() {
94  return new SIFormMemoryClauses();
95}
96
97static bool isVMEMClauseInst(const MachineInstr &MI) {
98  return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
99}
100
101static bool isSMEMClauseInst(const MachineInstr &MI) {
102  return SIInstrInfo::isSMRD(MI);
103}
104
105// There no sense to create store clauses, they do not define anything,
106// thus there is nothing to set early-clobber.
107static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
108  assert(!MI.isDebugInstr() && "debug instructions should not reach here");
109  if (MI.isBundled())
110    return false;
111  if (!MI.mayLoad() || MI.mayStore())
112    return false;
113  if (SIInstrInfo::isAtomic(MI))
114    return false;
115  if (IsVMEMClause && !isVMEMClauseInst(MI))
116    return false;
117  if (!IsVMEMClause && !isSMEMClauseInst(MI))
118    return false;
119  // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
120  for (const MachineOperand &ResMO : MI.defs()) {
121    Register ResReg = ResMO.getReg();
122    for (const MachineOperand &MO : MI.uses()) {
123      if (!MO.isReg() || MO.isDef())
124        continue;
125      if (MO.getReg() == ResReg)
126        return false;
127    }
128    break; // Only check the first def.
129  }
130  return true;
131}
132
133static unsigned getMopState(const MachineOperand &MO) {
134  unsigned S = 0;
135  if (MO.isImplicit())
136    S |= RegState::Implicit;
137  if (MO.isDead())
138    S |= RegState::Dead;
139  if (MO.isUndef())
140    S |= RegState::Undef;
141  if (MO.isKill())
142    S |= RegState::Kill;
143  if (MO.isEarlyClobber())
144    S |= RegState::EarlyClobber;
145  if (MO.getReg().isPhysical() && MO.isRenamable())
146    S |= RegState::Renamable;
147  return S;
148}
149
150// Returns false if there is a use of a def already in the map.
151// In this case we must break the clause.
152bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
153                                    const RegUse &Uses) const {
154  // Check interference with defs.
155  for (const MachineOperand &MO : MI.operands()) {
156    // TODO: Prologue/Epilogue Insertion pass does not process bundled
157    //       instructions.
158    if (MO.isFI())
159      return false;
160
161    if (!MO.isReg())
162      continue;
163
164    Register Reg = MO.getReg();
165
166    // If it is tied we will need to write same register as we read.
167    if (MO.isTied())
168      return false;
169
170    const RegUse &Map = MO.isDef() ? Uses : Defs;
171    auto Conflict = Map.find(Reg);
172    if (Conflict == Map.end())
173      continue;
174
175    if (Reg.isPhysical())
176      return false;
177
178    LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
179    if ((Conflict->second.second & Mask).any())
180      return false;
181  }
182
183  return true;
184}
185
186// Since all defs in the clause are early clobber we can run out of registers.
187// Function returns false if pressure would hit the limit if instruction is
188// bundled into a memory clause.
189bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
190                                        GCNDownwardRPTracker &RPT) {
191  // NB: skip advanceBeforeNext() call. Since all defs will be marked
192  // early-clobber they will all stay alive at least to the end of the
193  // clause. Therefor we should not decrease pressure even if load
194  // pointer becomes dead and could otherwise be reused for destination.
195  RPT.advanceToNext();
196  GCNRegPressure MaxPressure = RPT.moveMaxPressure();
197  unsigned Occupancy = MaxPressure.getOccupancy(*ST);
198
199  // Don't push over half the register budget. We don't want to introduce
200  // spilling just to form a soft clause.
201  //
202  // FIXME: This pressure check is fundamentally broken. First, this is checking
203  // the global pressure, not the pressure at this specific point in the
204  // program. Second, it's not accounting for the increased liveness of the use
205  // operands due to the early clobber we will introduce. Third, the pressure
206  // tracking does not account for the alignment requirements for SGPRs, or the
207  // fragmentation of registers the allocator will need to satisfy.
208  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
209      MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 &&
210      MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
211    LastRecordedOccupancy = Occupancy;
212    return true;
213  }
214  return false;
215}
216
217// Collect register defs and uses along with their lane masks and states.
218void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
219                                         RegUse &Defs, RegUse &Uses) const {
220  for (const MachineOperand &MO : MI.operands()) {
221    if (!MO.isReg())
222      continue;
223    Register Reg = MO.getReg();
224    if (!Reg)
225      continue;
226
227    LaneBitmask Mask = Reg.isVirtual()
228                           ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
229                           : LaneBitmask::getAll();
230    RegUse &Map = MO.isDef() ? Defs : Uses;
231
232    auto Loc = Map.find(Reg);
233    unsigned State = getMopState(MO);
234    if (Loc == Map.end()) {
235      Map[Reg] = std::make_pair(State, Mask);
236    } else {
237      Loc->second.first |= State;
238      Loc->second.second |= Mask;
239    }
240  }
241}
242
243// Check register def/use conflicts, occupancy limits and collect def/use maps.
244// Return true if instruction can be bundled with previous. It it cannot
245// def/use maps are not updated.
246bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
247                                         RegUse &Defs, RegUse &Uses,
248                                         GCNDownwardRPTracker &RPT) {
249  if (!canBundle(MI, Defs, Uses))
250    return false;
251
252  if (!checkPressure(MI, RPT))
253    return false;
254
255  collectRegUses(MI, Defs, Uses);
256  return true;
257}
258
/// Walk every basic block of \p MF looking for runs of adjacent loads that
/// are all VMEM or all SMEM (meta instructions in between are skipped). For
/// each run of at least two loads, insert KILL instructions after the last
/// load that use the virtual registers read inside the run but not live past
/// it, then recompute the live intervals of the registers involved.
///
/// Does nothing when the function is skipped or XNACK is disabled.
///
/// \returns true if at least one run of two or more loads was found.
bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->isXNACKEnabled())
    return false;

  const SIInstrInfo *TII = ST->getInstrInfo();
  TRI = ST->getRegisterInfo();
  MRI = &MF.getRegInfo();
  MFI = MF.getInfo<SIMachineFunctionInfo>();
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  SlotIndexes *Ind = LIS->getSlotIndexes();
  bool Changed = false;

  // Register budgets: the number of allocatable 32-bit VGPRs and SGPRs.
  // checkPressure() allows a clause to use at most half of each.
  MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
  MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
  // Per-function clause length limit; the command-line option is passed as
  // the third argument (presumably the default when the attribute is absent).
  unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
      MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);

  for (MachineBasicBlock &MBB : MF) {
    // A fresh pressure tracker is used for each block.
    GCNDownwardRPTracker RPT(*LIS);
    // Next is advanced past the whole clause by the inner loop below, so the
    // outer loop resumes at the first instruction that was not accepted.
    MachineBasicBlock::instr_iterator Next;
    for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
      MachineInstr &MI = *I;
      Next = std::next(I);

      if (MI.isMetaInstruction())
        continue;

      // The kind of the first instruction determines the kind of the clause.
      bool IsVMEM = isVMEMClauseInst(MI);

      if (!isValidClauseInst(MI, IsVMEM))
        continue;

      // Position the tracker at MI: reset it if it has no valid next
      // position, otherwise advance it.
      if (!RPT.getNext().isValid())
        RPT.reset(MI);
      else { // Advance the state to the current MI.
        RPT.advance(MachineBasicBlock::const_iterator(MI));
        RPT.advanceBeforeNext();
      }

      // Snapshot of the live registers at MI, used to reset the tracker
      // whenever the clause attempt is abandoned or finished.
      const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
      RegUse Defs, Uses;
      if (!processRegUses(MI, Defs, Uses, RPT)) {
        RPT.reset(MI, &LiveRegsCopy);
        continue;
      }

      // Try to extend the clause with the following instructions. Length
      // counts the accepted (non-meta) instructions, starting with MI.
      MachineBasicBlock::iterator LastClauseInst = Next;
      unsigned Length = 1;
      for ( ; Next != E && Length < FuncMaxClause; ++Next) {
        // Debug instructions should not change the kill insertion.
        if (Next->isMetaInstruction())
          continue;

        if (!isValidClauseInst(*Next, IsVMEM))
          break;

        // A load from pointer which was loaded inside the same bundle is an
        // impossible clause because we will need to write and read the same
        // register inside. In this case processRegUses will return false.
        if (!processRegUses(*Next, Defs, Uses, RPT))
          break;

        LastClauseInst = Next;
        ++Length;
      }
      // Nothing was appended after MI: restore the tracker and move on.
      if (Length < 2) {
        RPT.reset(MI, &LiveRegsCopy);
        continue;
      }

      Changed = true;
      // LastRecordedOccupancy was set by the last successful checkPressure().
      MFI->limitOccupancy(LastRecordedOccupancy);

      assert(!LastClauseInst->isMetaInstruction());

      // Slot index of the first clause instruction, and the index following
      // that of the last clause instruction.
      SlotIndex ClauseLiveInIdx = LIS->getInstructionIndex(MI);
      SlotIndex ClauseLiveOutIdx =
          LIS->getInstructionIndex(*LastClauseInst).getNextIndex();

      // Track the last inserted kill.
      MachineInstrBuilder Kill;

      // Insert one kill per register, with operands covering all necessary
      // subregisters.
      for (auto &&R : Uses) {
        Register Reg = R.first;
        if (Reg.isPhysical())
          continue;

        // Collect the register operands we should extend the live ranges of.
        // Each entry is (operand state flags, subregister index).
        SmallVector<std::tuple<unsigned, unsigned>> KillOps;
        const LiveInterval &LI = LIS->getInterval(R.first);

        if (!LI.hasSubRanges()) {
          // No subranges: kill the whole register if it is not live at the
          // clause's live-out index.
          if (!LI.liveAt(ClauseLiveOutIdx)) {
            KillOps.emplace_back(R.second.first | RegState::Kill,
                                 AMDGPU::NoSubRegister);
          }
        } else {
          // Gather the lanes that are live at the start of the clause but not
          // at its live-out index.
          LaneBitmask KilledMask;
          for (const LiveInterval::SubRange &SR : LI.subranges()) {
            if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx))
              KilledMask |= SR.LaneMask;
          }

          if (KilledMask.none())
            continue;

          // Express the killed lanes as a list of subregister indexes.
          SmallVector<unsigned> KilledIndexes;
          bool Success = TRI->getCoveringSubRegIndexes(
              *MRI, MRI->getRegClass(Reg), KilledMask, KilledIndexes);
          (void)Success;
          assert(Success && "Failed to find subregister mask to cover lanes");
          for (unsigned SubReg : KilledIndexes) {
            KillOps.emplace_back(R.second.first | RegState::Kill, SubReg);
          }
        }

        if (KillOps.empty())
          continue;

        // We only want to extend the live ranges of used registers. If they
        // already have existing uses beyond the bundle, we don't need the kill.
        //
        // It's possible all of the use registers were already live past the
        // bundle.
        //
        // The KILL is placed immediately after the last clause instruction
        // and is registered in the slot index maps.
        Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst),
                       DebugLoc(), TII->get(AMDGPU::KILL));
        for (auto &Op : KillOps)
          Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op));
        Ind->insertMachineInstrInMaps(*Kill);
      }

      // No KILL was needed for any register: restore the tracker at MI.
      if (!Kill) {
        RPT.reset(MI, &LiveRegsCopy);
        continue;
      }

      // Restore the state after processing the end of the bundle.
      RPT.reset(*Kill, &LiveRegsCopy);

      // Recompute the live intervals of the virtual registers defined in the
      // clause. Each one is also dropped from Uses so that the loop below
      // does not recompute it a second time.
      for (auto &&R : Defs) {
        Register Reg = R.first;
        Uses.erase(Reg);
        if (Reg.isPhysical())
          continue;
        LIS->removeInterval(Reg);
        LIS->createAndComputeVirtRegInterval(Reg);
      }

      // Recompute the live intervals of the remaining used virtual registers.
      for (auto &&R : Uses) {
        Register Reg = R.first;
        if (Reg.isPhysical())
          continue;
        LIS->removeInterval(Reg);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }
  }

  return Changed;
}
425