GCNHazardRecognizer.cpp revision 341825
//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = 5;
}

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  default:
    // TODO: GDS
    return false;
  }
}

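// Returns the hardware register id encoded in the simm16 operand of an
// s_getreg/s_setreg instruction (the offset and width fields are masked off).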
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

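// Hazard query used during top-down scheduling: returning NoopHazard tells
// the caller that wait states (e.g. s_nop instructions) are required before
// MI can be issued safely.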
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

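// Returns the number of wait states (noops or independent instructions) that
// must be emitted before MI to satisfy all of the hazards checked above.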
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  return PreEmitNoops(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}

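// EmittedInstrs models the most recently issued instructions, newest first.
// A nullptr entry stands for one wait state in which no hazard-relevant
// instruction was issued (an emitted noop, or an extra wait state consumed
// by a multi-cycle instruction).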
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions.
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

int GCNHazardRecognizer::getWaitStatesSince(
    function_ref<bool(MachineInstr *)> IsHazard) {
  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      unsigned Opcode = MI->getOpcode();
      if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
          Opcode == AMDGPU::INLINEASM)
        continue;
    }
    ++WaitStates;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(
    unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(
    function_ref<bool(MachineInstr *)> IsHazard) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if XNACK is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure
  // that when a clause has more than one instruction, no instruction in the
  // clause writes to a register that is read by another instruction in the
  // clause (including itself). If we encounter this situation, we need to
  // break the clause by inserting a non-SMEM instruction.
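  //
  // For example (illustrative only), a clause like:
  //   s_load_dwordx2 s[0:1], s[2:3], 0x0
  //   s_load_dword   s4,     s[0:1], 0x0
  // has to be broken up, because the second load reads s[0:1], which is
  // written by the first load of the same clause.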

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return WaitStatesNeeded;

  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
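  //
  // For example (illustrative only):
  //   v_readfirstlane_b32 s4, v0
  //   s_load_dword        s5, s[8:9], s4   ; needs 4 wait states after the
  //                                        ; VALU write to s4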
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // an s_mov writing a descriptor and an s_buffer_load_dword reading the
    // descriptor need some number of nops in between. We don't know how many
    // we need, but let's use 4. This probably wasn't discovered before because
    // the only case where it happens is when we expand a 64-bit pointer into a
    // full descriptor and use s_buffer_load_dword instead of s_load_dword,
    // which was probably never encountered in closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
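  //
  // For example (illustrative only):
  //   v_readfirstlane_b32 s4, v1
  //   buffer_load_dword   v0, off, s[8:11], s4   ; needs 5 wait states after
  //                                              ; the VALU write to s4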
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };

  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
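  //
  // For example (illustrative only):
  //   v_cmp_eq_u32   vcc, v0, v1
  //   v_div_fmas_f32 v2, v3, v4, v5   ; implicitly reads vcc, so it needs
  //                                   ; 4 wait states after the vcc write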
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);

  return DivFMasWaitStates - WaitStatesNeeded;
}

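// s_getreg_b32 needs wait states after an s_setreg that writes the same
// hardware register.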
int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates =
      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
  return SetRegWaitStates - WaitStatesNeeded;
}

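// If MI is a VMEM store that can trigger the store-data hazard checked by
// checkVALUHazards (see has12DWordStoreHazard()), returns the index of the
// store-data operand that a following VALU write could clobber; otherwise
// returns -1.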
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

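// v_readlane/v_writelane with an SGPR lane-select operand need wait states
// after the lane-select SGPR is written by a VALU instruction.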
int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  unsigned LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

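// Instructions that read M0 implicitly (v_interp_*, s_movrel*, s_sendmsg,
// s_ttracedata) need a wait state after M0 is written by an SALU instruction
// on subtargets with the corresponding hazard.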
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
}