// GCNHazardRecognizer.cpp revision 341825
1303231Sdim//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2303231Sdim// 3303231Sdim// The LLVM Compiler Infrastructure 4303231Sdim// 5303231Sdim// This file is distributed under the University of Illinois Open Source 6303231Sdim// License. See LICENSE.TXT for details. 7303231Sdim// 8303231Sdim//===----------------------------------------------------------------------===// 9303231Sdim// 10303231Sdim// This file implements hazard recognizers for scheduling on GCN processors. 11303231Sdim// 12303231Sdim//===----------------------------------------------------------------------===// 13303231Sdim 14303231Sdim#include "GCNHazardRecognizer.h" 15303231Sdim#include "AMDGPUSubtarget.h" 16321369Sdim#include "SIDefines.h" 17303231Sdim#include "SIInstrInfo.h" 18321369Sdim#include "SIRegisterInfo.h" 19341825Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20321369Sdim#include "Utils/AMDGPUBaseInfo.h" 21321369Sdim#include "llvm/ADT/iterator_range.h" 22321369Sdim#include "llvm/CodeGen/MachineFunction.h" 23321369Sdim#include "llvm/CodeGen/MachineInstr.h" 24321369Sdim#include "llvm/CodeGen/MachineOperand.h" 25303231Sdim#include "llvm/CodeGen/ScheduleDAG.h" 26321369Sdim#include "llvm/MC/MCInstrDesc.h" 27321369Sdim#include "llvm/Support/ErrorHandling.h" 28321369Sdim#include <algorithm> 29321369Sdim#include <cassert> 30321369Sdim#include <limits> 31321369Sdim#include <set> 32321369Sdim#include <vector> 33303231Sdim 34303231Sdimusing namespace llvm; 35303231Sdim 36303231Sdim//===----------------------------------------------------------------------===// 37303231Sdim// Hazard Recoginizer Implementation 38303231Sdim//===----------------------------------------------------------------------===// 39303231Sdim 40303231SdimGCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 41303231Sdim CurrCycleInstr(nullptr), 42303231Sdim MF(MF), 43341825Sdim ST(MF.getSubtarget<GCNSubtarget>()), 44327952Sdim TII(*ST.getInstrInfo()), 45327952Sdim 
TRI(TII.getRegisterInfo()), 46327952Sdim ClauseUses(TRI.getNumRegUnits()), 47327952Sdim ClauseDefs(TRI.getNumRegUnits()) { 48303231Sdim MaxLookAhead = 5; 49303231Sdim} 50303231Sdim 51303231Sdimvoid GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 52303231Sdim EmitInstruction(SU->getInstr()); 53303231Sdim} 54303231Sdim 55303231Sdimvoid GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 56303231Sdim CurrCycleInstr = MI; 57303231Sdim} 58303231Sdim 59314564Sdimstatic bool isDivFMas(unsigned Opcode) { 60314564Sdim return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 61314564Sdim} 62314564Sdim 63314564Sdimstatic bool isSGetReg(unsigned Opcode) { 64314564Sdim return Opcode == AMDGPU::S_GETREG_B32; 65314564Sdim} 66314564Sdim 67314564Sdimstatic bool isSSetReg(unsigned Opcode) { 68314564Sdim return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; 69314564Sdim} 70314564Sdim 71314564Sdimstatic bool isRWLane(unsigned Opcode) { 72314564Sdim return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 73314564Sdim} 74314564Sdim 75314564Sdimstatic bool isRFE(unsigned Opcode) { 76314564Sdim return Opcode == AMDGPU::S_RFE_B64; 77314564Sdim} 78314564Sdim 79321369Sdimstatic bool isSMovRel(unsigned Opcode) { 80321369Sdim switch (Opcode) { 81321369Sdim case AMDGPU::S_MOVRELS_B32: 82321369Sdim case AMDGPU::S_MOVRELS_B64: 83321369Sdim case AMDGPU::S_MOVRELD_B32: 84321369Sdim case AMDGPU::S_MOVRELD_B64: 85321369Sdim return true; 86321369Sdim default: 87321369Sdim return false; 88321369Sdim } 89321369Sdim} 90321369Sdim 91327952Sdimstatic bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { 92327952Sdim switch (MI.getOpcode()) { 93327952Sdim case AMDGPU::S_SENDMSG: 94327952Sdim case AMDGPU::S_SENDMSGHALT: 95327952Sdim case AMDGPU::S_TTRACEDATA: 96327952Sdim return true; 97327952Sdim default: 98327952Sdim // TODO: GDS 99327952Sdim return false; 100327952Sdim } 101327952Sdim} 102327952Sdim 103314564Sdimstatic unsigned 
getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 104314564Sdim const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 105314564Sdim AMDGPU::OpName::simm16); 106314564Sdim return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 107314564Sdim} 108314564Sdim 109303231SdimScheduleHazardRecognizer::HazardType 110303231SdimGCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 111303231Sdim MachineInstr *MI = SU->getInstr(); 112303231Sdim 113303231Sdim if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 114303231Sdim return NoopHazard; 115303231Sdim 116327952Sdim // FIXME: Should flat be considered vmem? 117327952Sdim if ((SIInstrInfo::isVMEM(*MI) || 118327952Sdim SIInstrInfo::isFLAT(*MI)) 119327952Sdim && checkVMEMHazards(MI) > 0) 120303231Sdim return NoopHazard; 121303231Sdim 122314564Sdim if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 123314564Sdim return NoopHazard; 124314564Sdim 125303231Sdim if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 126303231Sdim return NoopHazard; 127303231Sdim 128314564Sdim if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 129314564Sdim return NoopHazard; 130314564Sdim 131314564Sdim if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 132314564Sdim return NoopHazard; 133314564Sdim 134314564Sdim if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 135314564Sdim return NoopHazard; 136314564Sdim 137314564Sdim if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 138314564Sdim return NoopHazard; 139314564Sdim 140314564Sdim if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) 141314564Sdim return NoopHazard; 142314564Sdim 143327952Sdim if (ST.hasReadM0MovRelInterpHazard() && 144327952Sdim (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && 145321369Sdim checkReadM0Hazards(MI) > 0) 146321369Sdim return NoopHazard; 147321369Sdim 148327952Sdim if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && 149327952Sdim checkReadM0Hazards(MI) > 0) 
150327952Sdim return NoopHazard; 151327952Sdim 152327952Sdim if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) 153327952Sdim return NoopHazard; 154327952Sdim 155321369Sdim if (checkAnyInstHazards(MI) > 0) 156321369Sdim return NoopHazard; 157321369Sdim 158303231Sdim return NoHazard; 159303231Sdim} 160303231Sdim 161303231Sdimunsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 162303231Sdim return PreEmitNoops(SU->getInstr()); 163303231Sdim} 164303231Sdim 165303231Sdimunsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 166321369Sdim int WaitStates = std::max(0, checkAnyInstHazards(MI)); 167321369Sdim 168303231Sdim if (SIInstrInfo::isSMRD(*MI)) 169321369Sdim return std::max(WaitStates, checkSMRDHazards(MI)); 170303231Sdim 171327952Sdim if (SIInstrInfo::isVALU(*MI)) 172327952Sdim WaitStates = std::max(WaitStates, checkVALUHazards(MI)); 173303231Sdim 174327952Sdim if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) 175327952Sdim WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 176303231Sdim 177327952Sdim if (SIInstrInfo::isDPP(*MI)) 178327952Sdim WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 179314564Sdim 180327952Sdim if (isDivFMas(MI->getOpcode())) 181327952Sdim WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 182314564Sdim 183327952Sdim if (isRWLane(MI->getOpcode())) 184327952Sdim WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 185314564Sdim 186327952Sdim if (MI->isInlineAsm()) 187327952Sdim return std::max(WaitStates, checkInlineAsmHazards(MI)); 188321369Sdim 189314564Sdim if (isSGetReg(MI->getOpcode())) 190321369Sdim return std::max(WaitStates, checkGetRegHazards(MI)); 191314564Sdim 192314564Sdim if (isSSetReg(MI->getOpcode())) 193321369Sdim return std::max(WaitStates, checkSetRegHazards(MI)); 194314564Sdim 195314564Sdim if (isRFE(MI->getOpcode())) 196321369Sdim return std::max(WaitStates, checkRFEHazards(MI)); 197314564Sdim 198327952Sdim if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || 
199327952Sdim isSMovRel(MI->getOpcode()))) 200321369Sdim return std::max(WaitStates, checkReadM0Hazards(MI)); 201321369Sdim 202327952Sdim if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) 203327952Sdim return std::max(WaitStates, checkReadM0Hazards(MI)); 204327952Sdim 205321369Sdim return WaitStates; 206303231Sdim} 207303231Sdim 208303231Sdimvoid GCNHazardRecognizer::EmitNoop() { 209303231Sdim EmittedInstrs.push_front(nullptr); 210303231Sdim} 211303231Sdim 212303231Sdimvoid GCNHazardRecognizer::AdvanceCycle() { 213303231Sdim // When the scheduler detects a stall, it will call AdvanceCycle() without 214303231Sdim // emitting any instructions. 215303231Sdim if (!CurrCycleInstr) 216303231Sdim return; 217303231Sdim 218321369Sdim unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); 219303231Sdim 220303231Sdim // Keep track of emitted instructions 221303231Sdim EmittedInstrs.push_front(CurrCycleInstr); 222303231Sdim 223303231Sdim // Add a nullptr for each additional wait state after the first. Make sure 224303231Sdim // not to add more than getMaxLookAhead() items to the list, since we 225303231Sdim // truncate the list to that size right after this loop. 226303231Sdim for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 227303231Sdim i < e; ++i) { 228303231Sdim EmittedInstrs.push_front(nullptr); 229303231Sdim } 230303231Sdim 231303231Sdim // getMaxLookahead() is the largest number of wait states we will ever need 232303231Sdim // to insert, so there is no point in keeping track of more than that many 233303231Sdim // wait states. 
234303231Sdim EmittedInstrs.resize(getMaxLookAhead()); 235303231Sdim 236303231Sdim CurrCycleInstr = nullptr; 237303231Sdim} 238303231Sdim 239303231Sdimvoid GCNHazardRecognizer::RecedeCycle() { 240303231Sdim llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 241303231Sdim} 242303231Sdim 243303231Sdim//===----------------------------------------------------------------------===// 244303231Sdim// Helper Functions 245303231Sdim//===----------------------------------------------------------------------===// 246303231Sdim 247314564Sdimint GCNHazardRecognizer::getWaitStatesSince( 248314564Sdim function_ref<bool(MachineInstr *)> IsHazard) { 249326496Sdim int WaitStates = 0; 250303231Sdim for (MachineInstr *MI : EmittedInstrs) { 251326496Sdim if (MI) { 252326496Sdim if (IsHazard(MI)) 253326496Sdim return WaitStates; 254326496Sdim 255326496Sdim unsigned Opcode = MI->getOpcode(); 256327952Sdim if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF || 257327952Sdim Opcode == AMDGPU::INLINEASM) 258326496Sdim continue; 259326496Sdim } 260303231Sdim ++WaitStates; 261303231Sdim } 262303231Sdim return std::numeric_limits<int>::max(); 263303231Sdim} 264303231Sdim 265314564Sdimint GCNHazardRecognizer::getWaitStatesSinceDef( 266314564Sdim unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { 267314564Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 268314564Sdim 269314564Sdim auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { 270314564Sdim return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); 271314564Sdim }; 272314564Sdim 273314564Sdim return getWaitStatesSince(IsHazardFn); 274314564Sdim} 275314564Sdim 276314564Sdimint GCNHazardRecognizer::getWaitStatesSinceSetReg( 277314564Sdim function_ref<bool(MachineInstr *)> IsHazard) { 278314564Sdim auto IsHazardFn = [IsHazard] (MachineInstr *MI) { 279314564Sdim return isSSetReg(MI->getOpcode()) && IsHazard(MI); 280314564Sdim }; 281314564Sdim 282314564Sdim return 
getWaitStatesSince(IsHazardFn); 283314564Sdim} 284314564Sdim 285303231Sdim//===----------------------------------------------------------------------===// 286303231Sdim// No-op Hazard Detection 287303231Sdim//===----------------------------------------------------------------------===// 288303231Sdim 289327952Sdimstatic void addRegUnits(const SIRegisterInfo &TRI, 290327952Sdim BitVector &BV, unsigned Reg) { 291327952Sdim for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) 292327952Sdim BV.set(*RUI); 293327952Sdim} 294327952Sdim 295327952Sdimstatic void addRegsToSet(const SIRegisterInfo &TRI, 296327952Sdim iterator_range<MachineInstr::const_mop_iterator> Ops, 297327952Sdim BitVector &Set) { 298303231Sdim for (const MachineOperand &Op : Ops) { 299303231Sdim if (Op.isReg()) 300327952Sdim addRegUnits(TRI, Set, Op.getReg()); 301303231Sdim } 302303231Sdim} 303303231Sdim 304327952Sdimvoid GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { 305327952Sdim // XXX: Do we need to worry about implicit operands 306327952Sdim addRegsToSet(TRI, MI.defs(), ClauseDefs); 307327952Sdim addRegsToSet(TRI, MI.uses(), ClauseUses); 308327952Sdim} 309327952Sdim 310327952Sdimint GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { 311327952Sdim // SMEM soft clause are only present on VI+, and only matter if xnack is 312327952Sdim // enabled. 313327952Sdim if (!ST.isXNACKEnabled()) 314303231Sdim return 0; 315303231Sdim 316327952Sdim bool IsSMRD = TII.isSMRD(*MEM); 317327952Sdim 318327952Sdim resetClause(); 319327952Sdim 320303231Sdim // A soft-clause is any group of consecutive SMEM instructions. The 321303231Sdim // instructions in this group may return out of order and/or may be 322303231Sdim // replayed (i.e. the same instruction issued more than once). 
323303231Sdim // 324303231Sdim // In order to handle these situations correctly we need to make sure 325303231Sdim // that when a clause has more than one instruction, no instruction in the 326303231Sdim // clause writes to a register that is read another instruction in the clause 327303231Sdim // (including itself). If we encounter this situaion, we need to break the 328303231Sdim // clause by inserting a non SMEM instruction. 329303231Sdim 330303231Sdim for (MachineInstr *MI : EmittedInstrs) { 331303231Sdim // When we hit a non-SMEM instruction then we have passed the start of the 332303231Sdim // clause and we can stop. 333327952Sdim if (!MI) 334303231Sdim break; 335303231Sdim 336327952Sdim if (IsSMRD != SIInstrInfo::isSMRD(*MI)) 337327952Sdim break; 338327952Sdim 339327952Sdim addClauseInst(*MI); 340303231Sdim } 341303231Sdim 342327952Sdim if (ClauseDefs.none()) 343303231Sdim return 0; 344303231Sdim 345327952Sdim // We need to make sure not to put loads and stores in the same clause if they 346327952Sdim // use the same address. For now, just start a new clause whenever we see a 347327952Sdim // store. 348327952Sdim if (MEM->mayStore()) 349303231Sdim return 1; 350303231Sdim 351327952Sdim addClauseInst(*MEM); 352303231Sdim 353303231Sdim // If the set of defs and uses intersect then we cannot add this instruction 354303231Sdim // to the clause, so we have a hazard. 355327952Sdim return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; 356303231Sdim} 357303231Sdim 358303231Sdimint GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 359341825Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 360303231Sdim int WaitStatesNeeded = 0; 361303231Sdim 362327952Sdim WaitStatesNeeded = checkSoftClauseHazards(SMRD); 363303231Sdim 364303231Sdim // This SMRD hazard only affects SI. 
365341825Sdim if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS) 366303231Sdim return WaitStatesNeeded; 367303231Sdim 368303231Sdim // A read of an SGPR by SMRD instruction requires 4 wait states when the 369303231Sdim // SGPR was written by a VALU instruction. 370303231Sdim int SmrdSgprWaitStates = 4; 371321369Sdim auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 372327952Sdim auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); }; 373303231Sdim 374327952Sdim bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); 375327952Sdim 376303231Sdim for (const MachineOperand &Use : SMRD->uses()) { 377303231Sdim if (!Use.isReg()) 378303231Sdim continue; 379303231Sdim int WaitStatesNeededForUse = 380303231Sdim SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 381303231Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 382327952Sdim 383327952Sdim // This fixes what appears to be undocumented hardware behavior in SI where 384327952Sdim // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor 385327952Sdim // needs some number of nops in between. We don't know how many we need, but 386327952Sdim // let's use 4. This wasn't discovered before probably because the only 387327952Sdim // case when this happens is when we expand a 64-bit pointer into a full 388327952Sdim // descriptor and use s_buffer_load_dword instead of s_load_dword, which was 389327952Sdim // probably never encountered in the closed-source land. 
390327952Sdim if (IsBufferSMRD) { 391327952Sdim int WaitStatesNeededForUse = 392327952Sdim SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 393327952Sdim IsBufferHazardDefFn); 394327952Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 395327952Sdim } 396303231Sdim } 397327952Sdim 398303231Sdim return WaitStatesNeeded; 399303231Sdim} 400303231Sdim 401303231Sdimint GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 402341825Sdim if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) 403303231Sdim return 0; 404303231Sdim 405327952Sdim int WaitStatesNeeded = checkSoftClauseHazards(VMEM); 406303231Sdim 407303231Sdim // A read of an SGPR by a VMEM instruction requires 5 wait states when the 408303231Sdim // SGPR was written by a VALU Instruction. 409327952Sdim const int VmemSgprWaitStates = 5; 410327952Sdim auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 411303231Sdim 412303231Sdim for (const MachineOperand &Use : VMEM->uses()) { 413303231Sdim if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 414303231Sdim continue; 415303231Sdim 416303231Sdim int WaitStatesNeededForUse = 417303231Sdim VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 418303231Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 419303231Sdim } 420303231Sdim return WaitStatesNeeded; 421303231Sdim} 422303231Sdim 423303231Sdimint GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 424303231Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 425327952Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 426303231Sdim 427327952Sdim // Check for DPP VGPR read after VALU VGPR write and EXEC write. 
428303231Sdim int DppVgprWaitStates = 2; 429327952Sdim int DppExecWaitStates = 5; 430303231Sdim int WaitStatesNeeded = 0; 431327952Sdim auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 432303231Sdim 433303231Sdim for (const MachineOperand &Use : DPP->uses()) { 434303231Sdim if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 435303231Sdim continue; 436303231Sdim int WaitStatesNeededForUse = 437303231Sdim DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); 438303231Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 439303231Sdim } 440303231Sdim 441327952Sdim WaitStatesNeeded = std::max( 442327952Sdim WaitStatesNeeded, 443327952Sdim DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn)); 444327952Sdim 445303231Sdim return WaitStatesNeeded; 446303231Sdim} 447314564Sdim 448314564Sdimint GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 449314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 450314564Sdim 451314564Sdim // v_div_fmas requires 4 wait states after a write to vcc from a VALU 452314564Sdim // instruction. 
453314564Sdim const int DivFMasWaitStates = 4; 454314564Sdim auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 455314564Sdim int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); 456314564Sdim 457314564Sdim return DivFMasWaitStates - WaitStatesNeeded; 458314564Sdim} 459314564Sdim 460314564Sdimint GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 461314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 462314564Sdim unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 463314564Sdim 464314564Sdim const int GetRegWaitStates = 2; 465314564Sdim auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 466314564Sdim return GetRegHWReg == getHWReg(TII, *MI); 467314564Sdim }; 468314564Sdim int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 469314564Sdim 470314564Sdim return GetRegWaitStates - WaitStatesNeeded; 471314564Sdim} 472314564Sdim 473314564Sdimint GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 474314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 475314564Sdim unsigned HWReg = getHWReg(TII, *SetRegInstr); 476314564Sdim 477314564Sdim const int SetRegWaitStates = 478314564Sdim ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 
1 : 2; 479314564Sdim auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) { 480314564Sdim return HWReg == getHWReg(TII, *MI); 481314564Sdim }; 482314564Sdim int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 483314564Sdim return SetRegWaitStates - WaitStatesNeeded; 484314564Sdim} 485314564Sdim 486314564Sdimint GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 487314564Sdim if (!MI.mayStore()) 488314564Sdim return -1; 489314564Sdim 490314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 491314564Sdim unsigned Opcode = MI.getOpcode(); 492314564Sdim const MCInstrDesc &Desc = MI.getDesc(); 493314564Sdim 494314564Sdim int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 495314564Sdim int VDataRCID = -1; 496314564Sdim if (VDataIdx != -1) 497314564Sdim VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 498314564Sdim 499314564Sdim if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 500314564Sdim // There is no hazard if the instruction does not use vector regs 501314564Sdim // (like wbinvl1) 502314564Sdim if (VDataIdx == -1) 503314564Sdim return -1; 504314564Sdim // For MUBUF/MTBUF instructions this hazard only exists if the 505314564Sdim // instruction is not using a register in the soffset field. 506314564Sdim const MachineOperand *SOffset = 507314564Sdim TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 508314564Sdim // If we have no soffset operand, then assume this field has been 509314564Sdim // hardcoded to zero. 510314564Sdim if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 511314564Sdim (!SOffset || !SOffset->isReg())) 512314564Sdim return VDataIdx; 513314564Sdim } 514314564Sdim 515314564Sdim // MIMG instructions create a hazard if they don't use a 256-bit T# and 516314564Sdim // the store size is greater than 8 bytes and they have more than two bits 517314564Sdim // of their dmask set. 518314564Sdim // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 
519314564Sdim if (TII->isMIMG(MI)) { 520314564Sdim int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 521314564Sdim assert(SRsrcIdx != -1 && 522314564Sdim AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 523314564Sdim (void)SRsrcIdx; 524314564Sdim } 525314564Sdim 526314564Sdim if (TII->isFLAT(MI)) { 527314564Sdim int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 528314564Sdim if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 529314564Sdim return DataIdx; 530314564Sdim } 531314564Sdim 532314564Sdim return -1; 533314564Sdim} 534314564Sdim 535327952Sdimint GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, 536327952Sdim const MachineRegisterInfo &MRI) { 537327952Sdim // Helper to check for the hazard where VMEM instructions that store more than 538327952Sdim // 8 bytes can have there store data over written by the next instruction. 539327952Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 540327952Sdim 541327952Sdim const int VALUWaitStates = 1; 542327952Sdim int WaitStatesNeeded = 0; 543327952Sdim 544327952Sdim if (!TRI->isVGPR(MRI, Def.getReg())) 545327952Sdim return WaitStatesNeeded; 546327952Sdim unsigned Reg = Def.getReg(); 547327952Sdim auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { 548327952Sdim int DataIdx = createsVALUHazard(*MI); 549327952Sdim return DataIdx >= 0 && 550327952Sdim TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); 551327952Sdim }; 552327952Sdim int WaitStatesNeededForDef = 553327952Sdim VALUWaitStates - getWaitStatesSince(IsHazardFn); 554327952Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 555327952Sdim 556327952Sdim return WaitStatesNeeded; 557327952Sdim} 558327952Sdim 559314564Sdimint GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 560314564Sdim // This checks for the hazard where VMEM instructions that store more than 561314564Sdim // 8 bytes can have there store data over written by 
the next instruction. 562314564Sdim if (!ST.has12DWordStoreHazard()) 563314564Sdim return 0; 564314564Sdim 565327952Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 566314564Sdim int WaitStatesNeeded = 0; 567314564Sdim 568314564Sdim for (const MachineOperand &Def : VALU->defs()) { 569327952Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); 570314564Sdim } 571327952Sdim 572314564Sdim return WaitStatesNeeded; 573314564Sdim} 574314564Sdim 575327952Sdimint GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { 576327952Sdim // This checks for hazards associated with inline asm statements. 577327952Sdim // Since inline asms can contain just about anything, we use this 578327952Sdim // to call/leverage other check*Hazard routines. Note that 579327952Sdim // this function doesn't attempt to address all possible inline asm 580327952Sdim // hazards (good luck), but is a collection of what has been 581327952Sdim // problematic thus far. 582327952Sdim 583327952Sdim // see checkVALUHazards() 584327952Sdim if (!ST.has12DWordStoreHazard()) 585327952Sdim return 0; 586327952Sdim 587327952Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 588327952Sdim int WaitStatesNeeded = 0; 589327952Sdim 590327952Sdim for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); 591327952Sdim I != E; ++I) { 592327952Sdim const MachineOperand &Op = IA->getOperand(I); 593327952Sdim if (Op.isReg() && Op.isDef()) { 594327952Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); 595327952Sdim } 596327952Sdim } 597327952Sdim 598327952Sdim return WaitStatesNeeded; 599327952Sdim} 600327952Sdim 601314564Sdimint GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 602314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 603314564Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 604327952Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 605314564Sdim 606314564Sdim const MachineOperand 
*LaneSelectOp = 607314564Sdim TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 608314564Sdim 609314564Sdim if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 610314564Sdim return 0; 611314564Sdim 612314564Sdim unsigned LaneSelectReg = LaneSelectOp->getReg(); 613314564Sdim auto IsHazardFn = [TII] (MachineInstr *MI) { 614314564Sdim return TII->isVALU(*MI); 615314564Sdim }; 616314564Sdim 617314564Sdim const int RWLaneWaitStates = 4; 618314564Sdim int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn); 619314564Sdim return RWLaneWaitStates - WaitStatesSince; 620314564Sdim} 621314564Sdim 622314564Sdimint GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { 623314564Sdim if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) 624314564Sdim return 0; 625314564Sdim 626314564Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 627314564Sdim 628314564Sdim const int RFEWaitStates = 1; 629314564Sdim 630314564Sdim auto IsHazardFn = [TII] (MachineInstr *MI) { 631314564Sdim return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS; 632314564Sdim }; 633314564Sdim int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 634314564Sdim return RFEWaitStates - WaitStatesNeeded; 635314564Sdim} 636321369Sdim 637321369Sdimint GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) { 638341825Sdim if (MI->isDebugInstr()) 639321369Sdim return 0; 640321369Sdim 641321369Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 642321369Sdim if (!ST.hasSMovFedHazard()) 643321369Sdim return 0; 644321369Sdim 645321369Sdim // Check for any instruction reading an SGPR after a write from 646321369Sdim // s_mov_fed_b32. 
647321369Sdim int MovFedWaitStates = 1; 648321369Sdim int WaitStatesNeeded = 0; 649321369Sdim 650321369Sdim for (const MachineOperand &Use : MI->uses()) { 651321369Sdim if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 652321369Sdim continue; 653321369Sdim auto IsHazardFn = [] (MachineInstr *MI) { 654321369Sdim return MI->getOpcode() == AMDGPU::S_MOV_FED_B32; 655321369Sdim }; 656321369Sdim int WaitStatesNeededForUse = 657321369Sdim MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn); 658321369Sdim WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 659321369Sdim } 660321369Sdim 661321369Sdim return WaitStatesNeeded; 662321369Sdim} 663321369Sdim 664321369Sdimint GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { 665321369Sdim const SIInstrInfo *TII = ST.getInstrInfo(); 666327952Sdim const int SMovRelWaitStates = 1; 667321369Sdim auto IsHazardFn = [TII] (MachineInstr *MI) { 668321369Sdim return TII->isSALU(*MI); 669321369Sdim }; 670321369Sdim return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn); 671321369Sdim} 672