//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }

/// Compute the SGPR/VGPR pressure limits used by initCandidate().
///
/// The "excess" limits are derived from the number of allocatable SGPR32 /
/// VGPR32 registers.  The "critical" limits come either from an explicit
/// TargetOccupancy (set by a re-scheduling stage via setTargetOccupancy) or,
/// when no target has been requested, from the generic register-pressure-set
/// limits.  All limits are reduced by ErrorMargin as a safety cushion.
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
  if (TargetOccupancy) {
    // A target occupancy was requested (second scheduling stage): the
    // critical limits are the per-wave register budgets for that occupancy.
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        SRI->getSGPRPressureSet());
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        SRI->getVGPRPressureSet());
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}

/// Initialize \p Cand for scheduling unit \p SU at the \p AtTop boundary,
/// recording in Cand.RPDelta how scheduling \p SU would push SGPR/VGPR
/// pressure past the excess/critical limits computed in initialize().
/// \p SGPRPressure / \p VGPRPressure are the current pressures at the
/// tracker's position, passed in so they are not recomputed per candidate.
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // Pressure/MaxPressure are member scratch vectors reused across calls.
  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.  When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    // Report whichever register file is further over (or closer to) its
    // critical limit.
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
//
// Picks the best candidate from \p Zone's available queue into \p Cand,
// using initCandidate() above to seed the AMDGPU-specific pressure deltas
// before deferring to the generic tryCandidate() heuristics.
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
//
// The AMDGPU-specific part is the tie-breaking at the end: when the top and
// bottom candidates were chosen for different reasons, a candidate whose
// reason is RegExcess/RegCritical with a non-positive pressure increase is
// preferred over the generic reason ordering.
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      // Re-pick and check that the cached candidate is still what we would
      // choose now.
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    // Same reason: let tryCandidate() arbitrate directly between the two.
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    // Different reasons: prefer the candidate that does not increase
    // excess/critical pressure; otherwise fall back to the generic reason
    // ranking (lower CandReason value wins).
    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      if (BotCand.Reason > TopCand.Reason) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

/// Schedule the current region.
///
/// During Stage 0 this only records the region's boundaries; the real
/// scheduling happens when finalizeSchedule() replays the regions in later
/// stages.  After scheduling, the achieved register pressure is compared
/// against the pre-scheduling pressure, the function's occupancy target is
/// lowered if necessary, and if the new schedule would cause (more) spilling
/// the original instruction order is restored.
void GCNScheduleDAGMILive::schedule() {
  if (Stage == 0) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  // Remember the original instruction order so the schedule can be reverted.
  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure:  ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  if (!LIS)
    return;

  // Check the results of scheduling.
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep current target occupancy because of the just scheduled
  // region. Record new occupancy for next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }
  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  if (WavesAfter >= MinOccupancy) {
    // Keep the new schedule if it does not drop occupancy below the target
    // and does not introduce spilling.
    unsigned TotalVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned TotalSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(&ST);
    if (WavesAfter > MFI.getMinWavesPerEU() ||
        PressureAfter.less(ST, PressureBefore) ||
        (TotalVGPRs >= PressureAfter.getVGPRNum() &&
         TotalSGPRs >= PressureAfter.getSGPRNum())) {
      Pressure[RegionIdx] = PressureAfter;
      return;
    }
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
  }

  // Restore the original instruction order, moving each instruction back and
  // updating LiveIntervals and dead/read-undef flags as we go.
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

/// Compute the maximum register pressure actually reached inside the current
/// region by walking it with a fresh downward RP tracker.
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

/// Walk \p MBB once top-down, filling in LiveIns[] and Pressure[] for every
/// scheduling region the block contains, and caching the live-out set for a
/// single successor block so that block can reuse it as its live-in set.
void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse calculated live set if the
  // successor will be sent to scheduling past current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  // Seed the tracker either with a live-in set cached by a predecessor's
  // walk, or with the live set computed for the last region in this block.
  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

  // Advance through the block, snapshotting live-ins at each region start
  // and max pressure at each region end.
  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    // Finish the block and hand the resulting live set to the successor.
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

/// Collect, in one LiveIntervals walk, the live-in register set before the
/// first non-debug instruction of one recorded region per basic block.
/// NOTE(review): Regions is iterated in reverse and only the first region
/// encountered for each block is queried — presumably that is the region
/// whose live-ins seed computeBlockPressure(); verify against the order in
/// which regions were recorded.
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

/// Drive the staged scheduling: Stage 1 schedules every recorded region;
/// Stage 2 re-runs the whole function with the lowest occupancy observed in
/// Stage 1 as the new target (skipped when occupancy did not drop).
void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > 1) {
      // Retry function scheduling if we found resulting occupancy and it is
      // lower than used for first pass scheduling. This will give more freedom
      // to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (!LIS || StartingOccupancy <= MinOccupancy)
        break;

      LLVM_DEBUG(
          dbgs()
          << "Retrying function scheduling with lowest recorded occupancy "
          << MinOccupancy << ".\n");

      S.setTargetOccupancy(MinOccupancy);
    }

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        // Block pressure only needs to be computed once; Stage 2 reuses it.
        if (Stage == 1)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

  } while (Stage < 2);
}