1311116Sdim//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2311116Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6311116Sdim//
7311116Sdim//===----------------------------------------------------------------------===//
8311116Sdim//
9311116Sdim/// \file
10311116Sdim/// This contains a MachineSchedStrategy implementation for maximizing wave
11311116Sdim/// occupancy on GCN hardware.
12311116Sdim//===----------------------------------------------------------------------===//
13311116Sdim
14311116Sdim#include "GCNSchedStrategy.h"
15311116Sdim#include "AMDGPUSubtarget.h"
16311116Sdim#include "SIInstrInfo.h"
17311116Sdim#include "SIMachineFunctionInfo.h"
18311116Sdim#include "SIRegisterInfo.h"
19360784Sdim#include "Utils/AMDGPUBaseInfo.h"
20311116Sdim#include "llvm/CodeGen/RegisterClassInfo.h"
21321369Sdim#include "llvm/Support/MathExtras.h"
22311116Sdim
23321369Sdim#define DEBUG_TYPE "machine-scheduler"
24311116Sdim
25311116Sdimusing namespace llvm;
26311116Sdim
27311116SdimGCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
28311116Sdim    const MachineSchedContext *C) :
29321369Sdim    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
30311116Sdim
31321369Sdimvoid GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
32321369Sdim  GenericScheduler::initialize(DAG);
33321369Sdim
34321369Sdim  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
35321369Sdim
36321369Sdim  MF = &DAG->MF;
37321369Sdim
38341825Sdim  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
39321369Sdim
40321369Sdim  // FIXME: This is also necessary, because some passes that run after
41321369Sdim  // scheduling and before regalloc increase register pressure.
42321369Sdim  const int ErrorMargin = 3;
43321369Sdim
44321369Sdim  SGPRExcessLimit = Context->RegClassInfo
45321369Sdim    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
46321369Sdim  VGPRExcessLimit = Context->RegClassInfo
47321369Sdim    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
48321369Sdim  if (TargetOccupancy) {
49321369Sdim    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
50321369Sdim    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
51321369Sdim  } else {
52321369Sdim    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
53321369Sdim                                                    SRI->getSGPRPressureSet());
54321369Sdim    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
55321369Sdim                                                    SRI->getVGPRPressureSet());
56321369Sdim  }
57321369Sdim
58321369Sdim  SGPRCriticalLimit -= ErrorMargin;
59321369Sdim  VGPRCriticalLimit -= ErrorMargin;
60321369Sdim}
61321369Sdim
62311116Sdimvoid GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
63311116Sdim                                     bool AtTop, const RegPressureTracker &RPTracker,
64311116Sdim                                     const SIRegisterInfo *SRI,
65321369Sdim                                     unsigned SGPRPressure,
66321369Sdim                                     unsigned VGPRPressure) {
67311116Sdim
68311116Sdim  Cand.SU = SU;
69311116Sdim  Cand.AtTop = AtTop;
70311116Sdim
71311116Sdim  // getDownwardPressure() and getUpwardPressure() make temporary changes to
72341825Sdim  // the tracker, so we need to pass those function a non-const copy.
73311116Sdim  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
74311116Sdim
75360784Sdim  Pressure.clear();
76360784Sdim  MaxPressure.clear();
77311116Sdim
78311116Sdim  if (AtTop)
79311116Sdim    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
80311116Sdim  else {
81311116Sdim    // FIXME: I think for bottom up scheduling, the register pressure is cached
82311116Sdim    // and can be retrieved by DAG->getPressureDif(SU).
83311116Sdim    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
84311116Sdim  }
85311116Sdim
86321369Sdim  unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
87321369Sdim  unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
88311116Sdim
89311116Sdim  // If two instructions increase the pressure of different register sets
90311116Sdim  // by the same amount, the generic scheduler will prefer to schedule the
91311116Sdim  // instruction that increases the set with the least amount of registers,
92311116Sdim  // which in our case would be SGPRs.  This is rarely what we want, so
93311116Sdim  // when we report excess/critical register pressure, we do it either
94311116Sdim  // only for VGPRs or only for SGPRs.
95311116Sdim
96311116Sdim  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
97321369Sdim  const unsigned MaxVGPRPressureInc = 16;
98311116Sdim  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
99311116Sdim  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
100311116Sdim
101311116Sdim
102311116Sdim  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
103311116Sdim  // to increase the likelihood we don't go over the limits.  We should improve
104311116Sdim  // the analysis to look through dependencies to find the path with the least
105311116Sdim  // register pressure.
106311116Sdim
107360784Sdim  // We only need to update the RPDelta for instructions that increase register
108360784Sdim  // pressure. Instructions that decrease or keep reg pressure the same will be
109360784Sdim  // marked as RegExcess in tryCandidate() when they are compared with
110360784Sdim  // instructions that increase the register pressure.
111311116Sdim  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
112311116Sdim    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
113311116Sdim    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
114311116Sdim  }
115311116Sdim
116311116Sdim  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
117311116Sdim    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
118321369Sdim    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
119311116Sdim  }
120311116Sdim
121311116Sdim  // Register pressure is considered 'CRITICAL' if it is approaching a value
122311116Sdim  // that would reduce the wave occupancy for the execution unit.  When
123311116Sdim  // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
124311116Sdim  // has the same cost, so we don't need to prefer one over the other.
125311116Sdim
126311116Sdim  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
127311116Sdim  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
128311116Sdim
129311116Sdim  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
130311116Sdim    if (SGPRDelta > VGPRDelta) {
131311116Sdim      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
132311116Sdim      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
133311116Sdim    } else {
134311116Sdim      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
135311116Sdim      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
136311116Sdim    }
137311116Sdim  }
138311116Sdim}
139311116Sdim
140311116Sdim// This function is mostly cut and pasted from
141311116Sdim// GenericScheduler::pickNodeFromQueue()
142311116Sdimvoid GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
143311116Sdim                                         const CandPolicy &ZonePolicy,
144311116Sdim                                         const RegPressureTracker &RPTracker,
145311116Sdim                                         SchedCandidate &Cand) {
146311116Sdim  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
147311116Sdim  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
148311116Sdim  unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
149311116Sdim  unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
150311116Sdim  ReadyQueue &Q = Zone.Available;
151311116Sdim  for (SUnit *SU : Q) {
152311116Sdim
153311116Sdim    SchedCandidate TryCand(ZonePolicy);
154311116Sdim    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
155321369Sdim                  SGPRPressure, VGPRPressure);
156311116Sdim    // Pass SchedBoundary only when comparing nodes from the same boundary.
157311116Sdim    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
158311116Sdim    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
159311116Sdim    if (TryCand.Reason != NoCand) {
160311116Sdim      // Initialize resource delta if needed in case future heuristics query it.
161311116Sdim      if (TryCand.ResDelta == SchedResourceDelta())
162311116Sdim        TryCand.initResourceDelta(Zone.DAG, SchedModel);
163311116Sdim      Cand.setBest(TryCand);
164360784Sdim      LLVM_DEBUG(traceCandidate(Cand));
165311116Sdim    }
166311116Sdim  }
167311116Sdim}
168311116Sdim
169311116Sdim// This function is mostly cut and pasted from
170311116Sdim// GenericScheduler::pickNodeBidirectional()
171311116SdimSUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
172311116Sdim  // Schedule as far as possible in the direction of no choice. This is most
173311116Sdim  // efficient, but also provides the best heuristics for CriticalPSets.
174311116Sdim  if (SUnit *SU = Bot.pickOnlyChoice()) {
175311116Sdim    IsTopNode = false;
176311116Sdim    return SU;
177311116Sdim  }
178311116Sdim  if (SUnit *SU = Top.pickOnlyChoice()) {
179311116Sdim    IsTopNode = true;
180311116Sdim    return SU;
181311116Sdim  }
182311116Sdim  // Set the bottom-up policy based on the state of the current bottom zone and
183311116Sdim  // the instructions outside the zone, including the top zone.
184311116Sdim  CandPolicy BotPolicy;
185311116Sdim  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
186311116Sdim  // Set the top-down policy based on the state of the current top zone and
187311116Sdim  // the instructions outside the zone, including the bottom zone.
188311116Sdim  CandPolicy TopPolicy;
189311116Sdim  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
190311116Sdim
191311116Sdim  // See if BotCand is still valid (because we previously scheduled from Top).
192341825Sdim  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
193311116Sdim  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
194311116Sdim      BotCand.Policy != BotPolicy) {
195311116Sdim    BotCand.reset(CandPolicy());
196311116Sdim    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
197311116Sdim    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
198311116Sdim  } else {
199341825Sdim    LLVM_DEBUG(traceCandidate(BotCand));
200360784Sdim#ifndef NDEBUG
201360784Sdim    if (VerifyScheduling) {
202360784Sdim      SchedCandidate TCand;
203360784Sdim      TCand.reset(CandPolicy());
204360784Sdim      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
205360784Sdim      assert(TCand.SU == BotCand.SU &&
206360784Sdim             "Last pick result should correspond to re-picking right now");
207360784Sdim    }
208360784Sdim#endif
209311116Sdim  }
210311116Sdim
211311116Sdim  // Check if the top Q has a better candidate.
212341825Sdim  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
213311116Sdim  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
214311116Sdim      TopCand.Policy != TopPolicy) {
215311116Sdim    TopCand.reset(CandPolicy());
216311116Sdim    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
217311116Sdim    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
218311116Sdim  } else {
219341825Sdim    LLVM_DEBUG(traceCandidate(TopCand));
220360784Sdim#ifndef NDEBUG
221360784Sdim    if (VerifyScheduling) {
222360784Sdim      SchedCandidate TCand;
223360784Sdim      TCand.reset(CandPolicy());
224360784Sdim      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
225360784Sdim      assert(TCand.SU == TopCand.SU &&
226360784Sdim           "Last pick result should correspond to re-picking right now");
227360784Sdim    }
228360784Sdim#endif
229311116Sdim  }
230311116Sdim
231311116Sdim  // Pick best from BotCand and TopCand.
232341825Sdim  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
233341825Sdim             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
234311116Sdim  SchedCandidate Cand;
235311116Sdim  if (TopCand.Reason == BotCand.Reason) {
236311116Sdim    Cand = BotCand;
237311116Sdim    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
238311116Sdim    TopCand.Reason = NoCand;
239311116Sdim    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
240311116Sdim    if (TopCand.Reason != NoCand) {
241311116Sdim      Cand.setBest(TopCand);
242311116Sdim    } else {
243311116Sdim      TopCand.Reason = TopReason;
244311116Sdim    }
245311116Sdim  } else {
246311116Sdim    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
247311116Sdim      Cand = TopCand;
248311116Sdim    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
249311116Sdim      Cand = BotCand;
250311116Sdim    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
251311116Sdim      Cand = TopCand;
252311116Sdim    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
253311116Sdim      Cand = BotCand;
254311116Sdim    } else {
255321369Sdim      if (BotCand.Reason > TopCand.Reason) {
256311116Sdim        Cand = TopCand;
257311116Sdim      } else {
258311116Sdim        Cand = BotCand;
259311116Sdim      }
260311116Sdim    }
261311116Sdim  }
262341825Sdim  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
263311116Sdim
264311116Sdim  IsTopNode = Cand.AtTop;
265311116Sdim  return Cand.SU;
266311116Sdim}
267311116Sdim
268311116Sdim// This function is mostly cut and pasted from
269311116Sdim// GenericScheduler::pickNode()
270311116SdimSUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
271311116Sdim  if (DAG->top() == DAG->bottom()) {
272311116Sdim    assert(Top.Available.empty() && Top.Pending.empty() &&
273311116Sdim           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
274311116Sdim    return nullptr;
275311116Sdim  }
276311116Sdim  SUnit *SU;
277311116Sdim  do {
278311116Sdim    if (RegionPolicy.OnlyTopDown) {
279311116Sdim      SU = Top.pickOnlyChoice();
280311116Sdim      if (!SU) {
281311116Sdim        CandPolicy NoPolicy;
282311116Sdim        TopCand.reset(NoPolicy);
283311116Sdim        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
284311116Sdim        assert(TopCand.Reason != NoCand && "failed to find a candidate");
285311116Sdim        SU = TopCand.SU;
286311116Sdim      }
287311116Sdim      IsTopNode = true;
288311116Sdim    } else if (RegionPolicy.OnlyBottomUp) {
289311116Sdim      SU = Bot.pickOnlyChoice();
290311116Sdim      if (!SU) {
291311116Sdim        CandPolicy NoPolicy;
292311116Sdim        BotCand.reset(NoPolicy);
293311116Sdim        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
294311116Sdim        assert(BotCand.Reason != NoCand && "failed to find a candidate");
295311116Sdim        SU = BotCand.SU;
296311116Sdim      }
297311116Sdim      IsTopNode = false;
298311116Sdim    } else {
299311116Sdim      SU = pickNodeBidirectional(IsTopNode);
300311116Sdim    }
301311116Sdim  } while (SU->isScheduled);
302311116Sdim
303311116Sdim  if (SU->isTopReady())
304311116Sdim    Top.removeReady(SU);
305311116Sdim  if (SU->isBottomReady())
306311116Sdim    Bot.removeReady(SU);
307311116Sdim
308341825Sdim  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
309341825Sdim                    << *SU->getInstr());
310311116Sdim  return SU;
311311116Sdim}
312321369Sdim
313321369SdimGCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
314321369Sdim                        std::unique_ptr<MachineSchedStrategy> S) :
315321369Sdim  ScheduleDAGMILive(C, std::move(S)),
316341825Sdim  ST(MF.getSubtarget<GCNSubtarget>()),
317321369Sdim  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
318341825Sdim  StartingOccupancy(MFI.getOccupancy()),
319321369Sdim  MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
320321369Sdim
321341825Sdim  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
322321369Sdim}
323321369Sdim
324321369Sdimvoid GCNScheduleDAGMILive::schedule() {
325321369Sdim  if (Stage == 0) {
326321369Sdim    // Just record regions at the first pass.
327321369Sdim    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
328321369Sdim    return;
329321369Sdim  }
330321369Sdim
331321369Sdim  std::vector<MachineInstr*> Unsched;
332321369Sdim  Unsched.reserve(NumRegionInstrs);
333327952Sdim  for (auto &I : *this) {
334321369Sdim    Unsched.push_back(&I);
335327952Sdim  }
336321369Sdim
337321369Sdim  GCNRegPressure PressureBefore;
338321369Sdim  if (LIS) {
339321369Sdim    PressureBefore = Pressure[RegionIdx];
340321369Sdim
341341825Sdim    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
342341825Sdim               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
343341825Sdim               dbgs() << "Region live-in pressure:  ";
344341825Sdim               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
345341825Sdim               dbgs() << "Region register pressure: ";
346341825Sdim               PressureBefore.print(dbgs()));
347321369Sdim  }
348321369Sdim
349321369Sdim  ScheduleDAGMILive::schedule();
350321369Sdim  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
351321369Sdim
352321369Sdim  if (!LIS)
353321369Sdim    return;
354321369Sdim
355321369Sdim  // Check the results of scheduling.
356321369Sdim  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
357321369Sdim  auto PressureAfter = getRealRegPressure();
358321369Sdim
359341825Sdim  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
360341825Sdim             PressureAfter.print(dbgs()));
361321369Sdim
362321369Sdim  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
363321369Sdim      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
364321369Sdim    Pressure[RegionIdx] = PressureAfter;
365341825Sdim    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
366321369Sdim    return;
367321369Sdim  }
368341825Sdim  unsigned Occ = MFI.getOccupancy();
369341825Sdim  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
370341825Sdim  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
371341825Sdim  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
372341825Sdim                    << ", after " << WavesAfter << ".\n");
373321369Sdim
374321369Sdim  // We could not keep current target occupancy because of the just scheduled
375321369Sdim  // region. Record new occupancy for next scheduling cycle.
376321369Sdim  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
377341825Sdim  // Allow memory bound functions to drop to 4 waves if not limited by an
378341825Sdim  // attribute.
379341825Sdim  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
380341825Sdim      WavesAfter >= MFI.getMinAllowedOccupancy()) {
381341825Sdim    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
382341825Sdim                      << MFI.getMinAllowedOccupancy() << " waves\n");
383341825Sdim    NewOccupancy = WavesAfter;
384341825Sdim  }
385321369Sdim  if (NewOccupancy < MinOccupancy) {
386321369Sdim    MinOccupancy = NewOccupancy;
387341825Sdim    MFI.limitOccupancy(MinOccupancy);
388341825Sdim    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
389341825Sdim                      << MinOccupancy << ".\n");
390321369Sdim  }
391321369Sdim
392341825Sdim  if (WavesAfter >= MinOccupancy) {
393360784Sdim    unsigned TotalVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
394360784Sdim    unsigned TotalSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(&ST);
395360784Sdim    if (WavesAfter > MFI.getMinWavesPerEU() ||
396360784Sdim        PressureAfter.less(ST, PressureBefore) ||
397360784Sdim        (TotalVGPRs >= PressureAfter.getVGPRNum() &&
398360784Sdim         TotalSGPRs >= PressureAfter.getSGPRNum())) {
399360784Sdim      Pressure[RegionIdx] = PressureAfter;
400360784Sdim      return;
401360784Sdim    }
402360784Sdim    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
403321369Sdim  }
404321369Sdim
405341825Sdim  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
406321369Sdim  RegionEnd = RegionBegin;
407321369Sdim  for (MachineInstr *MI : Unsched) {
408341825Sdim    if (MI->isDebugInstr())
409327952Sdim      continue;
410327952Sdim
411321369Sdim    if (MI->getIterator() != RegionEnd) {
412321369Sdim      BB->remove(MI);
413321369Sdim      BB->insert(RegionEnd, MI);
414341825Sdim      if (!MI->isDebugInstr())
415327952Sdim        LIS->handleMove(*MI, true);
416321369Sdim    }
417321369Sdim    // Reset read-undef flags and update them later.
418321369Sdim    for (auto &Op : MI->operands())
419321369Sdim      if (Op.isReg() && Op.isDef())
420321369Sdim        Op.setIsUndef(false);
421321369Sdim    RegisterOperands RegOpers;
422321369Sdim    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
423341825Sdim    if (!MI->isDebugInstr()) {
424327952Sdim      if (ShouldTrackLaneMasks) {
425327952Sdim        // Adjust liveness and add missing dead+read-undef flags.
426327952Sdim        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
427327952Sdim        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
428327952Sdim      } else {
429327952Sdim        // Adjust for missing dead-def flags.
430327952Sdim        RegOpers.detectDeadDefs(*MI, *LIS);
431327952Sdim      }
432321369Sdim    }
433321369Sdim    RegionEnd = MI->getIterator();
434321369Sdim    ++RegionEnd;
435341825Sdim    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
436321369Sdim  }
437321369Sdim  RegionBegin = Unsched.front()->getIterator();
438321369Sdim  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
439321369Sdim
440321369Sdim  placeDebugValues();
441321369Sdim}
442321369Sdim
443321369SdimGCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
444321369Sdim  GCNDownwardRPTracker RPTracker(*LIS);
445321369Sdim  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
446321369Sdim  return RPTracker.moveMaxPressure();
447321369Sdim}
448321369Sdim
449321369Sdimvoid GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
450321369Sdim  GCNDownwardRPTracker RPTracker(*LIS);
451321369Sdim
452321369Sdim  // If the block has the only successor then live-ins of that successor are
453321369Sdim  // live-outs of the current block. We can reuse calculated live set if the
454321369Sdim  // successor will be sent to scheduling past current block.
455321369Sdim  const MachineBasicBlock *OnlySucc = nullptr;
456321369Sdim  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
457321369Sdim    SlotIndexes *Ind = LIS->getSlotIndexes();
458321369Sdim    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
459321369Sdim      OnlySucc = *MBB->succ_begin();
460321369Sdim  }
461321369Sdim
462321369Sdim  // Scheduler sends regions from the end of the block upwards.
463321369Sdim  size_t CurRegion = RegionIdx;
464321369Sdim  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
465321369Sdim    if (Regions[CurRegion].first->getParent() != MBB)
466321369Sdim      break;
467321369Sdim  --CurRegion;
468321369Sdim
469321369Sdim  auto I = MBB->begin();
470321369Sdim  auto LiveInIt = MBBLiveIns.find(MBB);
471321369Sdim  if (LiveInIt != MBBLiveIns.end()) {
472321369Sdim    auto LiveIn = std::move(LiveInIt->second);
473321369Sdim    RPTracker.reset(*MBB->begin(), &LiveIn);
474321369Sdim    MBBLiveIns.erase(LiveInIt);
475321369Sdim  } else {
476353358Sdim    auto &Rgn = Regions[CurRegion];
477353358Sdim    I = Rgn.first;
478353358Sdim    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
479353358Sdim    auto LRS = BBLiveInMap.lookup(NonDbgMI);
480353358Sdim    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
481353358Sdim    RPTracker.reset(*I, &LRS);
482321369Sdim  }
483321369Sdim
484321369Sdim  for ( ; ; ) {
485321369Sdim    I = RPTracker.getNext();
486321369Sdim
487321369Sdim    if (Regions[CurRegion].first == I) {
488321369Sdim      LiveIns[CurRegion] = RPTracker.getLiveRegs();
489321369Sdim      RPTracker.clearMaxPressure();
490321369Sdim    }
491321369Sdim
492321369Sdim    if (Regions[CurRegion].second == I) {
493321369Sdim      Pressure[CurRegion] = RPTracker.moveMaxPressure();
494321369Sdim      if (CurRegion-- == RegionIdx)
495321369Sdim        break;
496321369Sdim    }
497321369Sdim    RPTracker.advanceToNext();
498321369Sdim    RPTracker.advanceBeforeNext();
499321369Sdim  }
500321369Sdim
501321369Sdim  if (OnlySucc) {
502321369Sdim    if (I != MBB->end()) {
503321369Sdim      RPTracker.advanceToNext();
504321369Sdim      RPTracker.advance(MBB->end());
505321369Sdim    }
506321369Sdim    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
507321369Sdim    RPTracker.advanceBeforeNext();
508321369Sdim    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
509321369Sdim  }
510321369Sdim}
511321369Sdim
512353358SdimDenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
513353358SdimGCNScheduleDAGMILive::getBBLiveInMap() const {
514353358Sdim  assert(!Regions.empty());
515353358Sdim  std::vector<MachineInstr *> BBStarters;
516353358Sdim  BBStarters.reserve(Regions.size());
517353358Sdim  auto I = Regions.rbegin(), E = Regions.rend();
518353358Sdim  auto *BB = I->first->getParent();
519353358Sdim  do {
520353358Sdim    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
521353358Sdim    BBStarters.push_back(MI);
522353358Sdim    do {
523353358Sdim      ++I;
524353358Sdim    } while (I != E && I->first->getParent() == BB);
525353358Sdim  } while (I != E);
526353358Sdim  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
527353358Sdim}
528353358Sdim
529321369Sdimvoid GCNScheduleDAGMILive::finalizeSchedule() {
530321369Sdim  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
531341825Sdim  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
532321369Sdim
533321369Sdim  LiveIns.resize(Regions.size());
534321369Sdim  Pressure.resize(Regions.size());
535321369Sdim
536353358Sdim  if (!Regions.empty())
537353358Sdim    BBLiveInMap = getBBLiveInMap();
538353358Sdim
539321369Sdim  do {
540321369Sdim    Stage++;
541321369Sdim    RegionIdx = 0;
542321369Sdim    MachineBasicBlock *MBB = nullptr;
543321369Sdim
544321369Sdim    if (Stage > 1) {
545321369Sdim      // Retry function scheduling if we found resulting occupancy and it is
546321369Sdim      // lower than used for first pass scheduling. This will give more freedom
547321369Sdim      // to schedule low register pressure blocks.
548321369Sdim      // Code is partially copied from MachineSchedulerBase::scheduleRegions().
549321369Sdim
550321369Sdim      if (!LIS || StartingOccupancy <= MinOccupancy)
551321369Sdim        break;
552321369Sdim
553341825Sdim      LLVM_DEBUG(
554341825Sdim          dbgs()
555341825Sdim          << "Retrying function scheduling with lowest recorded occupancy "
556341825Sdim          << MinOccupancy << ".\n");
557321369Sdim
558321369Sdim      S.setTargetOccupancy(MinOccupancy);
559321369Sdim    }
560321369Sdim
561321369Sdim    for (auto Region : Regions) {
562321369Sdim      RegionBegin = Region.first;
563321369Sdim      RegionEnd = Region.second;
564321369Sdim
565321369Sdim      if (RegionBegin->getParent() != MBB) {
566321369Sdim        if (MBB) finishBlock();
567321369Sdim        MBB = RegionBegin->getParent();
568321369Sdim        startBlock(MBB);
569321369Sdim        if (Stage == 1)
570321369Sdim          computeBlockPressure(MBB);
571321369Sdim      }
572321369Sdim
573321369Sdim      unsigned NumRegionInstrs = std::distance(begin(), end());
574321369Sdim      enterRegion(MBB, begin(), end(), NumRegionInstrs);
575321369Sdim
576321369Sdim      // Skip empty scheduling regions (0 or 1 schedulable instructions).
577321369Sdim      if (begin() == end() || begin() == std::prev(end())) {
578321369Sdim        exitRegion();
579321369Sdim        continue;
580321369Sdim      }
581321369Sdim
582341825Sdim      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
583341825Sdim      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
584341825Sdim                        << MBB->getName() << "\n  From: " << *begin()
585341825Sdim                        << "    To: ";
586341825Sdim                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
587341825Sdim                 else dbgs() << "End";
588341825Sdim                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
589321369Sdim
590321369Sdim      schedule();
591321369Sdim
592321369Sdim      exitRegion();
593321369Sdim      ++RegionIdx;
594321369Sdim    }
595321369Sdim    finishBlock();
596321369Sdim
597321369Sdim  } while (Stage < 2);
598321369Sdim}
599