//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
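  // If a later scheduling stage has recorded a target occupancy, derive the
  // critical limits from it; otherwise fall back to the subtarget's register
  // pressure set limits.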
  if (TargetOccupancy) {
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::SReg_32);
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::VGPR_32);
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.  This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.  When
  // register pressure is 'CRITICAL', an increase in SGPR pressure and an
  // increase in VGPR pressure have the same cost, so we don't need to prefer
  // one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
           "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

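  // Save the current instruction order so the schedule can be reverted below
  // if it turns out to be harmful.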
  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure:  ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;

  if (!LIS)
    return;

  // Check the results of scheduling.
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep current target occupancy because of the just scheduled
  // region. Record new occupancy for next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }
  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

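  // A region whose pressure now exceeds the function's absolute register
  // limits has to be rescheduled in a later stage.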
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs)
    RescheduleRegions[RegionIdx] = true;

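  // If occupancy is still acceptable, try to keep the new schedule; any region
  // that falls through here is reverted to its original instruction order.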
  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
        PressureAfter.less(ST, PressureBefore) ||
        !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = true;
  RegionEnd = RegionBegin;
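  // Move the instructions back into their pre-scheduling order and notify
  // LiveIntervals of every move so the liveness information stays correct.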
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

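// Replay the current region with a downward RP tracker to compute the maximum
// register pressure it reaches.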
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has a single successor, the live-ins of that successor are
  // the live-outs of the current block. We can reuse the computed live set if
  // the successor will be sent to scheduling after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;
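  // CurRegion now indexes the last recorded (i.e. topmost) region of MBB; it
  // counts back down towards RegionIdx as the tracker advances down the block.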

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

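// For the first non-debug instruction of each block's topmost scheduling
// region, record the set of registers that are live before it.
// computeBlockPressure() uses this map to seed its RP tracker for blocks whose
// live-ins were not inherited from a predecessor.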
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RescheduleRegions.set();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

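  // Walk the scheduling stages in order; every stage after InitialSchedule
  // makes another pass over the recorded regions.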
  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy turned out to be
      // lower than the one used for the first scheduling pass. This gives more
      // freedom to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
          "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }

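    // Temporarily drop the DAG mutations (e.g. load/store clustering) for the
    // unclustered stage; they are swapped back in after the pass below.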
    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
        // Keep RegionIdx in step with the region list when skipping a region.
        ++RegionIdx;
        continue;
      }

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}