1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/MachineScheduler.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PriorityQueue.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/iterator_range.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/CodeGen/LiveInterval.h"
24#include "llvm/CodeGen/LiveIntervals.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineLoopInfo.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/CodeGen/MachinePassRegistry.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/Passes.h"
35#include "llvm/CodeGen/RegisterClassInfo.h"
36#include "llvm/CodeGen/RegisterPressure.h"
37#include "llvm/CodeGen/ScheduleDAG.h"
38#include "llvm/CodeGen/ScheduleDAGInstrs.h"
39#include "llvm/CodeGen/ScheduleDAGMutation.h"
40#include "llvm/CodeGen/ScheduleDFS.h"
41#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42#include "llvm/CodeGen/SlotIndexes.h"
43#include "llvm/CodeGen/TargetFrameLowering.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetPassConfig.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSchedule.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/Config/llvm-config.h"
51#include "llvm/InitializePasses.h"
52#include "llvm/MC/LaneBitmask.h"
53#include "llvm/Pass.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Compiler.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/GraphWriter.h"
59#include "llvm/Support/MachineValueType.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76namespace llvm {
77
78cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
79                           cl::desc("Force top-down list scheduling"));
80cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
81                            cl::desc("Force bottom-up list scheduling"));
82cl::opt<bool>
83DumpCriticalPathLength("misched-dcpl", cl::Hidden,
84                       cl::desc("Print critical path length to stdout"));
85
86cl::opt<bool> VerifyScheduling(
87    "verify-misched", cl::Hidden,
88    cl::desc("Verify machine instrs before and after machine scheduling"));
89
90} // end namespace llvm
91
92#ifndef NDEBUG
93static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
94  cl::desc("Pop up a window to show MISched dags after they are processed"));
95
96/// In some situations a few uninteresting nodes depend on nearly all other
97/// nodes in the graph, provide a cutoff to hide them.
98static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
99  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
100
101static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
102  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
103
104static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
105  cl::desc("Only schedule this function"));
106static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
107                                        cl::desc("Only schedule this MBB#"));
108static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
109                              cl::desc("Print schedule DAGs"));
110#else
111static const bool ViewMISchedDAGs = false;
112static const bool PrintDAGs = false;
113#endif // NDEBUG
114
115/// Avoid quadratic complexity in unusually large basic blocks by limiting the
116/// size of the ready lists.
117static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
118  cl::desc("Limit ready list to N instructions"), cl::init(256));
119
120static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
121  cl::desc("Enable register pressure scheduling."), cl::init(true));
122
123static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
124  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
125
126static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
127                                        cl::desc("Enable memop clustering."),
128                                        cl::init(true));
129
130// DAG subtrees must have at least this many nodes.
131static const unsigned MinSubtreeSize = 8;
132
133// Pin the vtables to this file.
134void MachineSchedStrategy::anchor() {}
135
136void ScheduleDAGMutation::anchor() {}
137
138//===----------------------------------------------------------------------===//
139// Machine Instruction Scheduling Pass and Registry
140//===----------------------------------------------------------------------===//
141
142MachineSchedContext::MachineSchedContext() {
143  RegClassInfo = new RegisterClassInfo();
144}
145
146MachineSchedContext::~MachineSchedContext() {
147  delete RegClassInfo;
148}
149
150namespace {
151
152/// Base class for a machine scheduler class that can run at any point.
153class MachineSchedulerBase : public MachineSchedContext,
154                             public MachineFunctionPass {
155public:
156  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
157
158  void print(raw_ostream &O, const Module* = nullptr) const override;
159
160protected:
161  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
162};
163
164/// MachineScheduler runs after coalescing and before register allocation.
165class MachineScheduler : public MachineSchedulerBase {
166public:
167  MachineScheduler();
168
169  void getAnalysisUsage(AnalysisUsage &AU) const override;
170
171  bool runOnMachineFunction(MachineFunction&) override;
172
173  static char ID; // Class identification, replacement for typeinfo
174
175protected:
176  ScheduleDAGInstrs *createMachineScheduler();
177};
178
179/// PostMachineScheduler runs after shortly before code emission.
180class PostMachineScheduler : public MachineSchedulerBase {
181public:
182  PostMachineScheduler();
183
184  void getAnalysisUsage(AnalysisUsage &AU) const override;
185
186  bool runOnMachineFunction(MachineFunction&) override;
187
188  static char ID; // Class identification, replacement for typeinfo
189
190protected:
191  ScheduleDAGInstrs *createPostMachineScheduler();
192};
193
194} // end anonymous namespace
195
196char MachineScheduler::ID = 0;
197
198char &llvm::MachineSchedulerID = MachineScheduler::ID;
199
200INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
201                      "Machine Instruction Scheduler", false, false)
202INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
203INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
204INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
205INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
206INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
207INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
208                    "Machine Instruction Scheduler", false, false)
209
210MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
211  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
212}
213
214void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
215  AU.setPreservesCFG();
216  AU.addRequired<MachineDominatorTree>();
217  AU.addRequired<MachineLoopInfo>();
218  AU.addRequired<AAResultsWrapperPass>();
219  AU.addRequired<TargetPassConfig>();
220  AU.addRequired<SlotIndexes>();
221  AU.addPreserved<SlotIndexes>();
222  AU.addRequired<LiveIntervals>();
223  AU.addPreserved<LiveIntervals>();
224  MachineFunctionPass::getAnalysisUsage(AU);
225}
226
227char PostMachineScheduler::ID = 0;
228
229char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
230
231INITIALIZE_PASS(PostMachineScheduler, "postmisched",
232                "PostRA Machine Instruction Scheduler", false, false)
233
234PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
235  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
236}
237
238void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
239  AU.setPreservesCFG();
240  AU.addRequired<MachineDominatorTree>();
241  AU.addRequired<MachineLoopInfo>();
242  AU.addRequired<AAResultsWrapperPass>();
243  AU.addRequired<TargetPassConfig>();
244  MachineFunctionPass::getAnalysisUsage(AU);
245}
246
247MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
248    MachineSchedRegistry::Registry;
249
250/// A dummy default scheduler factory indicates whether the scheduler
251/// is overridden on the command line.
252static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
253  return nullptr;
254}
255
256/// MachineSchedOpt allows command line selection of the scheduler.
257static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
258               RegisterPassParser<MachineSchedRegistry>>
259MachineSchedOpt("misched",
260                cl::init(&useDefaultMachineSched), cl::Hidden,
261                cl::desc("Machine instruction scheduler to use"));
262
263static MachineSchedRegistry
264DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
265                     useDefaultMachineSched);
266
267static cl::opt<bool> EnableMachineSched(
268    "enable-misched",
269    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
270    cl::Hidden);
271
272static cl::opt<bool> EnablePostRAMachineSched(
273    "enable-post-misched",
274    cl::desc("Enable the post-ra machine instruction scheduling pass."),
275    cl::init(true), cl::Hidden);
276
277/// Decrement this iterator until reaching the top or a non-debug instr.
278static MachineBasicBlock::const_iterator
279priorNonDebug(MachineBasicBlock::const_iterator I,
280              MachineBasicBlock::const_iterator Beg) {
281  assert(I != Beg && "reached the top of the region, cannot decrement");
282  while (--I != Beg) {
283    if (!I->isDebugInstr())
284      break;
285  }
286  return I;
287}
288
289/// Non-const version.
290static MachineBasicBlock::iterator
291priorNonDebug(MachineBasicBlock::iterator I,
292              MachineBasicBlock::const_iterator Beg) {
293  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
294      .getNonConstIterator();
295}
296
297/// If this iterator is a debug value, increment until reaching the End or a
298/// non-debug instruction.
299static MachineBasicBlock::const_iterator
300nextIfDebug(MachineBasicBlock::const_iterator I,
301            MachineBasicBlock::const_iterator End) {
302  for(; I != End; ++I) {
303    if (!I->isDebugInstr())
304      break;
305  }
306  return I;
307}
308
309/// Non-const version.
310static MachineBasicBlock::iterator
311nextIfDebug(MachineBasicBlock::iterator I,
312            MachineBasicBlock::const_iterator End) {
313  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
314      .getNonConstIterator();
315}
316
317/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
318ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
319  // Select the scheduler, or set the default.
320  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
321  if (Ctor != useDefaultMachineSched)
322    return Ctor(this);
323
324  // Get the default scheduler set by the target for this function.
325  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
326  if (Scheduler)
327    return Scheduler;
328
329  // Default to GenericScheduler.
330  return createGenericSchedLive(this);
331}
332
333/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
334/// the caller. We don't have a command line option to override the postRA
335/// scheduler. The Target must configure it.
336ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
337  // Get the postRA scheduler set by the target for this function.
338  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
339  if (Scheduler)
340    return Scheduler;
341
342  // Default to GenericScheduler.
343  return createGenericSchedPostRA(this);
344}
345
346/// Top-level MachineScheduler pass driver.
347///
348/// Visit blocks in function order. Divide each block into scheduling regions
349/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
350/// consistent with the DAG builder, which traverses the interior of the
351/// scheduling regions bottom-up.
352///
353/// This design avoids exposing scheduling boundaries to the DAG builder,
354/// simplifying the DAG builder's support for "special" target instructions.
355/// At the same time the design allows target schedulers to operate across
356/// scheduling boundaries, for example to bundle the boundary instructions
357/// without reordering them. This creates complexity, because the target
358/// scheduler must update the RegionBegin and RegionEnd positions cached by
359/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
360/// design would be to split blocks at scheduling boundaries, but LLVM has a
361/// general bias against block splitting purely for implementation simplicity.
362bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
363  if (skipFunction(mf.getFunction()))
364    return false;
365
366  if (EnableMachineSched.getNumOccurrences()) {
367    if (!EnableMachineSched)
368      return false;
369  } else if (!mf.getSubtarget().enableMachineScheduler())
370    return false;
371
372  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
373
374  // Initialize the context of the pass.
375  MF = &mf;
376  MLI = &getAnalysis<MachineLoopInfo>();
377  MDT = &getAnalysis<MachineDominatorTree>();
378  PassConfig = &getAnalysis<TargetPassConfig>();
379  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
380
381  LIS = &getAnalysis<LiveIntervals>();
382
383  if (VerifyScheduling) {
384    LLVM_DEBUG(LIS->dump());
385    MF->verify(this, "Before machine scheduling.");
386  }
387  RegClassInfo->runOnMachineFunction(*MF);
388
389  // Instantiate the selected scheduler for this target, function, and
390  // optimization level.
391  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
392  scheduleRegions(*Scheduler, false);
393
394  LLVM_DEBUG(LIS->dump());
395  if (VerifyScheduling)
396    MF->verify(this, "After machine scheduling.");
397  return true;
398}
399
400bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
401  if (skipFunction(mf.getFunction()))
402    return false;
403
404  if (EnablePostRAMachineSched.getNumOccurrences()) {
405    if (!EnablePostRAMachineSched)
406      return false;
407  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
408    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
409    return false;
410  }
411  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
412
413  // Initialize the context of the pass.
414  MF = &mf;
415  MLI = &getAnalysis<MachineLoopInfo>();
416  PassConfig = &getAnalysis<TargetPassConfig>();
417  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
418
419  if (VerifyScheduling)
420    MF->verify(this, "Before post machine scheduling.");
421
422  // Instantiate the selected scheduler for this target, function, and
423  // optimization level.
424  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
425  scheduleRegions(*Scheduler, true);
426
427  if (VerifyScheduling)
428    MF->verify(this, "After post machine scheduling.");
429  return true;
430}
431
432/// Return true of the given instruction should not be included in a scheduling
433/// region.
434///
435/// MachineScheduler does not currently support scheduling across calls. To
436/// handle calls, the DAG builder needs to be modified to create register
437/// anti/output dependencies on the registers clobbered by the call's regmask
438/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
439/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
440/// the boundary, but there would be no benefit to postRA scheduling across
441/// calls this late anyway.
442static bool isSchedBoundary(MachineBasicBlock::iterator MI,
443                            MachineBasicBlock *MBB,
444                            MachineFunction *MF,
445                            const TargetInstrInfo *TII) {
446  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
447}
448
449/// A region of an MBB for scheduling.
450namespace {
451struct SchedRegion {
452  /// RegionBegin is the first instruction in the scheduling region, and
453  /// RegionEnd is either MBB->end() or the scheduling boundary after the
454  /// last instruction in the scheduling region. These iterators cannot refer
455  /// to instructions outside of the identified scheduling region because
456  /// those may be reordered before scheduling this region.
457  MachineBasicBlock::iterator RegionBegin;
458  MachineBasicBlock::iterator RegionEnd;
459  unsigned NumRegionInstrs;
460
461  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
462              unsigned N) :
463    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
464};
465} // end anonymous namespace
466
467using MBBRegionsVector = SmallVector<SchedRegion, 16>;
468
469static void
470getSchedRegions(MachineBasicBlock *MBB,
471                MBBRegionsVector &Regions,
472                bool RegionsTopDown) {
473  MachineFunction *MF = MBB->getParent();
474  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
475
476  MachineBasicBlock::iterator I = nullptr;
477  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
478      RegionEnd != MBB->begin(); RegionEnd = I) {
479
480    // Avoid decrementing RegionEnd for blocks with no terminator.
481    if (RegionEnd != MBB->end() ||
482        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
483      --RegionEnd;
484    }
485
486    // The next region starts above the previous region. Look backward in the
487    // instruction stream until we find the nearest boundary.
488    unsigned NumRegionInstrs = 0;
489    I = RegionEnd;
490    for (;I != MBB->begin(); --I) {
491      MachineInstr &MI = *std::prev(I);
492      if (isSchedBoundary(&MI, &*MBB, MF, TII))
493        break;
494      if (!MI.isDebugInstr()) {
495        // MBB::size() uses instr_iterator to count. Here we need a bundle to
496        // count as a single instruction.
497        ++NumRegionInstrs;
498      }
499    }
500
501    // It's possible we found a scheduling region that only has debug
502    // instructions. Don't bother scheduling these.
503    if (NumRegionInstrs != 0)
504      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
505  }
506
507  if (RegionsTopDown)
508    std::reverse(Regions.begin(), Regions.end());
509}
510
511/// Main driver for both MachineScheduler and PostMachineScheduler.
512void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
513                                           bool FixKillFlags) {
514  // Visit all machine basic blocks.
515  //
516  // TODO: Visit blocks in global postorder or postorder within the bottom-up
517  // loop tree. Then we can optionally compute global RegPressure.
518  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
519       MBB != MBBEnd; ++MBB) {
520
521    Scheduler.startBlock(&*MBB);
522
523#ifndef NDEBUG
524    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
525      continue;
526    if (SchedOnlyBlock.getNumOccurrences()
527        && (int)SchedOnlyBlock != MBB->getNumber())
528      continue;
529#endif
530
531    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
532    // points to the scheduling boundary at the bottom of the region. The DAG
533    // does not include RegionEnd, but the region does (i.e. the next
534    // RegionEnd is above the previous RegionBegin). If the current block has
535    // no terminator then RegionEnd == MBB->end() for the bottom region.
536    //
537    // All the regions of MBB are first found and stored in MBBRegions, which
538    // will be processed (MBB) top-down if initialized with true.
539    //
540    // The Scheduler may insert instructions during either schedule() or
541    // exitRegion(), even for empty regions. So the local iterators 'I' and
542    // 'RegionEnd' are invalid across these calls. Instructions must not be
543    // added to other regions than the current one without updating MBBRegions.
544
545    MBBRegionsVector MBBRegions;
546    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
547    for (MBBRegionsVector::iterator R = MBBRegions.begin();
548         R != MBBRegions.end(); ++R) {
549      MachineBasicBlock::iterator I = R->RegionBegin;
550      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
551      unsigned NumRegionInstrs = R->NumRegionInstrs;
552
553      // Notify the scheduler of the region, even if we may skip scheduling
554      // it. Perhaps it still needs to be bundled.
555      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
556
557      // Skip empty scheduling regions (0 or 1 schedulable instructions).
558      if (I == RegionEnd || I == std::prev(RegionEnd)) {
559        // Close the current region. Bundle the terminator if needed.
560        // This invalidates 'RegionEnd' and 'I'.
561        Scheduler.exitRegion();
562        continue;
563      }
564      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
565      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
566                        << " " << MBB->getName() << "\n  From: " << *I
567                        << "    To: ";
568                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
569                 else dbgs() << "End";
570                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
571      if (DumpCriticalPathLength) {
572        errs() << MF->getName();
573        errs() << ":%bb. " << MBB->getNumber();
574        errs() << " " << MBB->getName() << " \n";
575      }
576
577      // Schedule a region: possibly reorder instructions.
578      // This invalidates the original region iterators.
579      Scheduler.schedule();
580
581      // Close the current region.
582      Scheduler.exitRegion();
583    }
584    Scheduler.finishBlock();
585    // FIXME: Ideally, no further passes should rely on kill flags. However,
586    // thumb2 size reduction is currently an exception, so the PostMIScheduler
587    // needs to do this.
588    if (FixKillFlags)
589      Scheduler.fixupKills(*MBB);
590  }
591  Scheduler.finalizeSchedule();
592}
593
594void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
595  // unimplemented
596}
597
598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
599LLVM_DUMP_METHOD void ReadyQueue::dump() const {
600  dbgs() << "Queue " << Name << ": ";
601  for (const SUnit *SU : Queue)
602    dbgs() << SU->NodeNum << " ";
603  dbgs() << "\n";
604}
605#endif
606
607//===----------------------------------------------------------------------===//
608// ScheduleDAGMI - Basic machine instruction scheduling. This is
609// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
610// virtual registers.
611// ===----------------------------------------------------------------------===/
612
613// Provide a vtable anchor.
614ScheduleDAGMI::~ScheduleDAGMI() = default;
615
616/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
617/// NumPredsLeft reaches zero, release the successor node.
618///
619/// FIXME: Adjust SuccSU height based on MinLatency.
620void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
621  SUnit *SuccSU = SuccEdge->getSUnit();
622
623  if (SuccEdge->isWeak()) {
624    --SuccSU->WeakPredsLeft;
625    if (SuccEdge->isCluster())
626      NextClusterSucc = SuccSU;
627    return;
628  }
629#ifndef NDEBUG
630  if (SuccSU->NumPredsLeft == 0) {
631    dbgs() << "*** Scheduling failed! ***\n";
632    dumpNode(*SuccSU);
633    dbgs() << " has been released too many times!\n";
634    llvm_unreachable(nullptr);
635  }
636#endif
637  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
638  // CurrCycle may have advanced since then.
639  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
640    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
641
642  --SuccSU->NumPredsLeft;
643  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
644    SchedImpl->releaseTopNode(SuccSU);
645}
646
647/// releaseSuccessors - Call releaseSucc on each of SU's successors.
648void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
649  for (SDep &Succ : SU->Succs)
650    releaseSucc(SU, &Succ);
651}
652
653/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
654/// NumSuccsLeft reaches zero, release the predecessor node.
655///
656/// FIXME: Adjust PredSU height based on MinLatency.
657void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
658  SUnit *PredSU = PredEdge->getSUnit();
659
660  if (PredEdge->isWeak()) {
661    --PredSU->WeakSuccsLeft;
662    if (PredEdge->isCluster())
663      NextClusterPred = PredSU;
664    return;
665  }
666#ifndef NDEBUG
667  if (PredSU->NumSuccsLeft == 0) {
668    dbgs() << "*** Scheduling failed! ***\n";
669    dumpNode(*PredSU);
670    dbgs() << " has been released too many times!\n";
671    llvm_unreachable(nullptr);
672  }
673#endif
674  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
675  // CurrCycle may have advanced since then.
676  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
677    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
678
679  --PredSU->NumSuccsLeft;
680  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
681    SchedImpl->releaseBottomNode(PredSU);
682}
683
684/// releasePredecessors - Call releasePred on each of SU's predecessors.
685void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
686  for (SDep &Pred : SU->Preds)
687    releasePred(SU, &Pred);
688}
689
690void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
691  ScheduleDAGInstrs::startBlock(bb);
692  SchedImpl->enterMBB(bb);
693}
694
695void ScheduleDAGMI::finishBlock() {
696  SchedImpl->leaveMBB();
697  ScheduleDAGInstrs::finishBlock();
698}
699
700/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
701/// crossing a scheduling boundary. [begin, end) includes all instructions in
702/// the region, including the boundary itself and single-instruction regions
703/// that don't get scheduled.
704void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
705                                     MachineBasicBlock::iterator begin,
706                                     MachineBasicBlock::iterator end,
707                                     unsigned regioninstrs)
708{
709  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
710
711  SchedImpl->initPolicy(begin, end, regioninstrs);
712}
713
714/// This is normally called from the main scheduler loop but may also be invoked
715/// by the scheduling strategy to perform additional code motion.
716void ScheduleDAGMI::moveInstruction(
717  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
718  // Advance RegionBegin if the first instruction moves down.
719  if (&*RegionBegin == MI)
720    ++RegionBegin;
721
722  // Update the instruction stream.
723  BB->splice(InsertPos, BB, MI);
724
725  // Update LiveIntervals
726  if (LIS)
727    LIS->handleMove(*MI, /*UpdateFlags=*/true);
728
729  // Recede RegionBegin if an instruction moves above the first.
730  if (RegionBegin == InsertPos)
731    RegionBegin = MI;
732}
733
734bool ScheduleDAGMI::checkSchedLimit() {
735#ifndef NDEBUG
736  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
737    CurrentTop = CurrentBottom;
738    return false;
739  }
740  ++NumInstrsScheduled;
741#endif
742  return true;
743}
744
745/// Per-region scheduling driver, called back from
746/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
747/// does not consider liveness or register pressure. It is useful for PostRA
748/// scheduling and potentially other custom schedulers.
749void ScheduleDAGMI::schedule() {
750  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
751  LLVM_DEBUG(SchedImpl->dumpPolicy());
752
753  // Build the DAG.
754  buildSchedGraph(AA);
755
756  postprocessDAG();
757
758  SmallVector<SUnit*, 8> TopRoots, BotRoots;
759  findRootsAndBiasEdges(TopRoots, BotRoots);
760
761  LLVM_DEBUG(dump());
762  if (PrintDAGs) dump();
763  if (ViewMISchedDAGs) viewGraph();
764
765  // Initialize the strategy before modifying the DAG.
766  // This may initialize a DFSResult to be used for queue priority.
767  SchedImpl->initialize(this);
768
769  // Initialize ready queues now that the DAG and priority data are finalized.
770  initQueues(TopRoots, BotRoots);
771
772  bool IsTopNode = false;
773  while (true) {
774    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
775    SUnit *SU = SchedImpl->pickNode(IsTopNode);
776    if (!SU) break;
777
778    assert(!SU->isScheduled && "Node already scheduled");
779    if (!checkSchedLimit())
780      break;
781
782    MachineInstr *MI = SU->getInstr();
783    if (IsTopNode) {
784      assert(SU->isTopReady() && "node still has unscheduled dependencies");
785      if (&*CurrentTop == MI)
786        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
787      else
788        moveInstruction(MI, CurrentTop);
789    } else {
790      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
791      MachineBasicBlock::iterator priorII =
792        priorNonDebug(CurrentBottom, CurrentTop);
793      if (&*priorII == MI)
794        CurrentBottom = priorII;
795      else {
796        if (&*CurrentTop == MI)
797          CurrentTop = nextIfDebug(++CurrentTop, priorII);
798        moveInstruction(MI, CurrentBottom);
799        CurrentBottom = MI;
800      }
801    }
802    // Notify the scheduling strategy before updating the DAG.
803    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
804    // runs, it can then use the accurate ReadyCycle time to determine whether
805    // newly released nodes can move to the readyQ.
806    SchedImpl->schedNode(SU, IsTopNode);
807
808    updateQueues(SU, IsTopNode);
809  }
810  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
811
812  placeDebugValues();
813
814  LLVM_DEBUG({
815    dbgs() << "*** Final schedule for "
816           << printMBBReference(*begin()->getParent()) << " ***\n";
817    dumpSchedule();
818    dbgs() << '\n';
819  });
820}
821
822/// Apply each ScheduleDAGMutation step in order.
823void ScheduleDAGMI::postprocessDAG() {
824  for (auto &m : Mutations)
825    m->apply(this);
826}
827
828void ScheduleDAGMI::
829findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
830                      SmallVectorImpl<SUnit*> &BotRoots) {
831  for (SUnit &SU : SUnits) {
832    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
833
834    // Order predecessors so DFSResult follows the critical path.
835    SU.biasCriticalPath();
836
837    // A SUnit is ready to top schedule if it has no predecessors.
838    if (!SU.NumPredsLeft)
839      TopRoots.push_back(&SU);
840    // A SUnit is ready to bottom schedule if it has no successors.
841    if (!SU.NumSuccsLeft)
842      BotRoots.push_back(&SU);
843  }
844  ExitSU.biasCriticalPath();
845}
846
847/// Identify DAG roots and setup scheduler queues.
848void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
849                               ArrayRef<SUnit*> BotRoots) {
850  NextClusterSucc = nullptr;
851  NextClusterPred = nullptr;
852
853  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
854  //
855  // Nodes with unreleased weak edges can still be roots.
856  // Release top roots in forward order.
857  for (SUnit *SU : TopRoots)
858    SchedImpl->releaseTopNode(SU);
859
860  // Release bottom roots in reverse order so the higher priority nodes appear
861  // first. This is more natural and slightly more efficient.
862  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
863         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
864    SchedImpl->releaseBottomNode(*I);
865  }
866
867  releaseSuccessors(&EntrySU);
868  releasePredecessors(&ExitSU);
869
870  SchedImpl->registerRoots();
871
872  // Advance past initial DebugValues.
873  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
874  CurrentBottom = RegionEnd;
875}
876
877/// Update scheduler queues after scheduling an instruction.
878void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
879  // Release dependent instructions for scheduling.
880  if (IsTopNode)
881    releaseSuccessors(SU);
882  else
883    releasePredecessors(SU);
884
885  SU->isScheduled = true;
886}
887
888/// Reinsert any remaining debug_values, just like the PostRA scheduler.
889void ScheduleDAGMI::placeDebugValues() {
890  // If first instruction was a DBG_VALUE then put it back.
891  if (FirstDbgValue) {
892    BB->splice(RegionBegin, BB, FirstDbgValue);
893    RegionBegin = FirstDbgValue;
894  }
895
896  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
897         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
898    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
899    MachineInstr *DbgValue = P.first;
900    MachineBasicBlock::iterator OrigPrevMI = P.second;
901    if (&*RegionBegin == DbgValue)
902      ++RegionBegin;
903    BB->splice(++OrigPrevMI, BB, DbgValue);
904    if (OrigPrevMI == std::prev(RegionEnd))
905      RegionEnd = DbgValue;
906  }
907  DbgValues.clear();
908  FirstDbgValue = nullptr;
909}
910
911#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
912LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
913  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
914    if (SUnit *SU = getSUnit(&(*MI)))
915      dumpNode(*SU);
916    else
917      dbgs() << "Missing SUnit\n";
918  }
919}
920#endif
921
922//===----------------------------------------------------------------------===//
923// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
924// preservation.
925//===----------------------------------------------------------------------===//
926
927ScheduleDAGMILive::~ScheduleDAGMILive() {
928  delete DFSResult;
929}
930
931void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
932  const MachineInstr &MI = *SU.getInstr();
933  for (const MachineOperand &MO : MI.operands()) {
934    if (!MO.isReg())
935      continue;
936    if (!MO.readsReg())
937      continue;
938    if (TrackLaneMasks && !MO.isUse())
939      continue;
940
941    Register Reg = MO.getReg();
942    if (!Register::isVirtualRegister(Reg))
943      continue;
944
945    // Ignore re-defs.
946    if (TrackLaneMasks) {
947      bool FoundDef = false;
948      for (const MachineOperand &MO2 : MI.operands()) {
949        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
950          FoundDef = true;
951          break;
952        }
953      }
954      if (FoundDef)
955        continue;
956    }
957
958    // Record this local VReg use.
959    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
960    for (; UI != VRegUses.end(); ++UI) {
961      if (UI->SU == &SU)
962        break;
963    }
964    if (UI == VRegUses.end())
965      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
966  }
967}
968
969/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
970/// crossing a scheduling boundary. [begin, end) includes all instructions in
971/// the region, including the boundary itself and single-instruction regions
972/// that don't get scheduled.
973void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
974                                MachineBasicBlock::iterator begin,
975                                MachineBasicBlock::iterator end,
976                                unsigned regioninstrs)
977{
978  // ScheduleDAGMI initializes SchedImpl's per-region policy.
979  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
980
981  // For convenience remember the end of the liveness region.
982  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
983
984  SUPressureDiffs.clear();
985
986  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
987  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
988
989  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
990         "ShouldTrackLaneMasks requires ShouldTrackPressure");
991}
992
993// Setup the register pressure trackers for the top scheduled and bottom
994// scheduled regions.
995void ScheduleDAGMILive::initRegPressure() {
996  VRegUses.clear();
997  VRegUses.setUniverse(MRI.getNumVirtRegs());
998  for (SUnit &SU : SUnits)
999    collectVRegUses(SU);
1000
1001  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1002                    ShouldTrackLaneMasks, false);
1003  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1004                    ShouldTrackLaneMasks, false);
1005
1006  // Close the RPTracker to finalize live ins.
1007  RPTracker.closeRegion();
1008
1009  LLVM_DEBUG(RPTracker.dump());
1010
1011  // Initialize the live ins and live outs.
1012  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1013  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1014
1015  // Close one end of the tracker so we can call
1016  // getMaxUpward/DownwardPressureDelta before advancing across any
1017  // instructions. This converts currently live regs into live ins/outs.
1018  TopRPTracker.closeTop();
1019  BotRPTracker.closeBottom();
1020
1021  BotRPTracker.initLiveThru(RPTracker);
1022  if (!BotRPTracker.getLiveThru().empty()) {
1023    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1024    LLVM_DEBUG(dbgs() << "Live Thru: ";
1025               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1026  };
1027
1028  // For each live out vreg reduce the pressure change associated with other
1029  // uses of the same vreg below the live-out reaching def.
1030  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1031
1032  // Account for liveness generated by the region boundary.
1033  if (LiveRegionEnd != RegionEnd) {
1034    SmallVector<RegisterMaskPair, 8> LiveUses;
1035    BotRPTracker.recede(&LiveUses);
1036    updatePressureDiffs(LiveUses);
1037  }
1038
1039  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1040             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1041             dbgs() << "Bottom Pressure:\n";
1042             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1043
1044  assert((BotRPTracker.getPos() == RegionEnd ||
1045          (RegionEnd->isDebugInstr() &&
1046           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1047         "Can't find the region bottom");
1048
1049  // Cache the list of excess pressure sets in this region. This will also track
1050  // the max pressure in the scheduled code for these sets.
1051  RegionCriticalPSets.clear();
1052  const std::vector<unsigned> &RegionPressure =
1053    RPTracker.getPressure().MaxSetPressure;
1054  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1055    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1056    if (RegionPressure[i] > Limit) {
1057      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1058                        << " Actual " << RegionPressure[i] << "\n");
1059      RegionCriticalPSets.push_back(PressureChange(i));
1060    }
1061  }
1062  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1063             for (const PressureChange &RCPS
1064                  : RegionCriticalPSets) dbgs()
1065             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1066             dbgs() << "\n");
1067}
1068
1069void ScheduleDAGMILive::
1070updateScheduledPressure(const SUnit *SU,
1071                        const std::vector<unsigned> &NewMaxPressure) {
1072  const PressureDiff &PDiff = getPressureDiff(SU);
1073  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1074  for (const PressureChange &PC : PDiff) {
1075    if (!PC.isValid())
1076      break;
1077    unsigned ID = PC.getPSet();
1078    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1079      ++CritIdx;
1080    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1081      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1082          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1083        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1084    }
1085    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1086    if (NewMaxPressure[ID] >= Limit - 2) {
1087      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1088                        << NewMaxPressure[ID]
1089                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1090                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1091                        << " livethru)\n");
1092    }
1093  }
1094}
1095
1096/// Update the PressureDiff array for liveness after scheduling this
1097/// instruction.
1098void ScheduleDAGMILive::updatePressureDiffs(
1099    ArrayRef<RegisterMaskPair> LiveUses) {
1100  for (const RegisterMaskPair &P : LiveUses) {
1101    unsigned Reg = P.RegUnit;
1102    /// FIXME: Currently assuming single-use physregs.
1103    if (!Register::isVirtualRegister(Reg))
1104      continue;
1105
1106    if (ShouldTrackLaneMasks) {
1107      // If the register has just become live then other uses won't change
1108      // this fact anymore => decrement pressure.
1109      // If the register has just become dead then other uses make it come
1110      // back to life => increment pressure.
1111      bool Decrement = P.LaneMask.any();
1112
1113      for (const VReg2SUnit &V2SU
1114           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1115        SUnit &SU = *V2SU.SU;
1116        if (SU.isScheduled || &SU == &ExitSU)
1117          continue;
1118
1119        PressureDiff &PDiff = getPressureDiff(&SU);
1120        PDiff.addPressureChange(Reg, Decrement, &MRI);
1121        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1122                          << printReg(Reg, TRI) << ':'
1123                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1124                   dbgs() << "              to "; PDiff.dump(*TRI););
1125      }
1126    } else {
1127      assert(P.LaneMask.any());
1128      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1129      // This may be called before CurrentBottom has been initialized. However,
1130      // BotRPTracker must have a valid position. We want the value live into the
1131      // instruction or live out of the block, so ask for the previous
1132      // instruction's live-out.
1133      const LiveInterval &LI = LIS->getInterval(Reg);
1134      VNInfo *VNI;
1135      MachineBasicBlock::const_iterator I =
1136        nextIfDebug(BotRPTracker.getPos(), BB->end());
1137      if (I == BB->end())
1138        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1139      else {
1140        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1141        VNI = LRQ.valueIn();
1142      }
1143      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1144      assert(VNI && "No live value at use.");
1145      for (const VReg2SUnit &V2SU
1146           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1147        SUnit *SU = V2SU.SU;
1148        // If this use comes before the reaching def, it cannot be a last use,
1149        // so decrease its pressure change.
1150        if (!SU->isScheduled && SU != &ExitSU) {
1151          LiveQueryResult LRQ =
1152              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1153          if (LRQ.valueIn() == VNI) {
1154            PressureDiff &PDiff = getPressureDiff(SU);
1155            PDiff.addPressureChange(Reg, true, &MRI);
1156            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1157                              << *SU->getInstr();
1158                       dbgs() << "              to "; PDiff.dump(*TRI););
1159          }
1160        }
1161      }
1162    }
1163  }
1164}
1165
1166void ScheduleDAGMILive::dump() const {
1167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168  if (EntrySU.getInstr() != nullptr)
1169    dumpNodeAll(EntrySU);
1170  for (const SUnit &SU : SUnits) {
1171    dumpNodeAll(SU);
1172    if (ShouldTrackPressure) {
1173      dbgs() << "  Pressure Diff      : ";
1174      getPressureDiff(&SU).dump(*TRI);
1175    }
1176    dbgs() << "  Single Issue       : ";
1177    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1178        SchedModel.mustEndGroup(SU.getInstr()))
1179      dbgs() << "true;";
1180    else
1181      dbgs() << "false;";
1182    dbgs() << '\n';
1183  }
1184  if (ExitSU.getInstr() != nullptr)
1185    dumpNodeAll(ExitSU);
1186#endif
1187}
1188
1189/// schedule - Called back from MachineScheduler::runOnMachineFunction
1190/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1191/// only includes instructions that have DAG nodes, not scheduling boundaries.
1192///
1193/// This is a skeletal driver, with all the functionality pushed into helpers,
1194/// so that it can be easily extended by experimental schedulers. Generally,
1195/// implementing MachineSchedStrategy should be sufficient to implement a new
1196/// scheduling algorithm. However, if a scheduler further subclasses
1197/// ScheduleDAGMILive then it will want to override this virtual method in order
1198/// to update any specialized state.
1199void ScheduleDAGMILive::schedule() {
1200  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1201  LLVM_DEBUG(SchedImpl->dumpPolicy());
1202  buildDAGWithRegPressure();
1203
1204  postprocessDAG();
1205
1206  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1207  findRootsAndBiasEdges(TopRoots, BotRoots);
1208
1209  // Initialize the strategy before modifying the DAG.
1210  // This may initialize a DFSResult to be used for queue priority.
1211  SchedImpl->initialize(this);
1212
1213  LLVM_DEBUG(dump());
1214  if (PrintDAGs) dump();
1215  if (ViewMISchedDAGs) viewGraph();
1216
1217  // Initialize ready queues now that the DAG and priority data are finalized.
1218  initQueues(TopRoots, BotRoots);
1219
1220  bool IsTopNode = false;
1221  while (true) {
1222    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1223    SUnit *SU = SchedImpl->pickNode(IsTopNode);
1224    if (!SU) break;
1225
1226    assert(!SU->isScheduled && "Node already scheduled");
1227    if (!checkSchedLimit())
1228      break;
1229
1230    scheduleMI(SU, IsTopNode);
1231
1232    if (DFSResult) {
1233      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1234      if (!ScheduledTrees.test(SubtreeID)) {
1235        ScheduledTrees.set(SubtreeID);
1236        DFSResult->scheduleTree(SubtreeID);
1237        SchedImpl->scheduleTree(SubtreeID);
1238      }
1239    }
1240
1241    // Notify the scheduling strategy after updating the DAG.
1242    SchedImpl->schedNode(SU, IsTopNode);
1243
1244    updateQueues(SU, IsTopNode);
1245  }
1246  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1247
1248  placeDebugValues();
1249
1250  LLVM_DEBUG({
1251    dbgs() << "*** Final schedule for "
1252           << printMBBReference(*begin()->getParent()) << " ***\n";
1253    dumpSchedule();
1254    dbgs() << '\n';
1255  });
1256}
1257
1258/// Build the DAG and setup three register pressure trackers.
1259void ScheduleDAGMILive::buildDAGWithRegPressure() {
1260  if (!ShouldTrackPressure) {
1261    RPTracker.reset();
1262    RegionCriticalPSets.clear();
1263    buildSchedGraph(AA);
1264    return;
1265  }
1266
1267  // Initialize the register pressure tracker used by buildSchedGraph.
1268  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1269                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1270
1271  // Account for liveness generate by the region boundary.
1272  if (LiveRegionEnd != RegionEnd)
1273    RPTracker.recede();
1274
1275  // Build the DAG, and compute current register pressure.
1276  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1277
1278  // Initialize top/bottom trackers after computing region pressure.
1279  initRegPressure();
1280}
1281
1282void ScheduleDAGMILive::computeDFSResult() {
1283  if (!DFSResult)
1284    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1285  DFSResult->clear();
1286  ScheduledTrees.clear();
1287  DFSResult->resize(SUnits.size());
1288  DFSResult->compute(SUnits);
1289  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1290}
1291
1292/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1293/// only provides the critical path for single block loops. To handle loops that
1294/// span blocks, we could use the vreg path latencies provided by
1295/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1296/// available for use in the scheduler.
1297///
1298/// The cyclic path estimation identifies a def-use pair that crosses the back
1299/// edge and considers the depth and height of the nodes. For example, consider
1300/// the following instruction sequence where each instruction has unit latency
1301/// and defines an epomymous virtual register:
1302///
1303/// a->b(a,c)->c(b)->d(c)->exit
1304///
1305/// The cyclic critical path is a two cycles: b->c->b
1306/// The acyclic critical path is four cycles: a->b->c->d->exit
1307/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1308/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1309/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1310/// LiveInDepth = depth(b) = len(a->b) = 1
1311///
1312/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1313/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1314/// CyclicCriticalPath = min(2, 2) = 2
1315///
1316/// This could be relevant to PostRA scheduling, but is currently implemented
1317/// assuming LiveIntervals.
1318unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1319  // This only applies to single block loop.
1320  if (!BB->isSuccessor(BB))
1321    return 0;
1322
1323  unsigned MaxCyclicLatency = 0;
1324  // Visit each live out vreg def to find def/use pairs that cross iterations.
1325  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1326    unsigned Reg = P.RegUnit;
1327    if (!Register::isVirtualRegister(Reg))
1328      continue;
1329    const LiveInterval &LI = LIS->getInterval(Reg);
1330    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1331    if (!DefVNI)
1332      continue;
1333
1334    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1335    const SUnit *DefSU = getSUnit(DefMI);
1336    if (!DefSU)
1337      continue;
1338
1339    unsigned LiveOutHeight = DefSU->getHeight();
1340    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1341    // Visit all local users of the vreg def.
1342    for (const VReg2SUnit &V2SU
1343         : make_range(VRegUses.find(Reg), VRegUses.end())) {
1344      SUnit *SU = V2SU.SU;
1345      if (SU == &ExitSU)
1346        continue;
1347
1348      // Only consider uses of the phi.
1349      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1350      if (!LRQ.valueIn()->isPHIDef())
1351        continue;
1352
1353      // Assume that a path spanning two iterations is a cycle, which could
1354      // overestimate in strange cases. This allows cyclic latency to be
1355      // estimated as the minimum slack of the vreg's depth or height.
1356      unsigned CyclicLatency = 0;
1357      if (LiveOutDepth > SU->getDepth())
1358        CyclicLatency = LiveOutDepth - SU->getDepth();
1359
1360      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1361      if (LiveInHeight > LiveOutHeight) {
1362        if (LiveInHeight - LiveOutHeight < CyclicLatency)
1363          CyclicLatency = LiveInHeight - LiveOutHeight;
1364      } else
1365        CyclicLatency = 0;
1366
1367      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1368                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1369      if (CyclicLatency > MaxCyclicLatency)
1370        MaxCyclicLatency = CyclicLatency;
1371    }
1372  }
1373  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1374  return MaxCyclicLatency;
1375}
1376
1377/// Release ExitSU predecessors and setup scheduler queues. Re-position
1378/// the Top RP tracker in case the region beginning has changed.
1379void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1380                                   ArrayRef<SUnit*> BotRoots) {
1381  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1382  if (ShouldTrackPressure) {
1383    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1384    TopRPTracker.setPos(CurrentTop);
1385  }
1386}
1387
1388/// Move an instruction and update register pressure.
1389void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1390  // Move the instruction to its new location in the instruction stream.
1391  MachineInstr *MI = SU->getInstr();
1392
1393  if (IsTopNode) {
1394    assert(SU->isTopReady() && "node still has unscheduled dependencies");
1395    if (&*CurrentTop == MI)
1396      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1397    else {
1398      moveInstruction(MI, CurrentTop);
1399      TopRPTracker.setPos(MI);
1400    }
1401
1402    if (ShouldTrackPressure) {
1403      // Update top scheduled pressure.
1404      RegisterOperands RegOpers;
1405      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1406      if (ShouldTrackLaneMasks) {
1407        // Adjust liveness and add missing dead+read-undef flags.
1408        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1409        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1410      } else {
1411        // Adjust for missing dead-def flags.
1412        RegOpers.detectDeadDefs(*MI, *LIS);
1413      }
1414
1415      TopRPTracker.advance(RegOpers);
1416      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1417      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1418                     TopRPTracker.getRegSetPressureAtPos(), TRI););
1419
1420      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1421    }
1422  } else {
1423    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1424    MachineBasicBlock::iterator priorII =
1425      priorNonDebug(CurrentBottom, CurrentTop);
1426    if (&*priorII == MI)
1427      CurrentBottom = priorII;
1428    else {
1429      if (&*CurrentTop == MI) {
1430        CurrentTop = nextIfDebug(++CurrentTop, priorII);
1431        TopRPTracker.setPos(CurrentTop);
1432      }
1433      moveInstruction(MI, CurrentBottom);
1434      CurrentBottom = MI;
1435      BotRPTracker.setPos(CurrentBottom);
1436    }
1437    if (ShouldTrackPressure) {
1438      RegisterOperands RegOpers;
1439      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1440      if (ShouldTrackLaneMasks) {
1441        // Adjust liveness and add missing dead+read-undef flags.
1442        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1443        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1444      } else {
1445        // Adjust for missing dead-def flags.
1446        RegOpers.detectDeadDefs(*MI, *LIS);
1447      }
1448
1449      if (BotRPTracker.getPos() != CurrentBottom)
1450        BotRPTracker.recedeSkipDebugValues();
1451      SmallVector<RegisterMaskPair, 8> LiveUses;
1452      BotRPTracker.recede(RegOpers, &LiveUses);
1453      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1454      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1455                     BotRPTracker.getRegSetPressureAtPos(), TRI););
1456
1457      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1458      updatePressureDiffs(LiveUses);
1459    }
1460  }
1461}
1462
1463//===----------------------------------------------------------------------===//
1464// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1465//===----------------------------------------------------------------------===//
1466
1467namespace {
1468
1469/// Post-process the DAG to create cluster edges between neighboring
1470/// loads or between neighboring stores.
1471class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1472  struct MemOpInfo {
1473    SUnit *SU;
1474    SmallVector<const MachineOperand *, 4> BaseOps;
1475    int64_t Offset;
1476    unsigned Width;
1477
1478    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1479              int64_t Offset, unsigned Width)
1480        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1481          Width(Width) {}
1482
1483    static bool Compare(const MachineOperand *const &A,
1484                        const MachineOperand *const &B) {
1485      if (A->getType() != B->getType())
1486        return A->getType() < B->getType();
1487      if (A->isReg())
1488        return A->getReg() < B->getReg();
1489      if (A->isFI()) {
1490        const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1491        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1492        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1493                              TargetFrameLowering::StackGrowsDown;
1494        return StackGrowsDown ? A->getIndex() > B->getIndex()
1495                              : A->getIndex() < B->getIndex();
1496      }
1497
1498      llvm_unreachable("MemOpClusterMutation only supports register or frame "
1499                       "index bases.");
1500    }
1501
1502    bool operator<(const MemOpInfo &RHS) const {
1503      // FIXME: Don't compare everything twice. Maybe use C++20 three way
1504      // comparison instead when it's available.
1505      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1506                                       RHS.BaseOps.begin(), RHS.BaseOps.end(),
1507                                       Compare))
1508        return true;
1509      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1510                                       BaseOps.begin(), BaseOps.end(), Compare))
1511        return false;
1512      if (Offset != RHS.Offset)
1513        return Offset < RHS.Offset;
1514      return SU->NodeNum < RHS.SU->NodeNum;
1515    }
1516  };
1517
1518  const TargetInstrInfo *TII;
1519  const TargetRegisterInfo *TRI;
1520  bool IsLoad;
1521
1522public:
1523  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1524                           const TargetRegisterInfo *tri, bool IsLoad)
1525      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1526
1527  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1528
1529protected:
1530  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1531};
1532
1533class StoreClusterMutation : public BaseMemOpClusterMutation {
1534public:
1535  StoreClusterMutation(const TargetInstrInfo *tii,
1536                       const TargetRegisterInfo *tri)
1537      : BaseMemOpClusterMutation(tii, tri, false) {}
1538};
1539
1540class LoadClusterMutation : public BaseMemOpClusterMutation {
1541public:
1542  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1543      : BaseMemOpClusterMutation(tii, tri, true) {}
1544};
1545
1546} // end anonymous namespace
1547
1548namespace llvm {
1549
1550std::unique_ptr<ScheduleDAGMutation>
1551createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1552                             const TargetRegisterInfo *TRI) {
1553  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1554                            : nullptr;
1555}
1556
1557std::unique_ptr<ScheduleDAGMutation>
1558createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1559                              const TargetRegisterInfo *TRI) {
1560  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1561                            : nullptr;
1562}
1563
1564} // end namespace llvm
1565
1566void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1567    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1568  SmallVector<MemOpInfo, 32> MemOpRecords;
1569  for (SUnit *SU : MemOps) {
1570    const MachineInstr &MI = *SU->getInstr();
1571    SmallVector<const MachineOperand *, 4> BaseOps;
1572    int64_t Offset;
1573    bool OffsetIsScalable;
1574    unsigned Width;
1575    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1576                                           OffsetIsScalable, Width, TRI)) {
1577      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
1578
1579      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1580                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1581                        << ", Width: " << Width << "\n");
1582    }
1583#ifndef NDEBUG
1584    for (auto *Op : BaseOps)
1585      assert(Op);
1586#endif
1587  }
1588  if (MemOpRecords.size() < 2)
1589    return;
1590
1591  llvm::sort(MemOpRecords);
1592
  // At this point, `MemOpRecords` must hold at least two mem ops. Try to
  // cluster the mem ops collected within the `MemOpRecords` array.
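  // For example (illustrative offsets only), with records at offsets {0, 8,
  // 64} the walk below first asks the target about the (0, 8) pair; if the
  // (8, 64) pair is then rejected, ClusterLength and CurrentClusterBytes are
  // reset and the walk continues from the 64-byte record.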
1595  unsigned ClusterLength = 1;
1596  unsigned CurrentClusterBytes = MemOpRecords[0].Width;
1597  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    // The decision to cluster mem ops is taken based on target-dependent logic.
1599    auto MemOpa = MemOpRecords[Idx];
1600    auto MemOpb = MemOpRecords[Idx + 1];
1601    ++ClusterLength;
1602    CurrentClusterBytes += MemOpb.Width;
1603    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1604                                  CurrentClusterBytes)) {
      // The current pair of mem ops could not be clustered; reset the cluster
      // length and move on to the next pair.
1607      ClusterLength = 1;
1608      CurrentClusterBytes = MemOpb.Width;
1609      continue;
1610    }
1611
1612    SUnit *SUa = MemOpa.SU;
1613    SUnit *SUb = MemOpb.SU;
1614    if (SUa->NodeNum > SUb->NodeNum)
1615      std::swap(SUa, SUb);
1616
1617    // FIXME: Is this check really required?
1618    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1619      ClusterLength = 1;
1620      CurrentClusterBytes = MemOpb.Width;
1621      continue;
1622    }
1623
1624    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1625                      << SUb->NodeNum << ")\n");
1626
1627    // Copy successor edges from SUa to SUb. Interleaving computation
1628    // dependent on SUa can prevent load combining due to register reuse.
1629    // Predecessor edges do not need to be copied from SUb to SUa since
1630    // nearby loads should have effectively the same inputs.
1631    for (const SDep &Succ : SUa->Succs) {
1632      if (Succ.getSUnit() == SUb)
1633        continue;
1634      LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1635                        << ")\n");
1636      DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1637    }
1638
1639    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
1640                      << ", Curr cluster bytes: " << CurrentClusterBytes
1641                      << "\n");
1642  }
1643}
1644
/// Callback from DAG postProcessing to create cluster edges for loads or
/// stores.
1646void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1647  // Map DAG NodeNum to a set of dependent MemOps in store chain.
1648  DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
1649  for (SUnit &SU : DAG->SUnits) {
1650    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1651        (!IsLoad && !SU.getInstr()->mayStore()))
1652      continue;
1653
1654    unsigned ChainPredID = DAG->SUnits.size();
1655    for (const SDep &Pred : SU.Preds) {
1656      if (Pred.isCtrl() && !Pred.isArtificial()) {
1657        ChainPredID = Pred.getSUnit()->NodeNum;
1658        break;
1659      }
1660    }
1661    // Insert the SU to corresponding store chain.
1662    auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
1663    Chain.push_back(&SU);
1664  }
1665
1666  // Iterate over the store chains.
1667  for (auto &SCD : StoreChains)
1668    clusterNeighboringMemOps(SCD.second, DAG);
1669}
1670
1671//===----------------------------------------------------------------------===//
1672// CopyConstrain - DAG post-processing to encourage copy elimination.
1673//===----------------------------------------------------------------------===//
1674
1675namespace {
1676
1677/// Post-process the DAG to create weak edges from all uses of a copy to
1678/// the one use that defines the copy's source vreg, most likely an induction
1679/// variable increment.
1680class CopyConstrain : public ScheduleDAGMutation {
1681  // Transient state.
1682  SlotIndex RegionBeginIdx;
1683
1684  // RegionEndIdx is the slot index of the last non-debug instruction in the
1685  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1686  SlotIndex RegionEndIdx;
1687
1688public:
1689  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1690
1691  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1692
1693protected:
1694  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1695};
1696
1697} // end anonymous namespace
1698
1699namespace llvm {
1700
1701std::unique_ptr<ScheduleDAGMutation>
1702createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1703                               const TargetRegisterInfo *TRI) {
1704  return std::make_unique<CopyConstrain>(TII, TRI);
1705}
1706
1707} // end namespace llvm
1708
1709/// constrainLocalCopy handles two possibilities:
1710/// 1) Local src:
1711/// I0:     = dst
1712/// I1: src = ...
1713/// I2:     = dst
1714/// I3: dst = src (copy)
1715/// (create pred->succ edges I0->I1, I2->I1)
1716///
1717/// 2) Local copy:
1718/// I0: dst = src (copy)
1719/// I1:     = dst
1720/// I2: src = ...
1721/// I3:     = dst
1722/// (create pred->succ edges I1->I2, I3->I2)
1723///
1724/// Although the MachineScheduler is currently constrained to single blocks,
1725/// this algorithm should handle extended blocks. An EBB is a set of
1726/// contiguously numbered blocks such that the previous block in the EBB is
1727/// always the single predecessor.
1728void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1729  LiveIntervals *LIS = DAG->getLIS();
1730  MachineInstr *Copy = CopySU->getInstr();
1731
1732  // Check for pure vreg copies.
1733  const MachineOperand &SrcOp = Copy->getOperand(1);
1734  Register SrcReg = SrcOp.getReg();
1735  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1736    return;
1737
1738  const MachineOperand &DstOp = Copy->getOperand(0);
1739  Register DstReg = DstOp.getReg();
1740  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1741    return;
1742
1743  // Check if either the dest or source is local. If it's live across a back
1744  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1746  // If both the copy's source and dest are local live intervals, then we
1747  // should treat the dest as the global for the purpose of adding
1748  // constraints. This adds edges from source's other uses to the copy.
1749  unsigned LocalReg = SrcReg;
1750  unsigned GlobalReg = DstReg;
1751  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1752  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1753    LocalReg = DstReg;
1754    GlobalReg = SrcReg;
1755    LocalLI = &LIS->getInterval(LocalReg);
1756    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1757      return;
1758  }
1759  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1760
1761  // Find the global segment after the start of the local LI.
1762  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1763  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1764  // local live range. We could create edges from other global uses to the local
1765  // start, but the coalescer should have already eliminated these cases, so
1766  // don't bother dealing with it.
1767  if (GlobalSegment == GlobalLI->end())
1768    return;
1769
1770  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1771  // returned the next global segment. But if GlobalSegment overlaps with
1772  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1773  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1774  if (GlobalSegment->contains(LocalLI->beginIndex()))
1775    ++GlobalSegment;
1776
1777  if (GlobalSegment == GlobalLI->end())
1778    return;
1779
1780  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1781  if (GlobalSegment != GlobalLI->begin()) {
1782    // Two address defs have no hole.
1783    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1784                               GlobalSegment->start)) {
1785      return;
1786    }
1787    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then we can't make a hole here.
1789    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1790                               LocalLI->beginIndex())) {
1791      return;
1792    }
1793    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1794    // it would be a disconnected component in the live range.
1795    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1796           "Disconnected LRG within the scheduling region.");
1797  }
1798  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1799  if (!GlobalDef)
1800    return;
1801
1802  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1803  if (!GlobalSU)
1804    return;
1805
1806  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1807  // constraining the uses of the last local def to precede GlobalDef.
1808  SmallVector<SUnit*,8> LocalUses;
1809  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1810  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1811  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1812  for (const SDep &Succ : LastLocalSU->Succs) {
1813    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1814      continue;
1815    if (Succ.getSUnit() == GlobalSU)
1816      continue;
1817    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1818      return;
1819    LocalUses.push_back(Succ.getSUnit());
1820  }
1821  // Open the top of the GlobalLI hole by constraining any earlier global uses
1822  // to precede the start of LocalLI.
1823  SmallVector<SUnit*,8> GlobalUses;
1824  MachineInstr *FirstLocalDef =
1825    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1826  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1827  for (const SDep &Pred : GlobalSU->Preds) {
1828    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1829      continue;
1830    if (Pred.getSUnit() == FirstLocalSU)
1831      continue;
1832    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1833      return;
1834    GlobalUses.push_back(Pred.getSUnit());
1835  }
1836  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1837  // Add the weak edges.
1838  for (SmallVectorImpl<SUnit*>::const_iterator
1839         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1840    LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1841                      << GlobalSU->NodeNum << ")\n");
1842    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1843  }
1844  for (SmallVectorImpl<SUnit*>::const_iterator
1845         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1846    LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1847                      << FirstLocalSU->NodeNum << ")\n");
1848    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1849  }
1850}
1851
1852/// Callback from DAG postProcessing to create weak edges to encourage
1853/// copy elimination.
1854void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1855  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1856  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1857
1858  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1859  if (FirstPos == DAG->end())
1860    return;
1861  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1862  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1863      *priorNonDebug(DAG->end(), DAG->begin()));
1864
1865  for (SUnit &SU : DAG->SUnits) {
1866    if (!SU.getInstr()->isCopy())
1867      continue;
1868
1869    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1870  }
1871}
1872
1873//===----------------------------------------------------------------------===//
1874// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1875// and possibly other custom schedulers.
1876//===----------------------------------------------------------------------===//
1877
1878static const unsigned InvalidCycle = ~0U;
1879
1880SchedBoundary::~SchedBoundary() { delete HazardRec; }
1881
1882/// Given a Count of resource usage and a Latency value, return true if a
1883/// SchedBoundary becomes resource limited.
1884/// If we are checking after scheduling a node, we should return true when
1885/// we just reach the resource limit.
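///
/// For example (illustrative numbers), with LFactor == 2, Count == 10 and
/// Latency == 4, ResCntFactor is 10 - 2*4 == 2 == LFactor, so when checking
/// after scheduling a node the zone is considered resource limited.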
1886static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1887                               unsigned Latency, bool AfterSchedNode) {
1888  int ResCntFactor = (int)(Count - (Latency * LFactor));
1889  if (AfterSchedNode)
1890    return ResCntFactor >= (int)LFactor;
1891  else
1892    return ResCntFactor > (int)LFactor;
1893}
1894
1895void SchedBoundary::reset() {
1896  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1897  // Destroying and reconstructing it is very expensive though. So keep
1898  // invalid, placeholder HazardRecs.
1899  if (HazardRec && HazardRec->isEnabled()) {
1900    delete HazardRec;
1901    HazardRec = nullptr;
1902  }
1903  Available.clear();
1904  Pending.clear();
1905  CheckPending = false;
1906  CurrCycle = 0;
1907  CurrMOps = 0;
1908  MinReadyCycle = std::numeric_limits<unsigned>::max();
1909  ExpectedLatency = 0;
1910  DependentLatency = 0;
1911  RetiredMOps = 0;
1912  MaxExecutedResCount = 0;
1913  ZoneCritResIdx = 0;
1914  IsResourceLimited = false;
1915  ReservedCycles.clear();
1916  ReservedCyclesIndex.clear();
1917#ifndef NDEBUG
1918  // Track the maximum number of stall cycles that could arise either from the
1919  // latency of a DAG edge or the number of cycles that a processor resource is
1920  // reserved (SchedBoundary::ReservedCycles).
1921  MaxObservedStall = 0;
1922#endif
1923  // Reserve a zero-count for invalid CritResIdx.
1924  ExecutedResCounts.resize(1);
1925  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1926}
1927
1928void SchedRemainder::
1929init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1930  reset();
1931  if (!SchedModel->hasInstrSchedModel())
1932    return;
1933  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1934  for (SUnit &SU : DAG->SUnits) {
1935    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1936    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1937      * SchedModel->getMicroOpFactor();
1938    for (TargetSchedModel::ProcResIter
1939           PI = SchedModel->getWriteProcResBegin(SC),
1940           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1941      unsigned PIdx = PI->ProcResourceIdx;
1942      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1943      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1944    }
1945  }
1946}
1947
1948void SchedBoundary::
1949init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1950  reset();
1951  DAG = dag;
1952  SchedModel = smodel;
1953  Rem = rem;
1954  if (SchedModel->hasInstrSchedModel()) {
1955    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1956    ReservedCyclesIndex.resize(ResourceCount);
1957    ExecutedResCounts.resize(ResourceCount);
1958    unsigned NumUnits = 0;
1959
1960    for (unsigned i = 0; i < ResourceCount; ++i) {
1961      ReservedCyclesIndex[i] = NumUnits;
1962      NumUnits += SchedModel->getProcResource(i)->NumUnits;
1963    }
1964
1965    ReservedCycles.resize(NumUnits, InvalidCycle);
1966  }
1967}
1968
1969/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1970/// these "soft stalls" differently than the hard stall cycles based on CPU
1971/// resources and computed by checkHazard(). A fully in-order model
1972/// (MicroOpBufferSize==0) will not make use of this since instructions are not
1973/// available for scheduling until they are ready. However, a weaker in-order
1974/// model may use this for heuristics. For example, if a processor has in-order
1975/// behavior when reading certain resources, this may come into play.
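///
/// For example (illustrative numbers), an unbuffered SU whose ready cycle is
/// 7 while the zone is at cycle 5 reports a 2-cycle soft stall.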
1976unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1977  if (!SU->isUnbuffered)
1978    return 0;
1979
1980  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1981  if (ReadyCycle > CurrCycle)
1982    return ReadyCycle - CurrCycle;
1983  return 0;
1984}
1985
1986/// Compute the next cycle at which the given processor resource unit
1987/// can be scheduled.
1988unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1989                                                       unsigned Cycles) {
1990  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1991  // If this resource has never been used, always return cycle zero.
1992  if (NextUnreserved == InvalidCycle)
1993    return 0;
1994  // For bottom-up scheduling add the cycles needed for the current operation.
1995  if (!isTop())
1996    NextUnreserved += Cycles;
1997  return NextUnreserved;
1998}
1999
2000/// Compute the next cycle at which the given processor resource can be
2001/// scheduled.  Returns the next cycle and the index of the processor resource
2002/// instance in the reserved cycles vector.
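///
/// For example (illustrative, top-down case), if a resource has two units
/// reserved until cycles 5 and 3 respectively, the pair (3, index of the
/// second unit) is returned.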
2003std::pair<unsigned, unsigned>
2004SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
2005  unsigned MinNextUnreserved = InvalidCycle;
2006  unsigned InstanceIdx = 0;
2007  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2008  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2009  assert(NumberOfInstances > 0 &&
2010         "Cannot have zero instances of a ProcResource");
2011
2012  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2013       ++I) {
2014    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2015    if (MinNextUnreserved > NextUnreserved) {
2016      InstanceIdx = I;
2017      MinNextUnreserved = NextUnreserved;
2018    }
2019  }
2020  return std::make_pair(MinNextUnreserved, InstanceIdx);
2021}
2022
2023/// Does this SU have a hazard within the current instruction group.
2024///
2025/// The scheduler supports two modes of hazard recognition. The first is the
2026/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2027/// supports highly complicated in-order reservation tables
2028/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2029///
2030/// The second is a streamlined mechanism that checks for hazards based on
2031/// simple counters that the scheduler itself maintains. It explicitly checks
2032/// for instruction dispatch limitations, including the number of micro-ops that
2033/// can dispatch per cycle.
2034///
2035/// TODO: Also check whether the SU must start a new group.
2036bool SchedBoundary::checkHazard(SUnit *SU) {
2037  if (HazardRec->isEnabled()
2038      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2039    return true;
2040  }
2041
2042  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
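  // For example (illustrative numbers), with an issue width of 4 and 3
  // micro-ops already issued this cycle, an instruction needing 2 more
  // micro-ops would overflow the current group and is treated as a hazard.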
2043  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2044    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2045                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2046    return true;
2047  }
2048
2049  if (CurrMOps > 0 &&
2050      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2051       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2052    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2053                      << (isTop() ? "begin" : "end") << " group\n");
2054    return true;
2055  }
2056
2057  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2058    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2059    for (const MCWriteProcResEntry &PE :
2060          make_range(SchedModel->getWriteProcResBegin(SC),
2061                     SchedModel->getWriteProcResEnd(SC))) {
2062      unsigned ResIdx = PE.ProcResourceIdx;
2063      unsigned Cycles = PE.Cycles;
2064      unsigned NRCycle, InstanceIdx;
2065      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2066      if (NRCycle > CurrCycle) {
2067#ifndef NDEBUG
2068        MaxObservedStall = std::max(Cycles, MaxObservedStall);
2069#endif
2070        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2071                          << SchedModel->getResourceName(ResIdx)
2072                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2073                          << "=" << NRCycle << "c\n");
2074        return true;
2075      }
2076    }
2077  }
2078  return false;
2079}
2080
2081// Find the unscheduled node in ReadySUs with the highest latency.
2082unsigned SchedBoundary::
2083findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2084  SUnit *LateSU = nullptr;
2085  unsigned RemLatency = 0;
2086  for (SUnit *SU : ReadySUs) {
2087    unsigned L = getUnscheduledLatency(SU);
2088    if (L > RemLatency) {
2089      RemLatency = L;
2090      LateSU = SU;
2091    }
2092  }
2093  if (LateSU) {
2094    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2095                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
2096  }
2097  return RemLatency;
2098}
2099
2100// Count resources in this zone and the remaining unscheduled
// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2102// resource index, or zero if the zone is issue limited.
2103unsigned SchedBoundary::
2104getOtherResourceCount(unsigned &OtherCritIdx) {
2105  OtherCritIdx = 0;
2106  if (!SchedModel->hasInstrSchedModel())
2107    return 0;
2108
2109  unsigned OtherCritCount = Rem->RemIssueCount
2110    + (RetiredMOps * SchedModel->getMicroOpFactor());
2111  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2112                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2113  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2114       PIdx != PEnd; ++PIdx) {
2115    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2116    if (OtherCount > OtherCritCount) {
2117      OtherCritCount = OtherCount;
2118      OtherCritIdx = PIdx;
2119    }
2120  }
2121  if (OtherCritIdx) {
2122    LLVM_DEBUG(
2123        dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2124               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2125               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2126  }
2127  return OtherCritCount;
2128}
2129
2130void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2131                                unsigned Idx) {
2132  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2133
2134#ifndef NDEBUG
  // ReadyCycle was bumped up to CurrCycle when this node was
2136  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2137  // scheduling, so may now be greater than ReadyCycle.
2138  if (ReadyCycle > CurrCycle)
2139    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2140#endif
2141
2142  if (ReadyCycle < MinReadyCycle)
2143    MinReadyCycle = ReadyCycle;
2144
2145  // Check for interlocks first. For the purpose of other heuristics, an
2146  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2147  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2148  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2149                        checkHazard(SU) || (Available.size() >= ReadyListLimit);
2150
2151  if (!HazardDetected) {
2152    Available.push(SU);
2153
2154    if (InPQueue)
2155      Pending.remove(Pending.begin() + Idx);
2156    return;
2157  }
2158
2159  if (!InPQueue)
2160    Pending.push(SU);
2161}
2162
2163/// Move the boundary of scheduled code by one cycle.
2164void SchedBoundary::bumpCycle(unsigned NextCycle) {
2165  if (SchedModel->getMicroOpBufferSize() == 0) {
2166    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2167           "MinReadyCycle uninitialized");
2168    if (MinReadyCycle > NextCycle)
2169      NextCycle = MinReadyCycle;
2170  }
2171  // Update the current micro-ops, which will issue in the next cycle.
2172  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2173  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
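  // For example (illustrative numbers), advancing two cycles with an issue
  // width of 4 retires up to 8 in-flight micro-ops from CurrMOps.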
2174
2175  // Decrement DependentLatency based on the next cycle.
2176  if ((NextCycle - CurrCycle) > DependentLatency)
2177    DependentLatency = 0;
2178  else
2179    DependentLatency -= (NextCycle - CurrCycle);
2180
2181  if (!HazardRec->isEnabled()) {
2182    // Bypass HazardRec virtual calls.
2183    CurrCycle = NextCycle;
2184  } else {
2185    // Bypass getHazardType calls in case of long latency.
2186    for (; CurrCycle != NextCycle; ++CurrCycle) {
2187      if (isTop())
2188        HazardRec->AdvanceCycle();
2189      else
2190        HazardRec->RecedeCycle();
2191    }
2192  }
2193  CheckPending = true;
2194  IsResourceLimited =
2195      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2196                         getScheduledLatency(), true);
2197
2198  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2199                    << '\n');
2200}
2201
2202void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2203  ExecutedResCounts[PIdx] += Count;
2204  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2205    MaxExecutedResCount = ExecutedResCounts[PIdx];
2206}
2207
2208/// Add the given processor resource to this scheduled zone.
2209///
2210/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2211/// during which this resource is consumed.
2212///
2213/// \return the next cycle at which the instruction may execute without
2214/// oversubscribing resources.
2215unsigned SchedBoundary::
2216countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2217  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2218  unsigned Count = Factor * Cycles;
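  // For example (illustrative numbers), a resource with factor 2 that is busy
  // for 3 cycles contributes 6 scaled units to the zone's count.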
2219  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2220                    << Cycles << "x" << Factor << "u\n");
2221
2222  // Update Executed resources counts.
2223  incExecutedResources(PIdx, Count);
2224  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2225  Rem->RemainingCounts[PIdx] -= Count;
2226
2227  // Check if this resource exceeds the current critical resource. If so, it
2228  // becomes the critical resource.
2229  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2230    ZoneCritResIdx = PIdx;
2231    LLVM_DEBUG(dbgs() << "  *** Critical resource "
2232                      << SchedModel->getResourceName(PIdx) << ": "
2233                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2234                      << "c\n");
2235  }
2236  // For reserved resources, record the highest cycle using the resource.
2237  unsigned NextAvailable, InstanceIdx;
2238  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2239  if (NextAvailable > CurrCycle) {
2240    LLVM_DEBUG(dbgs() << "  Resource conflict: "
2241                      << SchedModel->getResourceName(PIdx)
2242                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
2243                      << " reserved until @" << NextAvailable << "\n");
2244  }
2245  return NextAvailable;
2246}
2247
2248/// Move the boundary of scheduled code by one SUnit.
2249void SchedBoundary::bumpNode(SUnit *SU) {
2250  // Update the reservation table.
2251  if (HazardRec->isEnabled()) {
2252    if (!isTop() && SU->isCall) {
2253      // Calls are scheduled with their preceding instructions. For bottom-up
2254      // scheduling, clear the pipeline state before emitting.
2255      HazardRec->Reset();
2256    }
2257    HazardRec->EmitInstruction(SU);
2258    // Scheduling an instruction may have made pending instructions available.
2259    CheckPending = true;
2260  }
2261  // checkHazard should prevent scheduling multiple instructions per cycle that
2262  // exceed the issue width.
2263  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2264  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2265  assert(
2266      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2267      "Cannot schedule this instruction's MicroOps in the current cycle.");
2268
2269  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2270  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2271
2272  unsigned NextCycle = CurrCycle;
2273  switch (SchedModel->getMicroOpBufferSize()) {
2274  case 0:
2275    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2276    break;
2277  case 1:
2278    if (ReadyCycle > NextCycle) {
2279      NextCycle = ReadyCycle;
2280      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2281    }
2282    break;
2283  default:
2284    // We don't currently model the OOO reorder buffer, so consider all
2285    // scheduled MOps to be "retired". We do loosely model in-order resource
2286    // latency. If this instruction uses an in-order resource, account for any
2287    // likely stall cycles.
2288    if (SU->isUnbuffered && ReadyCycle > NextCycle)
2289      NextCycle = ReadyCycle;
2290    break;
2291  }
2292  RetiredMOps += IncMOps;
2293
2294  // Update resource counts and critical resource.
2295  if (SchedModel->hasInstrSchedModel()) {
2296    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2297    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2298    Rem->RemIssueCount -= DecRemIssue;
2299    if (ZoneCritResIdx) {
2300      // Scale scheduled micro-ops for comparing with the critical resource.
2301      unsigned ScaledMOps =
2302        RetiredMOps * SchedModel->getMicroOpFactor();
2303
2304      // If scaled micro-ops are now more than the previous critical resource by
2305      // a full cycle, then micro-ops issue becomes critical.
2306      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2307          >= (int)SchedModel->getLatencyFactor()) {
2308        ZoneCritResIdx = 0;
2309        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2310                          << ScaledMOps / SchedModel->getLatencyFactor()
2311                          << "c\n");
2312      }
2313    }
2314    for (TargetSchedModel::ProcResIter
2315           PI = SchedModel->getWriteProcResBegin(SC),
2316           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2317      unsigned RCycle =
2318        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2319      if (RCycle > NextCycle)
2320        NextCycle = RCycle;
2321    }
2322    if (SU->hasReservedResource) {
2323      // For reserved resources, record the highest cycle using the resource.
2324      // For top-down scheduling, this is the cycle in which we schedule this
      // instruction plus the number of cycles the operation reserves the
      // resource. For bottom-up it is simply the instruction's cycle.
2327      for (TargetSchedModel::ProcResIter
2328             PI = SchedModel->getWriteProcResBegin(SC),
2329             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2330        unsigned PIdx = PI->ProcResourceIdx;
2331        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2332          unsigned ReservedUntil, InstanceIdx;
2333          std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2334          if (isTop()) {
2335            ReservedCycles[InstanceIdx] =
2336                std::max(ReservedUntil, NextCycle + PI->Cycles);
2337          } else
2338            ReservedCycles[InstanceIdx] = NextCycle;
2339        }
2340      }
2341    }
2342  }
2343  // Update ExpectedLatency and DependentLatency.
2344  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2345  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2346  if (SU->getDepth() > TopLatency) {
2347    TopLatency = SU->getDepth();
2348    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2349                      << SU->NodeNum << ") " << TopLatency << "c\n");
2350  }
2351  if (SU->getHeight() > BotLatency) {
2352    BotLatency = SU->getHeight();
2353    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2354                      << SU->NodeNum << ") " << BotLatency << "c\n");
2355  }
2356  // If we stall for any reason, bump the cycle.
2357  if (NextCycle > CurrCycle)
2358    bumpCycle(NextCycle);
2359  else
2360    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2361    // resource limited. If a stall occurred, bumpCycle does this.
2362    IsResourceLimited =
2363        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2364                           getScheduledLatency(), true);
2365
  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  // resets CurrMOps. Loop to handle instructions with more MOps than can issue
  // in one cycle. Since we commonly reach the max MOps here, opportunistically
  // bump the cycle to avoid uselessly checking everything in the readyQ.
2370  CurrMOps += IncMOps;
2371
2372  // Bump the cycle count for issue group constraints.
  // This must be done after NextCycle has been adjusted for all other stalls.
  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
  // CurrCycle to X.
2376  if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
2377      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2378    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2379                      << " group\n");
2380    bumpCycle(++NextCycle);
2381  }
2382
2383  while (CurrMOps >= SchedModel->getIssueWidth()) {
2384    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2385                      << CurrCycle << '\n');
2386    bumpCycle(++NextCycle);
2387  }
2388  LLVM_DEBUG(dumpScheduledState());
2389}
2390
/// Release pending ready nodes into the available queue. This makes them
2392/// visible to heuristics.
2393void SchedBoundary::releasePending() {
2394  // If the available queue is empty, it is safe to reset MinReadyCycle.
2395  if (Available.empty())
2396    MinReadyCycle = std::numeric_limits<unsigned>::max();
2397
2398  // Check to see if any of the pending instructions are ready to issue.  If
2399  // so, add them to the available queue.
2400  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2401    SUnit *SU = *(Pending.begin() + I);
2402    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2403
2404    if (ReadyCycle < MinReadyCycle)
2405      MinReadyCycle = ReadyCycle;
2406
2407    if (Available.size() >= ReadyListLimit)
2408      break;
2409
2410    releaseNode(SU, ReadyCycle, true, I);
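    // releaseNode may have moved SU out of Pending; rewind the index and
    // bound so the element shifted into this slot is not skipped.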
2411    if (E != Pending.size()) {
2412      --I;
2413      --E;
2414    }
2415  }
2416  CheckPending = false;
2417}
2418
2419/// Remove SU from the ready set for this boundary.
2420void SchedBoundary::removeReady(SUnit *SU) {
2421  if (Available.isInQueue(SU))
2422    Available.remove(Available.find(SU));
2423  else {
2424    assert(Pending.isInQueue(SU) && "bad ready count");
2425    Pending.remove(Pending.find(SU));
2426  }
2427}
2428
2429/// If this queue only has one ready candidate, return it. As a side effect,
2430/// defer any nodes that now hit a hazard, and advance the cycle until at least
2431/// one node is ready. If multiple instructions are ready, return NULL.
2432SUnit *SchedBoundary::pickOnlyChoice() {
2433  if (CheckPending)
2434    releasePending();
2435
2436  // Defer any ready instrs that now have a hazard.
2437  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2438    if (checkHazard(*I)) {
2439      Pending.push(*I);
2440      I = Available.remove(I);
2441      continue;
2442    }
2443    ++I;
2444  }
2445  for (unsigned i = 0; Available.empty(); ++i) {
2446//  FIXME: Re-enable assert once PR20057 is resolved.
2447//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2448//           "permanent hazard");
2449    (void)i;
2450    bumpCycle(CurrCycle + 1);
2451    releasePending();
2452  }
2453
2454  LLVM_DEBUG(Pending.dump());
2455  LLVM_DEBUG(Available.dump());
2456
2457  if (Available.size() == 1)
2458    return *Available.begin();
2459  return nullptr;
2460}
2461
2462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2463// This is useful information to dump after bumpNode.
2464// Note that the Queue contents are more useful before pickNodeFromQueue.
2465LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2466  unsigned ResFactor;
2467  unsigned ResCount;
2468  if (ZoneCritResIdx) {
2469    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2470    ResCount = getResourceCount(ZoneCritResIdx);
2471  } else {
2472    ResFactor = SchedModel->getMicroOpFactor();
2473    ResCount = RetiredMOps * ResFactor;
2474  }
2475  unsigned LFactor = SchedModel->getLatencyFactor();
2476  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2477         << "  Retired: " << RetiredMOps;
2478  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2479  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2480         << ResCount / ResFactor << " "
2481         << SchedModel->getResourceName(ZoneCritResIdx)
2482         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2483         << (IsResourceLimited ? "  - Resource" : "  - Latency")
2484         << " limited.\n";
2485}
2486#endif
2487
2488//===----------------------------------------------------------------------===//
2489// GenericScheduler - Generic implementation of MachineSchedStrategy.
2490//===----------------------------------------------------------------------===//
2491
2492void GenericSchedulerBase::SchedCandidate::
2493initResourceDelta(const ScheduleDAGMI *DAG,
2494                  const TargetSchedModel *SchedModel) {
2495  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2496    return;
2497
2498  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2499  for (TargetSchedModel::ProcResIter
2500         PI = SchedModel->getWriteProcResBegin(SC),
2501         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2502    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2503      ResDelta.CritResources += PI->Cycles;
2504    if (PI->ProcResourceIdx == Policy.DemandResIdx)
2505      ResDelta.DemandedResources += PI->Cycles;
2506  }
2507}
2508
2509/// Compute remaining latency. We need this both to determine whether the
2510/// overall schedule has become latency-limited and whether the instructions
2511/// outside this zone are resource or latency limited.
2512///
2513/// The "dependent" latency is updated incrementally during scheduling as the
2514/// max height/depth of scheduled nodes minus the cycles since it was
2515/// scheduled:
///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2517///
2518/// The "independent" latency is the max ready queue depth:
2519///   ILat = max N.depth for N in Available|Pending
2520///
2521/// RemainingLatency is the greater of independent and dependent latency.
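///
/// For example (illustrative numbers), if the deepest node scheduled in the
/// zone has depth 10 and became ready 3 cycles ago, DLat == 7; if the deepest
/// node in Available or Pending has depth 9, ILat == 9 and RemainingLatency
/// == 9.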
2522///
2523/// These computations are expensive, especially in DAGs with many edges, so
2524/// only do them if necessary.
2525static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2526  unsigned RemLatency = CurrZone.getDependentLatency();
2527  RemLatency = std::max(RemLatency,
2528                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
2529  RemLatency = std::max(RemLatency,
2530                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2531  return RemLatency;
2532}
2533
/// Returns true if the current cycle plus remaining latency is greater than
2535/// the critical path in the scheduling region.
2536bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2537                                               SchedBoundary &CurrZone,
2538                                               bool ComputeRemLatency,
2539                                               unsigned &RemLatency) const {
  // If the current cycle is already greater than the critical path, we are
  // already latency limited and don't need to compute the remaining latency.
2542  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2543    return true;
2544
2545  // If we haven't scheduled anything yet, then we aren't latency limited.
2546  if (CurrZone.getCurrCycle() == 0)
2547    return false;
2548
2549  if (ComputeRemLatency)
2550    RemLatency = computeRemLatency(CurrZone);
2551
2552  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2553}
2554
/// Set the CandPolicy for a scheduling zone given the current resources and
2556/// latencies inside and outside the zone.
2557void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2558                                     SchedBoundary &CurrZone,
2559                                     SchedBoundary *OtherZone) {
2560  // Apply preemptive heuristics based on the total latency and resources
2561  // inside and outside this zone. Potential stalls should be considered before
2562  // following this policy.
2563
2564  // Compute the critical resource outside the zone.
2565  unsigned OtherCritIdx = 0;
2566  unsigned OtherCount =
2567    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2568
2569  bool OtherResLimited = false;
2570  unsigned RemLatency = 0;
2571  bool RemLatencyComputed = false;
2572  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2573    RemLatency = computeRemLatency(CurrZone);
2574    RemLatencyComputed = true;
2575    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2576                                         OtherCount, RemLatency, false);
2577  }
2578
2579  // Schedule aggressively for latency in PostRA mode. We don't check for
2580  // acyclic latency during PostRA, and highly out-of-order processors will
2581  // skip PostRA scheduling.
2582  if (!OtherResLimited &&
2583      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2584                                       RemLatency))) {
2585    Policy.ReduceLatency |= true;
2586    LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2587                      << " RemainingLatency " << RemLatency << " + "
2588                      << CurrZone.getCurrCycle() << "c > CritPath "
2589                      << Rem.CriticalPath << "\n");
2590  }
2591  // If the same resource is limiting inside and outside the zone, do nothing.
2592  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2593    return;
2594
2595  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2596    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2597           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2598  } if (OtherResLimited) dbgs()
2599                 << "  RemainingLimit: "
2600                 << SchedModel->getResourceName(OtherCritIdx) << "\n";
2601             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2602             << "  Latency limited both directions.\n");
2603
2604  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2605    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2606
2607  if (OtherResLimited)
2608    Policy.DemandResIdx = OtherCritIdx;
2609}
2610
2611#ifndef NDEBUG
2612const char *GenericSchedulerBase::getReasonStr(
2613  GenericSchedulerBase::CandReason Reason) {
2614  switch (Reason) {
2615  case NoCand:         return "NOCAND    ";
2616  case Only1:          return "ONLY1     ";
2617  case PhysReg:        return "PHYS-REG  ";
2618  case RegExcess:      return "REG-EXCESS";
2619  case RegCritical:    return "REG-CRIT  ";
2620  case Stall:          return "STALL     ";
2621  case Cluster:        return "CLUSTER   ";
2622  case Weak:           return "WEAK      ";
2623  case RegMax:         return "REG-MAX   ";
2624  case ResourceReduce: return "RES-REDUCE";
2625  case ResourceDemand: return "RES-DEMAND";
2626  case TopDepthReduce: return "TOP-DEPTH ";
2627  case TopPathReduce:  return "TOP-PATH  ";
2628  case BotHeightReduce:return "BOT-HEIGHT";
2629  case BotPathReduce:  return "BOT-PATH  ";
2630  case NextDefUse:     return "DEF-USE   ";
2631  case NodeOrder:      return "ORDER     ";
2632  };
2633  llvm_unreachable("Unknown reason!");
2634}
2635
2636void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2637  PressureChange P;
2638  unsigned ResIdx = 0;
2639  unsigned Latency = 0;
2640  switch (Cand.Reason) {
2641  default:
2642    break;
2643  case RegExcess:
2644    P = Cand.RPDelta.Excess;
2645    break;
2646  case RegCritical:
2647    P = Cand.RPDelta.CriticalMax;
2648    break;
2649  case RegMax:
2650    P = Cand.RPDelta.CurrentMax;
2651    break;
2652  case ResourceReduce:
2653    ResIdx = Cand.Policy.ReduceResIdx;
2654    break;
2655  case ResourceDemand:
2656    ResIdx = Cand.Policy.DemandResIdx;
2657    break;
2658  case TopDepthReduce:
2659    Latency = Cand.SU->getDepth();
2660    break;
2661  case TopPathReduce:
2662    Latency = Cand.SU->getHeight();
2663    break;
2664  case BotHeightReduce:
2665    Latency = Cand.SU->getHeight();
2666    break;
2667  case BotPathReduce:
2668    Latency = Cand.SU->getDepth();
2669    break;
2670  }
2671  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2672  if (P.isValid())
2673    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2674           << ":" << P.getUnitInc() << " ";
2675  else
2676    dbgs() << "      ";
2677  if (ResIdx)
2678    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2679  else
2680    dbgs() << "         ";
2681  if (Latency)
2682    dbgs() << " " << Latency << " cycles ";
2683  else
2684    dbgs() << "          ";
2685  dbgs() << '\n';
2686}
2687#endif
2688
2689namespace llvm {
2690/// Return true if this heuristic determines order.
2691bool tryLess(int TryVal, int CandVal,
2692             GenericSchedulerBase::SchedCandidate &TryCand,
2693             GenericSchedulerBase::SchedCandidate &Cand,
2694             GenericSchedulerBase::CandReason Reason) {
2695  if (TryVal < CandVal) {
2696    TryCand.Reason = Reason;
2697    return true;
2698  }
2699  if (TryVal > CandVal) {
2700    if (Cand.Reason > Reason)
2701      Cand.Reason = Reason;
2702    return true;
2703  }
2704  return false;
2705}
2706
2707bool tryGreater(int TryVal, int CandVal,
2708                GenericSchedulerBase::SchedCandidate &TryCand,
2709                GenericSchedulerBase::SchedCandidate &Cand,
2710                GenericSchedulerBase::CandReason Reason) {
2711  if (TryVal > CandVal) {
2712    TryCand.Reason = Reason;
2713    return true;
2714  }
2715  if (TryVal < CandVal) {
2716    if (Cand.Reason > Reason)
2717      Cand.Reason = Reason;
2718    return true;
2719  }
2720  return false;
2721}
2722
2723bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2724                GenericSchedulerBase::SchedCandidate &Cand,
2725                SchedBoundary &Zone) {
2726  if (Zone.isTop()) {
2727    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2728      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2729                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2730        return true;
2731    }
2732    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2733                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2734      return true;
2735  } else {
2736    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2737      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2738                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2739        return true;
2740    }
2741    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2742                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2743      return true;
2744  }
2745  return false;
2746}
2747} // end namespace llvm
2748
2749static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2750  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2751                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2752}
2753
2754static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2755  tracePick(Cand.Reason, Cand.AtTop);
2756}
2757
2758void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2759  assert(dag->hasVRegLiveness() &&
2760         "(PreRA)GenericScheduler needs vreg liveness");
2761  DAG = static_cast<ScheduleDAGMILive*>(dag);
2762  SchedModel = DAG->getSchedModel();
2763  TRI = DAG->TRI;
2764
2765  if (RegionPolicy.ComputeDFSResult)
2766    DAG->computeDFSResult();
2767
2768  Rem.init(DAG, SchedModel);
2769  Top.init(DAG, SchedModel, &Rem);
2770  Bot.init(DAG, SchedModel, &Rem);
2771
2772  // Initialize resource counts.
2773
2774  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2775  // are disabled, then these HazardRecs will be disabled.
2776  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2777  if (!Top.HazardRec) {
2778    Top.HazardRec =
2779        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2780            Itin, DAG);
2781  }
2782  if (!Bot.HazardRec) {
2783    Bot.HazardRec =
2784        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2785            Itin, DAG);
2786  }
2787  TopCand.SU = nullptr;
2788  BotCand.SU = nullptr;
2789}
2790
2791/// Initialize the per-region scheduling policy.
2792void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2793                                  MachineBasicBlock::iterator End,
2794                                  unsigned NumRegionInstrs) {
2795  const MachineFunction &MF = *Begin->getMF();
2796  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2797
2798  // Avoid setting up the register pressure tracker for small regions to save
2799  // compile time. As a rough heuristic, only track pressure when the number of
2800  // schedulable instructions exceeds half the integer register file.
2801  RegionPolicy.ShouldTrackPressure = true;
2802  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2803    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2804    if (TLI->isTypeLegal(LegalIntVT)) {
2805      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2806        TLI->getRegClassFor(LegalIntVT));
2807      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2808    }
2809  }
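  // For illustration (hypothetical numbers): if the narrowest legal integer
  // type has 32 allocatable registers, pressure tracking is enabled only for
  // regions with more than 16 schedulable instructions. The loop counts down
  // from i32, so the narrowest legal type examined last determines the result.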
2810
2811  // For generic targets, we default to bottom-up, because it's simpler and
2812  // more compile-time optimization effort has gone into that direction.
2813  RegionPolicy.OnlyBottomUp = true;
2814
2815  // Allow the subtarget to override default policy.
2816  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2817
2818  // After subtarget overrides, apply command line options.
2819  if (!EnableRegPressure) {
2820    RegionPolicy.ShouldTrackPressure = false;
2821    RegionPolicy.ShouldTrackLaneMasks = false;
2822  }
2823
2824  // The -misched-topdown/bottomup flags can force or unforce scheduling direction.
2825  // e.g. -misched-bottomup=false allows scheduling in both directions.
2826  assert((!ForceTopDown || !ForceBottomUp) &&
2827         "-misched-topdown incompatible with -misched-bottomup");
2828  if (ForceBottomUp.getNumOccurrences() > 0) {
2829    RegionPolicy.OnlyBottomUp = ForceBottomUp;
2830    if (RegionPolicy.OnlyBottomUp)
2831      RegionPolicy.OnlyTopDown = false;
2832  }
2833  if (ForceTopDown.getNumOccurrences() > 0) {
2834    RegionPolicy.OnlyTopDown = ForceTopDown;
2835    if (RegionPolicy.OnlyTopDown)
2836      RegionPolicy.OnlyBottomUp = false;
2837  }
2838}
2839
2840void GenericScheduler::dumpPolicy() const {
2841  // Cannot completely remove virtual function even in release mode.
2842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2843  dbgs() << "GenericScheduler RegionPolicy: "
2844         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2845         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2846         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2847         << "\n";
2848#endif
2849}
2850
2851/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2852/// critical path by more cycles than it takes to drain the instruction buffer.
2853/// We estimate an upper bound on in-flight instructions as:
2854///
2855/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2856/// InFlightIterations = AcyclicPath / CyclesPerIteration
2857/// InFlightResources = InFlightIterations * LoopResources
2858///
2859/// TODO: Check execution resources in addition to IssueCount.
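/// For illustration (hypothetical numbers, unit latency and micro-op
/// factors): with CyclicCritPath = 4, CriticalPath = 20, RemIssueCount = 8,
/// and MicroOpBufferSize = 16, IterCount = max(4, 8) = 8 and InFlightCount =
/// ceil(20 * 8 / 8) = 20 > 16, so the region is marked acyclic-latency
/// limited and latency is scheduled for aggressively.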
2860void GenericScheduler::checkAcyclicLatency() {
2861  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2862    return;
2863
2864  // Scaled number of cycles per loop iteration.
2865  unsigned IterCount =
2866    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2867             Rem.RemIssueCount);
2868  // Scaled acyclic critical path.
2869  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2870  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2871  unsigned InFlightCount =
2872    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2873  unsigned BufferLimit =
2874    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2875
2876  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2877
2878  LLVM_DEBUG(
2879      dbgs() << "IssueCycles="
2880             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2881             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2882             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2883             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2884             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2885      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2886}
2887
2888void GenericScheduler::registerRoots() {
2889  Rem.CriticalPath = DAG->ExitSU.getDepth();
2890
2891  // Some roots may not feed into ExitSU. Check all of them just in case.
2892  for (const SUnit *SU : Bot.Available) {
2893    if (SU->getDepth() > Rem.CriticalPath)
2894      Rem.CriticalPath = SU->getDepth();
2895  }
2896  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2897  if (DumpCriticalPathLength) {
2898    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2899  }
2900
2901  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2902    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2903    checkAcyclicLatency();
2904  }
2905}
2906
2907namespace llvm {
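// A sketch of the ordering tryPressure imposes: a candidate that decreases a
// pressure set beats one that increases it; candidates from opposite
// boundaries are otherwise not compared; within the same pressure set the
// smaller increase wins; and across different sets a target-defined
// pressure-set score ranks which set matters more, with the sense of the
// comparison reversed when the change is a decrease.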
2908bool tryPressure(const PressureChange &TryP,
2909                 const PressureChange &CandP,
2910                 GenericSchedulerBase::SchedCandidate &TryCand,
2911                 GenericSchedulerBase::SchedCandidate &Cand,
2912                 GenericSchedulerBase::CandReason Reason,
2913                 const TargetRegisterInfo *TRI,
2914                 const MachineFunction &MF) {
2915  // If one candidate decreases and the other increases, go with it.
2916  // Invalid candidates have UnitInc==0.
2917  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2918                 Reason)) {
2919    return true;
2920  }
2921  // Do not compare the magnitude of pressure changes between top and bottom
2922  // boundary.
2923  if (Cand.AtTop != TryCand.AtTop)
2924    return false;
2925
2926  // If both candidates affect the same set in the same boundary, go with the
2927  // smallest increase.
2928  unsigned TryPSet = TryP.getPSetOrMax();
2929  unsigned CandPSet = CandP.getPSetOrMax();
2930  if (TryPSet == CandPSet) {
2931    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2932                   Reason);
2933  }
2934
2935  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2936                                 std::numeric_limits<int>::max();
2937
2938  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2939                                   std::numeric_limits<int>::max();
2940
2941  // If the candidates are decreasing pressure, reverse priority.
2942  if (TryP.getUnitInc() < 0)
2943    std::swap(TryRank, CandRank);
2944  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2945}
2946
2947unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2948  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2949}
2950
2951/// Minimize physical register live ranges. Regalloc wants them adjacent to
2952/// their physreg def/use.
2953///
2954/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2955/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2956/// with the operation that produces or consumes the physreg. We'll do this when
2957/// regalloc has support for parallel copies.
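/// For example (hypothetical MIR), when scheduling bottom-up a copy
///   "$physreg = COPY %v"
/// is biased to +1 once the instruction consuming $physreg has already been
/// scheduled below it, so the copy issues right next to that use, while a
/// copy out of a physreg that is live-in at the region boundary is biased to
/// -1 and deferred so that it stays adjacent to the boundary.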
2958int biasPhysReg(const SUnit *SU, bool isTop) {
2959  const MachineInstr *MI = SU->getInstr();
2960
2961  if (MI->isCopy()) {
2962    unsigned ScheduledOper = isTop ? 1 : 0;
2963    unsigned UnscheduledOper = isTop ? 0 : 1;
2964    // If we have already scheduled the physreg producer/consumer, immediately
2965    // schedule the copy.
2966    if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
2967      return 1;
2968    // If the physreg is at the boundary, defer it. Otherwise schedule it
2969    // immediately to free the dependent. We can hoist the copy later.
2970    bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2971    if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
2972      return AtBoundary ? -1 : 1;
2973  }
2974
2975  if (MI->isMoveImmediate()) {
2976    // If we have a move immediate and all successors have been assigned, bias
2977    // towards scheduling this later. Make sure all register defs are to
2978    // physical registers.
2979    bool DoBias = true;
2980    for (const MachineOperand &Op : MI->defs()) {
2981      if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
2982        DoBias = false;
2983        break;
2984      }
2985    }
2986
2987    if (DoBias)
2988      return isTop ? -1 : 1;
2989  }
2990
2991  return 0;
2992}
2993} // end namespace llvm
2994
2995void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2996                                     bool AtTop,
2997                                     const RegPressureTracker &RPTracker,
2998                                     RegPressureTracker &TempTracker) {
2999  Cand.SU = SU;
3000  Cand.AtTop = AtTop;
3001  if (DAG->isTrackingPressure()) {
3002    if (AtTop) {
3003      TempTracker.getMaxDownwardPressureDelta(
3004        Cand.SU->getInstr(),
3005        Cand.RPDelta,
3006        DAG->getRegionCriticalPSets(),
3007        DAG->getRegPressure().MaxSetPressure);
3008    } else {
3009      if (VerifyScheduling) {
3010        TempTracker.getMaxUpwardPressureDelta(
3011          Cand.SU->getInstr(),
3012          &DAG->getPressureDiff(Cand.SU),
3013          Cand.RPDelta,
3014          DAG->getRegionCriticalPSets(),
3015          DAG->getRegPressure().MaxSetPressure);
3016      } else {
3017        RPTracker.getUpwardPressureDelta(
3018          Cand.SU->getInstr(),
3019          DAG->getPressureDiff(Cand.SU),
3020          Cand.RPDelta,
3021          DAG->getRegionCriticalPSets(),
3022          DAG->getRegPressure().MaxSetPressure);
3023      }
3024    }
3025  }
3026  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3027             << "  Try  SU(" << Cand.SU->NodeNum << ") "
3028             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3029             << Cand.RPDelta.Excess.getUnitInc() << "\n");
3030}
3031
3032/// Apply a set of heuristics to a new candidate. Heuristics are currently
3033/// hierarchical. This may be more efficient than a graduated cost model because
3034/// we don't need to evaluate all aspects of the model for each node in the
3035/// queue. But it's really done to make the heuristics easier to debug and
3036/// statistically analyze.
3037///
3038/// \param Cand provides the policy and current best candidate.
3039/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3040/// \param Zone describes the scheduled zone that we are extending, or nullptr
3041///             if Cand is from a different zone than TryCand.
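///
/// The hierarchy applied below is, in order: physreg copy bias, excess
/// pressure, critical-set pressure, (same boundary only) latency for
/// acyclic-limited loops and unbuffered-resource stalls, cluster edges,
/// (same boundary only) weak edges, max pressure of the region, and finally
/// (same boundary only) critical-resource balance, demanded resources,
/// latency, and original node order.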
3042void GenericScheduler::tryCandidate(SchedCandidate &Cand,
3043                                    SchedCandidate &TryCand,
3044                                    SchedBoundary *Zone) const {
3045  // Initialize the candidate if needed.
3046  if (!Cand.isValid()) {
3047    TryCand.Reason = NodeOrder;
3048    return;
3049  }
3050
3051  // Bias physreg defs and copies toward their uses and definitions, respectively.
3052  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3053                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3054    return;
3055
3056  // Avoid exceeding the target's limit.
3057  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3058                                               Cand.RPDelta.Excess,
3059                                               TryCand, Cand, RegExcess, TRI,
3060                                               DAG->MF))
3061    return;
3062
3063  // Avoid increasing the max critical pressure in the scheduled region.
3064  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3065                                               Cand.RPDelta.CriticalMax,
3066                                               TryCand, Cand, RegCritical, TRI,
3067                                               DAG->MF))
3068    return;
3069
3070  // We only compare a subset of features when comparing nodes between the
3071  // Top and Bottom boundaries. Some properties are simply incomparable; in
3072  // many other cases we should only override the other boundary if something
3073  // is a clearly better pick on one boundary. Skip heuristics that are more
3074  // "tie-breaking" in nature.
3075  bool SameBoundary = Zone != nullptr;
3076  if (SameBoundary) {
3077    // For loops that are acyclic path limited, aggressively schedule for
3078    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3079    // heuristics to take precedence.
3080    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3081        tryLatency(TryCand, Cand, *Zone))
3082      return;
3083
3084    // Prioritize instructions that read unbuffered resources by stall cycles.
3085    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3086                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3087      return;
3088  }
3089
3090  // Keep clustered nodes together to encourage downstream peephole
3091  // optimizations which may reduce resource requirements.
3092  //
3093  // This is a best effort to set things up for a post-RA pass. Optimizations
3094  // like generating loads of multiple registers should ideally be done within
3095  // the scheduler pass by combining the loads during DAG postprocessing.
3096  const SUnit *CandNextClusterSU =
3097    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3098  const SUnit *TryCandNextClusterSU =
3099    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3100  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3101                 Cand.SU == CandNextClusterSU,
3102                 TryCand, Cand, Cluster))
3103    return;
3104
3105  if (SameBoundary) {
3106    // Weak edges are for clustering and other constraints.
3107    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3108                getWeakLeft(Cand.SU, Cand.AtTop),
3109                TryCand, Cand, Weak))
3110      return;
3111  }
3112
3113  // Avoid increasing the max pressure of the entire region.
3114  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3115                                               Cand.RPDelta.CurrentMax,
3116                                               TryCand, Cand, RegMax, TRI,
3117                                               DAG->MF))
3118    return;
3119
3120  if (SameBoundary) {
3121    // Avoid critical resource consumption and balance the schedule.
3122    TryCand.initResourceDelta(DAG, SchedModel);
3123    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3124                TryCand, Cand, ResourceReduce))
3125      return;
3126    if (tryGreater(TryCand.ResDelta.DemandedResources,
3127                   Cand.ResDelta.DemandedResources,
3128                   TryCand, Cand, ResourceDemand))
3129      return;
3130
3131    // Avoid serializing long latency dependence chains.
3132    // For acyclic path limited loops, latency was already checked above.
3133    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3134        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3135      return;
3136
3137    // Fall through to original instruction order.
3138    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3139        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3140      TryCand.Reason = NodeOrder;
3141    }
3142  }
3143}
3144
3145/// Pick the best candidate from the queue.
3146///
3147/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3148/// DAG building. To adjust for the current scheduling location we need to
3149/// maintain the number of vreg uses remaining to be top-scheduled.
3150void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3151                                         const CandPolicy &ZonePolicy,
3152                                         const RegPressureTracker &RPTracker,
3153                                         SchedCandidate &Cand) {
3154  // getMaxPressureDelta temporarily modifies the tracker.
3155  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3156
3157  ReadyQueue &Q = Zone.Available;
3158  for (SUnit *SU : Q) {
3159
3160    SchedCandidate TryCand(ZonePolicy);
3161    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3162    // Pass SchedBoundary only when comparing nodes from the same boundary.
3163    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3164    tryCandidate(Cand, TryCand, ZoneArg);
3165    if (TryCand.Reason != NoCand) {
3166      // Initialize resource delta if needed in case future heuristics query it.
3167      if (TryCand.ResDelta == SchedResourceDelta())
3168        TryCand.initResourceDelta(DAG, SchedModel);
3169      Cand.setBest(TryCand);
3170      LLVM_DEBUG(traceCandidate(Cand));
3171    }
3172  }
3173}
3174
3175/// Pick the best candidate node from either the top or bottom queue.
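/// Candidates are cached across calls: BotCand and TopCand are reused while
/// they are still valid for the current policy, and when VerifyScheduling is
/// set the cached pick is re-checked against a fresh pick. The two winners
/// are then compared with a null SchedBoundary, so only boundary-independent
/// heuristics decide between the top and bottom choice.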
3176SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3177  // Schedule as far as possible in the direction of no choice. This is most
3178  // efficient, but also provides the best heuristics for CriticalPSets.
3179  if (SUnit *SU = Bot.pickOnlyChoice()) {
3180    IsTopNode = false;
3181    tracePick(Only1, false);
3182    return SU;
3183  }
3184  if (SUnit *SU = Top.pickOnlyChoice()) {
3185    IsTopNode = true;
3186    tracePick(Only1, true);
3187    return SU;
3188  }
3189  // Set the bottom-up policy based on the state of the current bottom zone and
3190  // the instructions outside the zone, including the top zone.
3191  CandPolicy BotPolicy;
3192  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3193  // Set the top-down policy based on the state of the current top zone and
3194  // the instructions outside the zone, including the bottom zone.
3195  CandPolicy TopPolicy;
3196  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3197
3198  // See if BotCand is still valid (because we previously scheduled from Top).
3199  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3200  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3201      BotCand.Policy != BotPolicy) {
3202    BotCand.reset(CandPolicy());
3203    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3204    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3205  } else {
3206    LLVM_DEBUG(traceCandidate(BotCand));
3207#ifndef NDEBUG
3208    if (VerifyScheduling) {
3209      SchedCandidate TCand;
3210      TCand.reset(CandPolicy());
3211      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3212      assert(TCand.SU == BotCand.SU &&
3213             "Last pick result should correspond to re-picking right now");
3214    }
3215#endif
3216  }
3217
3218  // Check if the top Q has a better candidate.
3219  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3220  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3221      TopCand.Policy != TopPolicy) {
3222    TopCand.reset(CandPolicy());
3223    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3224    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3225  } else {
3226    LLVM_DEBUG(traceCandidate(TopCand));
3227#ifndef NDEBUG
3228    if (VerifyScheduling) {
3229      SchedCandidate TCand;
3230      TCand.reset(CandPolicy());
3231      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3232      assert(TCand.SU == TopCand.SU &&
3233             "Last pick result should correspond to re-picking right now");
3234    }
3235#endif
3236  }
3237
3238  // Pick best from BotCand and TopCand.
3239  assert(BotCand.isValid());
3240  assert(TopCand.isValid());
3241  SchedCandidate Cand = BotCand;
3242  TopCand.Reason = NoCand;
3243  tryCandidate(Cand, TopCand, nullptr);
3244  if (TopCand.Reason != NoCand) {
3245    Cand.setBest(TopCand);
3246    LLVM_DEBUG(traceCandidate(Cand));
3247  }
3248
3249  IsTopNode = Cand.AtTop;
3250  tracePick(Cand);
3251  return Cand.SU;
3252}
3253
3254/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3255SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3256  if (DAG->top() == DAG->bottom()) {
3257    assert(Top.Available.empty() && Top.Pending.empty() &&
3258           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3259    return nullptr;
3260  }
3261  SUnit *SU;
3262  do {
3263    if (RegionPolicy.OnlyTopDown) {
3264      SU = Top.pickOnlyChoice();
3265      if (!SU) {
3266        CandPolicy NoPolicy;
3267        TopCand.reset(NoPolicy);
3268        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3269        assert(TopCand.Reason != NoCand && "failed to find a candidate");
3270        tracePick(TopCand);
3271        SU = TopCand.SU;
3272      }
3273      IsTopNode = true;
3274    } else if (RegionPolicy.OnlyBottomUp) {
3275      SU = Bot.pickOnlyChoice();
3276      if (!SU) {
3277        CandPolicy NoPolicy;
3278        BotCand.reset(NoPolicy);
3279        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3280        assert(BotCand.Reason != NoCand && "failed to find a candidate");
3281        tracePick(BotCand);
3282        SU = BotCand.SU;
3283      }
3284      IsTopNode = false;
3285    } else {
3286      SU = pickNodeBidirectional(IsTopNode);
3287    }
3288  } while (SU->isScheduled);
3289
3290  if (SU->isTopReady())
3291    Top.removeReady(SU);
3292  if (SU->isBottomReady())
3293    Bot.removeReady(SU);
3294
3295  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3296                    << *SU->getInstr());
3297  return SU;
3298}
3299
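// reschedulePhysReg keeps physreg copies adjacent to the instruction that
// produces or consumes the physreg. For example (hypothetical MIR): after a
// call that reads $arg0 is scheduled top-down, a previously scheduled
// "$arg0 = COPY %x" with no other consumers is pulled down to sit immediately
// above the call; bottom-up, a copy out of a physreg def is pulled up to sit
// immediately below the defining instruction.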
3300void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3301  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3302  if (!isTop)
3303    ++InsertPos;
3304  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3305
3306  // Find already scheduled copies with a single physreg dependence and move
3307  // them next to the scheduled instruction (above it for top-down, below it for bottom-up).
3308  for (SDep &Dep : Deps) {
3309    if (Dep.getKind() != SDep::Data ||
3310        !Register::isPhysicalRegister(Dep.getReg()))
3311      continue;
3312    SUnit *DepSU = Dep.getSUnit();
3313    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3314      continue;
3315    MachineInstr *Copy = DepSU->getInstr();
3316    if (!Copy->isCopy() && !Copy->isMoveImmediate())
3317      continue;
3318    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
3319               DAG->dumpNode(*Dep.getSUnit()));
3320    DAG->moveInstruction(Copy, InsertPos);
3321  }
3322}
3323
3324/// Update the scheduler's state after scheduling a node. This is the same node
3325/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3326/// update its state based on the current cycle before MachineSchedStrategy
3327/// does.
3328///
3329/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3330/// them here. See comments in biasPhysReg.
3331void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3332  if (IsTopNode) {
3333    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3334    Top.bumpNode(SU);
3335    if (SU->hasPhysRegUses)
3336      reschedulePhysReg(SU, true);
3337  } else {
3338    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3339    Bot.bumpNode(SU);
3340    if (SU->hasPhysRegDefs)
3341      reschedulePhysReg(SU, false);
3342  }
3343}
3344
3345/// Create the standard converging machine scheduler. This will be used as the
3346/// default scheduler if the target does not set a default.
3347ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3348  ScheduleDAGMILive *DAG =
3349      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3350  // Register DAG post-processors.
3351  //
3352  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3353  // data and pass it to later mutations. Have a single mutation that gathers
3354  // the interesting nodes in one pass.
3355  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3356  return DAG;
3357}
3358
3359static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3360  return createGenericSchedLive(C);
3361}
3362
3363static MachineSchedRegistry
3364GenericSchedRegistry("converge", "Standard converging scheduler.",
3365                     createConvergingSched);
3366
3367//===----------------------------------------------------------------------===//
3368// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3369//===----------------------------------------------------------------------===//
3370
3371void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3372  DAG = Dag;
3373  SchedModel = DAG->getSchedModel();
3374  TRI = DAG->TRI;
3375
3376  Rem.init(DAG, SchedModel);
3377  Top.init(DAG, SchedModel, &Rem);
3378  BotRoots.clear();
3379
3380  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3381  // or are disabled, then these HazardRecs will be disabled.
3382  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3383  if (!Top.HazardRec) {
3384    Top.HazardRec =
3385        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3386            Itin, DAG);
3387  }
3388}
3389
3390void PostGenericScheduler::registerRoots() {
3391  Rem.CriticalPath = DAG->ExitSU.getDepth();
3392
3393  // Some roots may not feed into ExitSU. Check all of them just in case.
3394  for (const SUnit *SU : BotRoots) {
3395    if (SU->getDepth() > Rem.CriticalPath)
3396      Rem.CriticalPath = SU->getDepth();
3397  }
3398  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3399  if (DumpCriticalPathLength) {
3400    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3401  }
3402}
3403
3404/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3405///
3406/// \param Cand provides the policy and current best candidate.
3407/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
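///
/// The post-RA hierarchy is a reduced version of the pre-RA one: stall
/// cycles on unbuffered resources, cluster edges, critical-resource balance,
/// demanded resources, latency, and finally original node order. Register
/// pressure is not considered after register allocation.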
3408void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3409                                        SchedCandidate &TryCand) {
3410  // Initialize the candidate if needed.
3411  if (!Cand.isValid()) {
3412    TryCand.Reason = NodeOrder;
3413    return;
3414  }
3415
3416  // Prioritize instructions that read unbuffered resources by stall cycles.
3417  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3418              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3419    return;
3420
3421  // Keep clustered nodes together.
3422  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3423                 Cand.SU == DAG->getNextClusterSucc(),
3424                 TryCand, Cand, Cluster))
3425    return;
3426
3427  // Avoid critical resource consumption and balance the schedule.
3428  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3429              TryCand, Cand, ResourceReduce))
3430    return;
3431  if (tryGreater(TryCand.ResDelta.DemandedResources,
3432                 Cand.ResDelta.DemandedResources,
3433                 TryCand, Cand, ResourceDemand))
3434    return;
3435
3436  // Avoid serializing long latency dependence chains.
3437  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3438    return;
3439  }
3440
3441  // Fall through to original instruction order.
3442  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3443    TryCand.Reason = NodeOrder;
3444}
3445
3446void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3447  ReadyQueue &Q = Top.Available;
3448  for (SUnit *SU : Q) {
3449    SchedCandidate TryCand(Cand.Policy);
3450    TryCand.SU = SU;
3451    TryCand.AtTop = true;
3452    TryCand.initResourceDelta(DAG, SchedModel);
3453    tryCandidate(Cand, TryCand);
3454    if (TryCand.Reason != NoCand) {
3455      Cand.setBest(TryCand);
3456      LLVM_DEBUG(traceCandidate(Cand));
3457    }
3458  }
3459}
3460
3461/// Pick the next node to schedule.
3462SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3463  if (DAG->top() == DAG->bottom()) {
3464    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3465    return nullptr;
3466  }
3467  SUnit *SU;
3468  do {
3469    SU = Top.pickOnlyChoice();
3470    if (SU) {
3471      tracePick(Only1, true);
3472    } else {
3473      CandPolicy NoPolicy;
3474      SchedCandidate TopCand(NoPolicy);
3475      // Set the top-down policy based on the state of the current top zone and
3476      // the instructions outside the zone, including the bottom zone.
3477      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3478      pickNodeFromQueue(TopCand);
3479      assert(TopCand.Reason != NoCand && "failed to find a candidate");
3480      tracePick(TopCand);
3481      SU = TopCand.SU;
3482    }
3483  } while (SU->isScheduled);
3484
3485  IsTopNode = true;
3486  Top.removeReady(SU);
3487
3488  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3489                    << *SU->getInstr());
3490  return SU;
3491}
3492
3493/// Called after ScheduleDAGMI has scheduled an instruction and updated
3494/// scheduled/remaining flags in the DAG nodes.
3495void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3496  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3497  Top.bumpNode(SU);
3498}
3499
3500ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3501  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3502                           /*RemoveKillFlags=*/true);
3503}
3504
3505//===----------------------------------------------------------------------===//
3506// ILP Scheduler. Currently for experimental analysis of heuristics.
3507//===----------------------------------------------------------------------===//
3508
3509namespace {
3510
3511/// Order nodes by the ILP metric.
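/// Priority, from highest to lowest: nodes in already-scheduled subtrees,
/// then nodes in subtrees with deeper connections to the rest of the DAG,
/// then the ILP metric itself (maximized or minimized per MaximizeILP).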
3512struct ILPOrder {
3513  const SchedDFSResult *DFSResult = nullptr;
3514  const BitVector *ScheduledTrees = nullptr;
3515  bool MaximizeILP;
3516
3517  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3518
3519  /// Apply a less-than relation on node priority.
3520  ///
3521  /// (Return true if A comes after B in the Q.)
3522  bool operator()(const SUnit *A, const SUnit *B) const {
3523    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3524    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3525    if (SchedTreeA != SchedTreeB) {
3526      // Unscheduled trees have lower priority.
3527      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3528        return ScheduledTrees->test(SchedTreeB);
3529
3530      // Trees with shallower connections have lower priority.
3531      if (DFSResult->getSubtreeLevel(SchedTreeA)
3532          != DFSResult->getSubtreeLevel(SchedTreeB)) {
3533        return DFSResult->getSubtreeLevel(SchedTreeA)
3534          < DFSResult->getSubtreeLevel(SchedTreeB);
3535      }
3536    }
3537    if (MaximizeILP)
3538      return DFSResult->getILP(A) < DFSResult->getILP(B);
3539    else
3540      return DFSResult->getILP(A) > DFSResult->getILP(B);
3541  }
3542};
3543
3544/// Schedule based on the ILP metric.
3545class ILPScheduler : public MachineSchedStrategy {
3546  ScheduleDAGMILive *DAG = nullptr;
3547  ILPOrder Cmp;
3548
3549  std::vector<SUnit*> ReadyQ;
3550
3551public:
3552  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3553
3554  void initialize(ScheduleDAGMI *dag) override {
3555    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3556    DAG = static_cast<ScheduleDAGMILive*>(dag);
3557    DAG->computeDFSResult();
3558    Cmp.DFSResult = DAG->getDFSResult();
3559    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3560    ReadyQ.clear();
3561  }
3562
3563  void registerRoots() override {
3564    // Restore the heap in ReadyQ with the updated DFS results.
3565    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3566  }
3567
3568  /// Implement MachineSchedStrategy interface.
3569  /// -----------------------------------------
3570
3571  /// Callback to select the highest priority node from the ready Q.
3572  SUnit *pickNode(bool &IsTopNode) override {
3573    if (ReadyQ.empty()) return nullptr;
3574    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3575    SUnit *SU = ReadyQ.back();
3576    ReadyQ.pop_back();
3577    IsTopNode = false;
3578    LLVM_DEBUG(dbgs() << "Pick node "
3579                      << "SU(" << SU->NodeNum << ") "
3580                      << " ILP: " << DAG->getDFSResult()->getILP(SU)
3581                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3582                      << " @"
3583                      << DAG->getDFSResult()->getSubtreeLevel(
3584                             DAG->getDFSResult()->getSubtreeID(SU))
3585                      << '\n'
3586                      << "Scheduling " << *SU->getInstr());
3587    return SU;
3588  }
3589
3590  /// Scheduler callback to notify that a new subtree is scheduled.
3591  void scheduleTree(unsigned SubtreeID) override {
3592    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3593  }
3594
3595  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3596  /// DFSResults, and resort the priority Q.
3597  void schedNode(SUnit *SU, bool IsTopNode) override {
3598    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3599  }
3600
3601  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3602
3603  void releaseBottomNode(SUnit *SU) override {
3604    ReadyQ.push_back(SU);
3605    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3606  }
3607};
3608
3609} // end anonymous namespace
3610
3611static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3612  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3613}
3614static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3615  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3616}
3617
3618static MachineSchedRegistry ILPMaxRegistry(
3619  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3620static MachineSchedRegistry ILPMinRegistry(
3621  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3622
3623//===----------------------------------------------------------------------===//
3624// Machine Instruction Shuffler for Correctness Testing
3625//===----------------------------------------------------------------------===//
3626
3627#ifndef NDEBUG
3628namespace {
3629
3630/// Apply a less-than relation on the node order, which corresponds to the
3631/// instruction order prior to scheduling. IsReverse implements greater-than.
3632template<bool IsReverse>
3633struct SUnitOrder {
3634  bool operator()(SUnit *A, SUnit *B) const {
3635    if (IsReverse)
3636      return A->NodeNum > B->NodeNum;
3637    else
3638      return A->NodeNum < B->NodeNum;
3639  }
3640};
3641
3642/// Reorder instructions as much as possible.
3643class InstructionShuffler : public MachineSchedStrategy {
3644  bool IsAlternating;
3645  bool IsTopDown;
3646
3647  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3648  // gives nodes with a higher number higher priority, causing the latest
3649  // instructions to be scheduled first.
3650  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3651    TopQ;
3652
3653  // When scheduling bottom-up, use greater-than as the queue priority.
3654  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3655    BottomQ;
3656
3657public:
3658  InstructionShuffler(bool alternate, bool topdown)
3659    : IsAlternating(alternate), IsTopDown(topdown) {}
3660
3661  void initialize(ScheduleDAGMI*) override {
3662    TopQ.clear();
3663    BottomQ.clear();
3664  }
3665
3666  /// Implement MachineSchedStrategy interface.
3667  /// -----------------------------------------
3668
3669  SUnit *pickNode(bool &IsTopNode) override {
3670    SUnit *SU;
3671    if (IsTopDown) {
3672      do {
3673        if (TopQ.empty()) return nullptr;
3674        SU = TopQ.top();
3675        TopQ.pop();
3676      } while (SU->isScheduled);
3677      IsTopNode = true;
3678    } else {
3679      do {
3680        if (BottomQ.empty()) return nullptr;
3681        SU = BottomQ.top();
3682        BottomQ.pop();
3683      } while (SU->isScheduled);
3684      IsTopNode = false;
3685    }
3686    if (IsAlternating)
3687      IsTopDown = !IsTopDown;
3688    return SU;
3689  }
3690
3691  void schedNode(SUnit *SU, bool IsTopNode) override {}
3692
3693  void releaseTopNode(SUnit *SU) override {
3694    TopQ.push(SU);
3695  }
3696  void releaseBottomNode(SUnit *SU) override {
3697    BottomQ.push(SU);
3698  }
3699};
3700
3701} // end anonymous namespace
3702
3703static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3704  bool Alternate = !ForceTopDown && !ForceBottomUp;
3705  bool TopDown = !ForceBottomUp;
3706  assert((TopDown || !ForceTopDown) &&
3707         "-misched-topdown incompatible with -misched-bottomup");
3708  return new ScheduleDAGMILive(
3709      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3710}
3711
3712static MachineSchedRegistry ShufflerRegistry(
3713  "shuffle", "Shuffle machine instructions alternating directions",
3714  createInstructionShuffler);
3715#endif // !NDEBUG
3716
3717//===----------------------------------------------------------------------===//
3718// GraphWriter support for ScheduleDAGMILive.
3719//===----------------------------------------------------------------------===//
3720
3721#ifndef NDEBUG
3722namespace llvm {
3723
3724template<> struct GraphTraits<
3725  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3726
3727template<>
3728struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3729  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3730
3731  static std::string getGraphName(const ScheduleDAG *G) {
3732    return std::string(G->MF.getName());
3733  }
3734
3735  static bool renderGraphFromBottomUp() {
3736    return true;
3737  }
3738
3739  static bool isNodeHidden(const SUnit *Node) {
3740    if (ViewMISchedCutoff == 0)
3741      return false;
3742    return (Node->Preds.size() > ViewMISchedCutoff
3743         || Node->Succs.size() > ViewMISchedCutoff);
3744  }
3745
3746  /// If you want to override the dot attributes printed for a particular
3747  /// edge, override this method.
3748  static std::string getEdgeAttributes(const SUnit *Node,
3749                                       SUnitIterator EI,
3750                                       const ScheduleDAG *Graph) {
3751    if (EI.isArtificialDep())
3752      return "color=cyan,style=dashed";
3753    if (EI.isCtrlDep())
3754      return "color=blue,style=dashed";
3755    return "";
3756  }
3757
3758  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3759    std::string Str;
3760    raw_string_ostream SS(Str);
3761    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3762    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3763      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3764    SS << "SU:" << SU->NodeNum;
3765    if (DFS)
3766      SS << " I:" << DFS->getNumInstrs(SU);
3767    return SS.str();
3768  }
3769
3770  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3771    return G->getGraphNodeLabel(SU);
3772  }
3773
3774  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3775    std::string Str("shape=Mrecord");
3776    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3777    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3778      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3779    if (DFS) {
3780      Str += ",style=filled,fillcolor=\"#";
3781      Str += DOT::getColorString(DFS->getSubtreeID(N));
3782      Str += '"';
3783    }
3784    return Str;
3785  }
3786};
3787
3788} // end namespace llvm
3789#endif // NDEBUG
3790
3791/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3792/// rendered using 'dot'.
3793void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3794#ifndef NDEBUG
3795  ViewGraph(this, Name, false, Title);
3796#else
3797  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3798         << "systems with Graphviz or gv!\n";
3799#endif  // NDEBUG
3800}
3801
3802/// Out-of-line implementation with no arguments is handy for gdb.
3803void ScheduleDAGMI::viewGraph() {
3804  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3805}
3806