MachineScheduler.cpp revision 360784
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/MachineScheduler.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PriorityQueue.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/iterator_range.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/CodeGen/LiveInterval.h"
24#include "llvm/CodeGen/LiveIntervals.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineLoopInfo.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/CodeGen/MachinePassRegistry.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/Passes.h"
35#include "llvm/CodeGen/RegisterClassInfo.h"
36#include "llvm/CodeGen/RegisterPressure.h"
37#include "llvm/CodeGen/ScheduleDAG.h"
38#include "llvm/CodeGen/ScheduleDAGInstrs.h"
39#include "llvm/CodeGen/ScheduleDAGMutation.h"
40#include "llvm/CodeGen/ScheduleDFS.h"
41#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42#include "llvm/CodeGen/SlotIndexes.h"
43#include "llvm/CodeGen/TargetFrameLowering.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetPassConfig.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSchedule.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/Config/llvm-config.h"
51#include "llvm/InitializePasses.h"
52#include "llvm/MC/LaneBitmask.h"
53#include "llvm/Pass.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Compiler.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/GraphWriter.h"
59#include "llvm/Support/MachineValueType.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76namespace llvm {
77
78cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
79                           cl::desc("Force top-down list scheduling"));
80cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
81                            cl::desc("Force bottom-up list scheduling"));
82cl::opt<bool>
83DumpCriticalPathLength("misched-dcpl", cl::Hidden,
84                       cl::desc("Print critical path length to stdout"));
85
86cl::opt<bool> VerifyScheduling(
87    "verify-misched", cl::Hidden,
88    cl::desc("Verify machine instrs before and after machine scheduling"));
89
90} // end namespace llvm
91
92#ifndef NDEBUG
93static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
94  cl::desc("Pop up a window to show MISched dags after they are processed"));
95
96/// In some situations a few uninteresting nodes depend on nearly all other
97/// nodes in the graph; provide a cutoff to hide them.
98static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
99  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
100
101static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
102  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
103
104static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
105  cl::desc("Only schedule this function"));
106static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
107                                        cl::desc("Only schedule this MBB#"));
108static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
109                              cl::desc("Print schedule DAGs"));
110#else
111static const bool ViewMISchedDAGs = false;
112static const bool PrintDAGs = false;
113#endif // NDEBUG
114
115/// Avoid quadratic complexity in unusually large basic blocks by limiting the
116/// size of the ready lists.
117static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
118  cl::desc("Limit ready list to N instructions"), cl::init(256));
119
120static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
121  cl::desc("Enable register pressure scheduling."), cl::init(true));
122
123static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
124  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
125
126static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
127                                        cl::desc("Enable memop clustering."),
128                                        cl::init(true));
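//
// The hidden flags above (and the debug-only ones guarded by NDEBUG) are read
// through cl::opt, so they can be passed directly to llc or forwarded from
// clang with -mllvm. A purely illustrative invocation, assuming an
// asserts-enabled build and a hypothetical input file:
//
//   llc -misched-topdown -misched-cutoff=32 -misched-print-dags foo.ll
//   clang -O2 -mllvm -misched-regpressure=false foo.c
//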
129
130// DAG subtrees must have at least this many nodes.
131static const unsigned MinSubtreeSize = 8;
132
133// Pin the vtables to this file.
134void MachineSchedStrategy::anchor() {}
135
136void ScheduleDAGMutation::anchor() {}
137
138//===----------------------------------------------------------------------===//
139// Machine Instruction Scheduling Pass and Registry
140//===----------------------------------------------------------------------===//
141
142MachineSchedContext::MachineSchedContext() {
143  RegClassInfo = new RegisterClassInfo();
144}
145
146MachineSchedContext::~MachineSchedContext() {
147  delete RegClassInfo;
148}
149
150namespace {
151
152/// Base class for machine schedulers that can run at any point.
153class MachineSchedulerBase : public MachineSchedContext,
154                             public MachineFunctionPass {
155public:
156  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
157
158  void print(raw_ostream &O, const Module* = nullptr) const override;
159
160protected:
161  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
162};
163
164/// MachineScheduler runs after coalescing and before register allocation.
165class MachineScheduler : public MachineSchedulerBase {
166public:
167  MachineScheduler();
168
169  void getAnalysisUsage(AnalysisUsage &AU) const override;
170
171  bool runOnMachineFunction(MachineFunction&) override;
172
173  static char ID; // Class identification, replacement for typeinfo
174
175protected:
176  ScheduleDAGInstrs *createMachineScheduler();
177};
178
179/// PostMachineScheduler runs shortly before code emission.
180class PostMachineScheduler : public MachineSchedulerBase {
181public:
182  PostMachineScheduler();
183
184  void getAnalysisUsage(AnalysisUsage &AU) const override;
185
186  bool runOnMachineFunction(MachineFunction&) override;
187
188  static char ID; // Class identification, replacement for typeinfo
189
190protected:
191  ScheduleDAGInstrs *createPostMachineScheduler();
192};
193
194} // end anonymous namespace
195
196char MachineScheduler::ID = 0;
197
198char &llvm::MachineSchedulerID = MachineScheduler::ID;
199
200INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
201                      "Machine Instruction Scheduler", false, false)
202INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
203INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
204INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
205INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
206INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
207INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
208                    "Machine Instruction Scheduler", false, false)
209
210MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
211  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
212}
213
214void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
215  AU.setPreservesCFG();
216  AU.addRequired<MachineDominatorTree>();
217  AU.addRequired<MachineLoopInfo>();
218  AU.addRequired<AAResultsWrapperPass>();
219  AU.addRequired<TargetPassConfig>();
220  AU.addRequired<SlotIndexes>();
221  AU.addPreserved<SlotIndexes>();
222  AU.addRequired<LiveIntervals>();
223  AU.addPreserved<LiveIntervals>();
224  MachineFunctionPass::getAnalysisUsage(AU);
225}
226
227char PostMachineScheduler::ID = 0;
228
229char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
230
231INITIALIZE_PASS(PostMachineScheduler, "postmisched",
232                "PostRA Machine Instruction Scheduler", false, false)
233
234PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
235  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
236}
237
238void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
239  AU.setPreservesCFG();
240  AU.addRequired<MachineDominatorTree>();
241  AU.addRequired<MachineLoopInfo>();
242  AU.addRequired<AAResultsWrapperPass>();
243  AU.addRequired<TargetPassConfig>();
244  MachineFunctionPass::getAnalysisUsage(AU);
245}
246
247MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
248    MachineSchedRegistry::Registry;
249
250/// A dummy default scheduler factory indicates whether the scheduler
251/// is overridden on the command line.
252static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
253  return nullptr;
254}
255
256/// MachineSchedOpt allows command line selection of the scheduler.
257static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
258               RegisterPassParser<MachineSchedRegistry>>
259MachineSchedOpt("misched",
260                cl::init(&useDefaultMachineSched), cl::Hidden,
261                cl::desc("Machine instruction scheduler to use"));
262
263static MachineSchedRegistry
264DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
265                     useDefaultMachineSched);
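// A sketch of how an experimental or out-of-tree scheduler would hook into the
// registry above so it can be selected with -misched=<name>. The "sketch" name
// and factory below are hypothetical; MachineSchedRegistry and
// createGenericSchedLive are the real entry points used elsewhere in this file.
#if 0
static ScheduleDAGInstrs *createSketchSched(MachineSchedContext *C) {
  // Reuse the generic live-interval scheduler; a real alternative would
  // construct its own ScheduleDAGMILive/MachineSchedStrategy here.
  return createGenericSchedLive(C);
}

static MachineSchedRegistry
SketchSchedRegistry("sketch", "Hypothetical example scheduler.",
                    createSketchSched);
#endif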
266
267static cl::opt<bool> EnableMachineSched(
268    "enable-misched",
269    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
270    cl::Hidden);
271
272static cl::opt<bool> EnablePostRAMachineSched(
273    "enable-post-misched",
274    cl::desc("Enable the post-ra machine instruction scheduling pass."),
275    cl::init(true), cl::Hidden);
276
277/// Decrement this iterator until reaching the top or a non-debug instr.
278static MachineBasicBlock::const_iterator
279priorNonDebug(MachineBasicBlock::const_iterator I,
280              MachineBasicBlock::const_iterator Beg) {
281  assert(I != Beg && "reached the top of the region, cannot decrement");
282  while (--I != Beg) {
283    if (!I->isDebugInstr())
284      break;
285  }
286  return I;
287}
288
289/// Non-const version.
290static MachineBasicBlock::iterator
291priorNonDebug(MachineBasicBlock::iterator I,
292              MachineBasicBlock::const_iterator Beg) {
293  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
294      .getNonConstIterator();
295}
296
297/// If this iterator is a debug value, increment until reaching the End or a
298/// non-debug instruction.
299static MachineBasicBlock::const_iterator
300nextIfDebug(MachineBasicBlock::const_iterator I,
301            MachineBasicBlock::const_iterator End) {
302  for(; I != End; ++I) {
303    if (!I->isDebugInstr())
304      break;
305  }
306  return I;
307}
308
309/// Non-const version.
310static MachineBasicBlock::iterator
311nextIfDebug(MachineBasicBlock::iterator I,
312            MachineBasicBlock::const_iterator End) {
313  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
314      .getNonConstIterator();
315}
316
317/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
318ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
319  // Select the scheduler, or set the default.
320  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
321  if (Ctor != useDefaultMachineSched)
322    return Ctor(this);
323
324  // Get the default scheduler set by the target for this function.
325  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
326  if (Scheduler)
327    return Scheduler;
328
329  // Default to GenericScheduler.
330  return createGenericSchedLive(this);
331}
332
333/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
334/// the caller. We don't have a command line option to override the postRA
335/// scheduler. The Target must configure it.
336ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
337  // Get the postRA scheduler set by the target for this function.
338  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
339  if (Scheduler)
340    return Scheduler;
341
342  // Default to GenericScheduler.
343  return createGenericSchedPostRA(this);
344}
345
346/// Top-level MachineScheduler pass driver.
347///
348/// Visit blocks in function order. Divide each block into scheduling regions
349/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
350/// consistent with the DAG builder, which traverses the interior of the
351/// scheduling regions bottom-up.
352///
353/// This design avoids exposing scheduling boundaries to the DAG builder,
354/// simplifying the DAG builder's support for "special" target instructions.
355/// At the same time the design allows target schedulers to operate across
356/// scheduling boundaries, for example to bundle the boundary instructions
357/// without reordering them. This creates complexity, because the target
358/// scheduler must update the RegionBegin and RegionEnd positions cached by
359/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
360/// design would be to split blocks at scheduling boundaries, but LLVM has a
361/// general bias against block splitting purely for implementation simplicity.
362bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
363  if (skipFunction(mf.getFunction()))
364    return false;
365
366  if (EnableMachineSched.getNumOccurrences()) {
367    if (!EnableMachineSched)
368      return false;
369  } else if (!mf.getSubtarget().enableMachineScheduler())
370    return false;
371
372  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
373
374  // Initialize the context of the pass.
375  MF = &mf;
376  MLI = &getAnalysis<MachineLoopInfo>();
377  MDT = &getAnalysis<MachineDominatorTree>();
378  PassConfig = &getAnalysis<TargetPassConfig>();
379  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
380
381  LIS = &getAnalysis<LiveIntervals>();
382
383  if (VerifyScheduling) {
384    LLVM_DEBUG(LIS->dump());
385    MF->verify(this, "Before machine scheduling.");
386  }
387  RegClassInfo->runOnMachineFunction(*MF);
388
389  // Instantiate the selected scheduler for this target, function, and
390  // optimization level.
391  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
392  scheduleRegions(*Scheduler, false);
393
394  LLVM_DEBUG(LIS->dump());
395  if (VerifyScheduling)
396    MF->verify(this, "After machine scheduling.");
397  return true;
398}
399
400bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
401  if (skipFunction(mf.getFunction()))
402    return false;
403
404  if (EnablePostRAMachineSched.getNumOccurrences()) {
405    if (!EnablePostRAMachineSched)
406      return false;
407  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
408    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
409    return false;
410  }
411  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
412
413  // Initialize the context of the pass.
414  MF = &mf;
415  MLI = &getAnalysis<MachineLoopInfo>();
416  PassConfig = &getAnalysis<TargetPassConfig>();
417  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
418
419  if (VerifyScheduling)
420    MF->verify(this, "Before post machine scheduling.");
421
422  // Instantiate the selected scheduler for this target, function, and
423  // optimization level.
424  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
425  scheduleRegions(*Scheduler, true);
426
427  if (VerifyScheduling)
428    MF->verify(this, "After post machine scheduling.");
429  return true;
430}
431
432/// Return true if the given instruction should not be included in a scheduling
433/// region.
434///
435/// MachineScheduler does not currently support scheduling across calls. To
436/// handle calls, the DAG builder needs to be modified to create register
437/// anti/output dependencies on the registers clobbered by the call's regmask
438/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
439/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
440/// the boundary, but there would be no benefit to postRA scheduling across
441/// calls this late anyway.
442static bool isSchedBoundary(MachineBasicBlock::iterator MI,
443                            MachineBasicBlock *MBB,
444                            MachineFunction *MF,
445                            const TargetInstrInfo *TII) {
446  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
447}
448
449/// A region of an MBB for scheduling.
450namespace {
451struct SchedRegion {
452  /// RegionBegin is the first instruction in the scheduling region, and
453  /// RegionEnd is either MBB->end() or the scheduling boundary after the
454  /// last instruction in the scheduling region. These iterators cannot refer
455  /// to instructions outside of the identified scheduling region because
456  /// those may be reordered before scheduling this region.
457  MachineBasicBlock::iterator RegionBegin;
458  MachineBasicBlock::iterator RegionEnd;
459  unsigned NumRegionInstrs;
460
461  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
462              unsigned N) :
463    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
464};
465} // end anonymous namespace
466
467using MBBRegionsVector = SmallVector<SchedRegion, 16>;
468
469static void
470getSchedRegions(MachineBasicBlock *MBB,
471                MBBRegionsVector &Regions,
472                bool RegionsTopDown) {
473  MachineFunction *MF = MBB->getParent();
474  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
475
476  MachineBasicBlock::iterator I = nullptr;
477  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
478      RegionEnd != MBB->begin(); RegionEnd = I) {
479
480    // Avoid decrementing RegionEnd for blocks with no terminator.
481    if (RegionEnd != MBB->end() ||
482        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
483      --RegionEnd;
484    }
485
486    // The next region starts above the previous region. Look backward in the
487    // instruction stream until we find the nearest boundary.
488    unsigned NumRegionInstrs = 0;
489    I = RegionEnd;
490    for (;I != MBB->begin(); --I) {
491      MachineInstr &MI = *std::prev(I);
492      if (isSchedBoundary(&MI, &*MBB, MF, TII))
493        break;
494      if (!MI.isDebugInstr()) {
495        // MBB::size() uses instr_iterator to count. Here we need a bundle to
496        // count as a single instruction.
497        ++NumRegionInstrs;
498      }
499    }
500
501    // It's possible we found a scheduling region that only has debug
502    // instructions. Don't bother scheduling these.
503    if (NumRegionInstrs != 0)
504      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
505  }
506
507  if (RegionsTopDown)
508    std::reverse(Regions.begin(), Regions.end());
509}
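// As a concrete illustration of the loop above (hypothetical pseudo-MI): for a
// block of the form
//
//   %a = ADD ...
//   %b = MUL %a, ...
//   CALL @f
//   %c = SUB %b, ...
//   RET
//
// the CALL (isCall) and the RET (terminator) are scheduling boundaries, so two
// regions are recorded, bottom-up: {SUB} with RegionEnd at the RET, then
// {ADD, MUL} with RegionEnd at the CALL. The single-instruction region is later
// skipped by scheduleRegions, and the list is reversed if the scheduler asked
// for top-down region order.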
510
511/// Main driver for both MachineScheduler and PostMachineScheduler.
512void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
513                                           bool FixKillFlags) {
514  // Visit all machine basic blocks.
515  //
516  // TODO: Visit blocks in global postorder or postorder within the bottom-up
517  // loop tree. Then we can optionally compute global RegPressure.
518  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
519       MBB != MBBEnd; ++MBB) {
520
521    Scheduler.startBlock(&*MBB);
522
523#ifndef NDEBUG
524    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
525      continue;
526    if (SchedOnlyBlock.getNumOccurrences()
527        && (int)SchedOnlyBlock != MBB->getNumber())
528      continue;
529#endif
530
531    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
532    // points to the scheduling boundary at the bottom of the region. The DAG
533    // does not include RegionEnd, but the region does (i.e. the next
534    // RegionEnd is above the previous RegionBegin). If the current block has
535    // no terminator then RegionEnd == MBB->end() for the bottom region.
536    //
537    // All the regions of MBB are first found and stored in MBBRegions; they are
538    // then processed top-down within the block if doMBBSchedRegionsTopDown() is true.
539    //
540    // The Scheduler may insert instructions during either schedule() or
541    // exitRegion(), even for empty regions. So the local iterators 'I' and
542    // 'RegionEnd' are invalid across these calls. Instructions must not be
543    // added to regions other than the current one without updating MBBRegions.
544
545    MBBRegionsVector MBBRegions;
546    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
547    for (MBBRegionsVector::iterator R = MBBRegions.begin();
548         R != MBBRegions.end(); ++R) {
549      MachineBasicBlock::iterator I = R->RegionBegin;
550      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
551      unsigned NumRegionInstrs = R->NumRegionInstrs;
552
553      // Notify the scheduler of the region, even if we may skip scheduling
554      // it. Perhaps it still needs to be bundled.
555      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
556
557      // Skip empty scheduling regions (0 or 1 schedulable instructions).
558      if (I == RegionEnd || I == std::prev(RegionEnd)) {
559        // Close the current region. Bundle the terminator if needed.
560        // This invalidates 'RegionEnd' and 'I'.
561        Scheduler.exitRegion();
562        continue;
563      }
564      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
565      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
566                        << " " << MBB->getName() << "\n  From: " << *I
567                        << "    To: ";
568                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
569                 else dbgs() << "End";
570                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
571      if (DumpCriticalPathLength) {
572        errs() << MF->getName();
573        errs() << ":%bb. " << MBB->getNumber();
574        errs() << " " << MBB->getName() << " \n";
575      }
576
577      // Schedule a region: possibly reorder instructions.
578      // This invalidates the original region iterators.
579      Scheduler.schedule();
580
581      // Close the current region.
582      Scheduler.exitRegion();
583    }
584    Scheduler.finishBlock();
585    // FIXME: Ideally, no further passes should rely on kill flags. However,
586    // thumb2 size reduction is currently an exception, so the PostMIScheduler
587    // needs to do this.
588    if (FixKillFlags)
589      Scheduler.fixupKills(*MBB);
590  }
591  Scheduler.finalizeSchedule();
592}
593
594void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
595  // unimplemented
596}
597
598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
599LLVM_DUMP_METHOD void ReadyQueue::dump() const {
600  dbgs() << "Queue " << Name << ": ";
601  for (const SUnit *SU : Queue)
602    dbgs() << SU->NodeNum << " ";
603  dbgs() << "\n";
604}
605#endif
606
607//===----------------------------------------------------------------------===//
608// ScheduleDAGMI - Basic machine instruction scheduling. This is
609// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
610// virtual registers.
611//===----------------------------------------------------------------------===//
612
613// Provide a vtable anchor.
614ScheduleDAGMI::~ScheduleDAGMI() = default;
615
616/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
617/// NumPredsLeft reaches zero, release the successor node.
618///
619/// FIXME: Adjust SuccSU height based on MinLatency.
620void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
621  SUnit *SuccSU = SuccEdge->getSUnit();
622
623  if (SuccEdge->isWeak()) {
624    --SuccSU->WeakPredsLeft;
625    if (SuccEdge->isCluster())
626      NextClusterSucc = SuccSU;
627    return;
628  }
629#ifndef NDEBUG
630  if (SuccSU->NumPredsLeft == 0) {
631    dbgs() << "*** Scheduling failed! ***\n";
632    dumpNode(*SuccSU);
633    dbgs() << " has been released too many times!\n";
634    llvm_unreachable(nullptr);
635  }
636#endif
637  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
638  // CurrCycle may have advanced since then.
639  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
640    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
641
642  --SuccSU->NumPredsLeft;
643  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
644    SchedImpl->releaseTopNode(SuccSU);
645}
646
647/// releaseSuccessors - Call releaseSucc on each of SU's successors.
648void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
649  for (SDep &Succ : SU->Succs)
650    releaseSucc(SU, &Succ);
651}
652
653/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
654/// NumSuccsLeft reaches zero, release the predecessor node.
655///
656/// FIXME: Adjust PredSU height based on MinLatency.
657void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
658  SUnit *PredSU = PredEdge->getSUnit();
659
660  if (PredEdge->isWeak()) {
661    --PredSU->WeakSuccsLeft;
662    if (PredEdge->isCluster())
663      NextClusterPred = PredSU;
664    return;
665  }
666#ifndef NDEBUG
667  if (PredSU->NumSuccsLeft == 0) {
668    dbgs() << "*** Scheduling failed! ***\n";
669    dumpNode(*PredSU);
670    dbgs() << " has been released too many times!\n";
671    llvm_unreachable(nullptr);
672  }
673#endif
674  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
675  // CurrCycle may have advanced since then.
676  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
677    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
678
679  --PredSU->NumSuccsLeft;
680  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
681    SchedImpl->releaseBottomNode(PredSU);
682}
683
684/// releasePredecessors - Call releasePred on each of SU's predecessors.
685void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
686  for (SDep &Pred : SU->Preds)
687    releasePred(SU, &Pred);
688}
689
690void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
691  ScheduleDAGInstrs::startBlock(bb);
692  SchedImpl->enterMBB(bb);
693}
694
695void ScheduleDAGMI::finishBlock() {
696  SchedImpl->leaveMBB();
697  ScheduleDAGInstrs::finishBlock();
698}
699
700/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
701/// crossing a scheduling boundary. [begin, end) includes all instructions in
702/// the region, including the boundary itself and single-instruction regions
703/// that don't get scheduled.
704void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
705                                     MachineBasicBlock::iterator begin,
706                                     MachineBasicBlock::iterator end,
707                                     unsigned regioninstrs)
708{
709  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
710
711  SchedImpl->initPolicy(begin, end, regioninstrs);
712}
713
714/// This is normally called from the main scheduler loop but may also be invoked
715/// by the scheduling strategy to perform additional code motion.
716void ScheduleDAGMI::moveInstruction(
717  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
718  // Advance RegionBegin if the first instruction moves down.
719  if (&*RegionBegin == MI)
720    ++RegionBegin;
721
722  // Update the instruction stream.
723  BB->splice(InsertPos, BB, MI);
724
725  // Update LiveIntervals
726  if (LIS)
727    LIS->handleMove(*MI, /*UpdateFlags=*/true);
728
729  // Recede RegionBegin if an instruction moves above the first.
730  if (RegionBegin == InsertPos)
731    RegionBegin = MI;
732}
733
734bool ScheduleDAGMI::checkSchedLimit() {
735#ifndef NDEBUG
736  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
737    CurrentTop = CurrentBottom;
738    return false;
739  }
740  ++NumInstrsScheduled;
741#endif
742  return true;
743}
744
745/// Per-region scheduling driver, called back from
746/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
747/// does not consider liveness or register pressure. It is useful for PostRA
748/// scheduling and potentially other custom schedulers.
749void ScheduleDAGMI::schedule() {
750  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
751  LLVM_DEBUG(SchedImpl->dumpPolicy());
752
753  // Build the DAG.
754  buildSchedGraph(AA);
755
756  postprocessDAG();
757
758  SmallVector<SUnit*, 8> TopRoots, BotRoots;
759  findRootsAndBiasEdges(TopRoots, BotRoots);
760
761  LLVM_DEBUG(dump());
762  if (PrintDAGs) dump();
763  if (ViewMISchedDAGs) viewGraph();
764
765  // Initialize the strategy before modifying the DAG.
766  // This may initialize a DFSResult to be used for queue priority.
767  SchedImpl->initialize(this);
768
769  // Initialize ready queues now that the DAG and priority data are finalized.
770  initQueues(TopRoots, BotRoots);
771
772  bool IsTopNode = false;
773  while (true) {
774    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
775    SUnit *SU = SchedImpl->pickNode(IsTopNode);
776    if (!SU) break;
777
778    assert(!SU->isScheduled && "Node already scheduled");
779    if (!checkSchedLimit())
780      break;
781
782    MachineInstr *MI = SU->getInstr();
783    if (IsTopNode) {
784      assert(SU->isTopReady() && "node still has unscheduled dependencies");
785      if (&*CurrentTop == MI)
786        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
787      else
788        moveInstruction(MI, CurrentTop);
789    } else {
790      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
791      MachineBasicBlock::iterator priorII =
792        priorNonDebug(CurrentBottom, CurrentTop);
793      if (&*priorII == MI)
794        CurrentBottom = priorII;
795      else {
796        if (&*CurrentTop == MI)
797          CurrentTop = nextIfDebug(++CurrentTop, priorII);
798        moveInstruction(MI, CurrentBottom);
799        CurrentBottom = MI;
800      }
801    }
802    // Notify the scheduling strategy before updating the DAG.
803    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
804    // runs, it can then use the accurate ReadyCycle time to determine whether
805    // newly released nodes can move to the readyQ.
806    SchedImpl->schedNode(SU, IsTopNode);
807
808    updateQueues(SU, IsTopNode);
809  }
810  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
811
812  placeDebugValues();
813
814  LLVM_DEBUG({
815    dbgs() << "*** Final schedule for "
816           << printMBBReference(*begin()->getParent()) << " ***\n";
817    dumpSchedule();
818    dbgs() << '\n';
819  });
820}
821
822/// Apply each ScheduleDAGMutation step in order.
823void ScheduleDAGMI::postprocessDAG() {
824  for (auto &m : Mutations)
825    m->apply(this);
826}
827
828void ScheduleDAGMI::
829findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
830                      SmallVectorImpl<SUnit*> &BotRoots) {
831  for (SUnit &SU : SUnits) {
832    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
833
834    // Order predecessors so DFSResult follows the critical path.
835    SU.biasCriticalPath();
836
837    // A SUnit is ready to top schedule if it has no predecessors.
838    if (!SU.NumPredsLeft)
839      TopRoots.push_back(&SU);
840    // A SUnit is ready to bottom schedule if it has no successors.
841    if (!SU.NumSuccsLeft)
842      BotRoots.push_back(&SU);
843  }
844  ExitSU.biasCriticalPath();
845}
846
847/// Identify DAG roots and setup scheduler queues.
848void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
849                               ArrayRef<SUnit*> BotRoots) {
850  NextClusterSucc = nullptr;
851  NextClusterPred = nullptr;
852
853  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
854  //
855  // Nodes with unreleased weak edges can still be roots.
856  // Release top roots in forward order.
857  for (SUnit *SU : TopRoots)
858    SchedImpl->releaseTopNode(SU);
859
860  // Release bottom roots in reverse order so the higher priority nodes appear
861  // first. This is more natural and slightly more efficient.
862  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
863         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
864    SchedImpl->releaseBottomNode(*I);
865  }
866
867  releaseSuccessors(&EntrySU);
868  releasePredecessors(&ExitSU);
869
870  SchedImpl->registerRoots();
871
872  // Advance past initial DebugValues.
873  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
874  CurrentBottom = RegionEnd;
875}
876
877/// Update scheduler queues after scheduling an instruction.
878void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
879  // Release dependent instructions for scheduling.
880  if (IsTopNode)
881    releaseSuccessors(SU);
882  else
883    releasePredecessors(SU);
884
885  SU->isScheduled = true;
886}
887
888/// Reinsert any remaining debug_values, just like the PostRA scheduler.
889void ScheduleDAGMI::placeDebugValues() {
890  // If first instruction was a DBG_VALUE then put it back.
891  if (FirstDbgValue) {
892    BB->splice(RegionBegin, BB, FirstDbgValue);
893    RegionBegin = FirstDbgValue;
894  }
895
896  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
897         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
898    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
899    MachineInstr *DbgValue = P.first;
900    MachineBasicBlock::iterator OrigPrevMI = P.second;
901    if (&*RegionBegin == DbgValue)
902      ++RegionBegin;
903    BB->splice(++OrigPrevMI, BB, DbgValue);
904    if (OrigPrevMI == std::prev(RegionEnd))
905      RegionEnd = DbgValue;
906  }
907  DbgValues.clear();
908  FirstDbgValue = nullptr;
909}
910
911#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
912LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
913  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
914    if (SUnit *SU = getSUnit(&(*MI)))
915      dumpNode(*SU);
916    else
917      dbgs() << "Missing SUnit\n";
918  }
919}
920#endif
921
922//===----------------------------------------------------------------------===//
923// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
924// preservation.
925//===----------------------------------------------------------------------===//
926
927ScheduleDAGMILive::~ScheduleDAGMILive() {
928  delete DFSResult;
929}
930
931void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
932  const MachineInstr &MI = *SU.getInstr();
933  for (const MachineOperand &MO : MI.operands()) {
934    if (!MO.isReg())
935      continue;
936    if (!MO.readsReg())
937      continue;
938    if (TrackLaneMasks && !MO.isUse())
939      continue;
940
941    Register Reg = MO.getReg();
942    if (!Register::isVirtualRegister(Reg))
943      continue;
944
945    // Ignore re-defs.
946    if (TrackLaneMasks) {
947      bool FoundDef = false;
948      for (const MachineOperand &MO2 : MI.operands()) {
949        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
950          FoundDef = true;
951          break;
952        }
953      }
954      if (FoundDef)
955        continue;
956    }
957
958    // Record this local VReg use.
959    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
960    for (; UI != VRegUses.end(); ++UI) {
961      if (UI->SU == &SU)
962        break;
963    }
964    if (UI == VRegUses.end())
965      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
966  }
967}
968
969/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
970/// crossing a scheduling boundary. [begin, end) includes all instructions in
971/// the region, including the boundary itself and single-instruction regions
972/// that don't get scheduled.
973void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
974                                MachineBasicBlock::iterator begin,
975                                MachineBasicBlock::iterator end,
976                                unsigned regioninstrs)
977{
978  // ScheduleDAGMI initializes SchedImpl's per-region policy.
979  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
980
981  // For convenience remember the end of the liveness region.
982  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
983
984  SUPressureDiffs.clear();
985
986  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
987  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
988
989  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
990         "ShouldTrackLaneMasks requires ShouldTrackPressure");
991}
992
993// Setup the register pressure trackers for the top scheduled and bottom
994// scheduled regions.
995void ScheduleDAGMILive::initRegPressure() {
996  VRegUses.clear();
997  VRegUses.setUniverse(MRI.getNumVirtRegs());
998  for (SUnit &SU : SUnits)
999    collectVRegUses(SU);
1000
1001  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1002                    ShouldTrackLaneMasks, false);
1003  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1004                    ShouldTrackLaneMasks, false);
1005
1006  // Close the RPTracker to finalize live ins.
1007  RPTracker.closeRegion();
1008
1009  LLVM_DEBUG(RPTracker.dump());
1010
1011  // Initialize the live ins and live outs.
1012  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1013  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1014
1015  // Close one end of the tracker so we can call
1016  // getMaxUpward/DownwardPressureDelta before advancing across any
1017  // instructions. This converts currently live regs into live ins/outs.
1018  TopRPTracker.closeTop();
1019  BotRPTracker.closeBottom();
1020
1021  BotRPTracker.initLiveThru(RPTracker);
1022  if (!BotRPTracker.getLiveThru().empty()) {
1023    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1024    LLVM_DEBUG(dbgs() << "Live Thru: ";
1025               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1026  }
1027
1028  // For each live out vreg reduce the pressure change associated with other
1029  // uses of the same vreg below the live-out reaching def.
1030  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1031
1032  // Account for liveness generated by the region boundary.
1033  if (LiveRegionEnd != RegionEnd) {
1034    SmallVector<RegisterMaskPair, 8> LiveUses;
1035    BotRPTracker.recede(&LiveUses);
1036    updatePressureDiffs(LiveUses);
1037  }
1038
1039  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1040             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1041             dbgs() << "Bottom Pressure:\n";
1042             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1043
1044  assert((BotRPTracker.getPos() == RegionEnd ||
1045          (RegionEnd->isDebugInstr() &&
1046           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1047         "Can't find the region bottom");
1048
1049  // Cache the list of excess pressure sets in this region. This will also track
1050  // the max pressure in the scheduled code for these sets.
1051  RegionCriticalPSets.clear();
1052  const std::vector<unsigned> &RegionPressure =
1053    RPTracker.getPressure().MaxSetPressure;
1054  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1055    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1056    if (RegionPressure[i] > Limit) {
1057      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1058                        << " Actual " << RegionPressure[i] << "\n");
1059      RegionCriticalPSets.push_back(PressureChange(i));
1060    }
1061  }
1062  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1063             for (const PressureChange &RCPS
1064                  : RegionCriticalPSets) dbgs()
1065             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1066             dbgs() << "\n");
1067}
1068
1069void ScheduleDAGMILive::
1070updateScheduledPressure(const SUnit *SU,
1071                        const std::vector<unsigned> &NewMaxPressure) {
1072  const PressureDiff &PDiff = getPressureDiff(SU);
1073  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1074  for (const PressureChange &PC : PDiff) {
1075    if (!PC.isValid())
1076      break;
1077    unsigned ID = PC.getPSet();
1078    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1079      ++CritIdx;
1080    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1081      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1082          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1083        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1084    }
1085    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1086    if (NewMaxPressure[ID] >= Limit - 2) {
1087      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1088                        << NewMaxPressure[ID]
1089                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1090                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1091                        << " livethru)\n");
1092    }
1093  }
1094}
1095
1096/// Update the PressureDiff array for liveness after scheduling this
1097/// instruction.
1098void ScheduleDAGMILive::updatePressureDiffs(
1099    ArrayRef<RegisterMaskPair> LiveUses) {
1100  for (const RegisterMaskPair &P : LiveUses) {
1101    unsigned Reg = P.RegUnit;
1102    /// FIXME: Currently assuming single-use physregs.
1103    if (!Register::isVirtualRegister(Reg))
1104      continue;
1105
1106    if (ShouldTrackLaneMasks) {
1107      // If the register has just become live then other uses won't change
1108      // this fact anymore => decrement pressure.
1109      // If the register has just become dead then other uses make it come
1110      // back to life => increment pressure.
1111      bool Decrement = P.LaneMask.any();
1112
1113      for (const VReg2SUnit &V2SU
1114           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1115        SUnit &SU = *V2SU.SU;
1116        if (SU.isScheduled || &SU == &ExitSU)
1117          continue;
1118
1119        PressureDiff &PDiff = getPressureDiff(&SU);
1120        PDiff.addPressureChange(Reg, Decrement, &MRI);
1121        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1122                          << printReg(Reg, TRI) << ':'
1123                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1124                   dbgs() << "              to "; PDiff.dump(*TRI););
1125      }
1126    } else {
1127      assert(P.LaneMask.any());
1128      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1129      // This may be called before CurrentBottom has been initialized. However,
1130      // BotRPTracker must have a valid position. We want the value live into the
1131      // instruction or live out of the block, so ask for the previous
1132      // instruction's live-out.
1133      const LiveInterval &LI = LIS->getInterval(Reg);
1134      VNInfo *VNI;
1135      MachineBasicBlock::const_iterator I =
1136        nextIfDebug(BotRPTracker.getPos(), BB->end());
1137      if (I == BB->end())
1138        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1139      else {
1140        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1141        VNI = LRQ.valueIn();
1142      }
1143      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1144      assert(VNI && "No live value at use.");
1145      for (const VReg2SUnit &V2SU
1146           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1147        SUnit *SU = V2SU.SU;
1148        // If this use comes before the reaching def, it cannot be a last use,
1149        // so decrease its pressure change.
1150        if (!SU->isScheduled && SU != &ExitSU) {
1151          LiveQueryResult LRQ =
1152              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1153          if (LRQ.valueIn() == VNI) {
1154            PressureDiff &PDiff = getPressureDiff(SU);
1155            PDiff.addPressureChange(Reg, true, &MRI);
1156            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1157                              << *SU->getInstr();
1158                       dbgs() << "              to "; PDiff.dump(*TRI););
1159          }
1160        }
1161      }
1162    }
1163  }
1164}
1165
1166void ScheduleDAGMILive::dump() const {
1167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168  if (EntrySU.getInstr() != nullptr)
1169    dumpNodeAll(EntrySU);
1170  for (const SUnit &SU : SUnits) {
1171    dumpNodeAll(SU);
1172    if (ShouldTrackPressure) {
1173      dbgs() << "  Pressure Diff      : ";
1174      getPressureDiff(&SU).dump(*TRI);
1175    }
1176    dbgs() << "  Single Issue       : ";
1177    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1178        SchedModel.mustEndGroup(SU.getInstr()))
1179      dbgs() << "true;";
1180    else
1181      dbgs() << "false;";
1182    dbgs() << '\n';
1183  }
1184  if (ExitSU.getInstr() != nullptr)
1185    dumpNodeAll(ExitSU);
1186#endif
1187}
1188
1189/// schedule - Called back from MachineScheduler::runOnMachineFunction
1190/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1191/// only includes instructions that have DAG nodes, not scheduling boundaries.
1192///
1193/// This is a skeletal driver, with all the functionality pushed into helpers,
1194/// so that it can be easily extended by experimental schedulers. Generally,
1195/// implementing MachineSchedStrategy should be sufficient to implement a new
1196/// scheduling algorithm. However, if a scheduler further subclasses
1197/// ScheduleDAGMILive then it will want to override this virtual method in order
1198/// to update any specialized state.
1199void ScheduleDAGMILive::schedule() {
1200  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1201  LLVM_DEBUG(SchedImpl->dumpPolicy());
1202  buildDAGWithRegPressure();
1203
1204  postprocessDAG();
1205
1206  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1207  findRootsAndBiasEdges(TopRoots, BotRoots);
1208
1209  // Initialize the strategy before modifying the DAG.
1210  // This may initialize a DFSResult to be used for queue priority.
1211  SchedImpl->initialize(this);
1212
1213  LLVM_DEBUG(dump());
1214  if (PrintDAGs) dump();
1215  if (ViewMISchedDAGs) viewGraph();
1216
1217  // Initialize ready queues now that the DAG and priority data are finalized.
1218  initQueues(TopRoots, BotRoots);
1219
1220  bool IsTopNode = false;
1221  while (true) {
1222    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1223    SUnit *SU = SchedImpl->pickNode(IsTopNode);
1224    if (!SU) break;
1225
1226    assert(!SU->isScheduled && "Node already scheduled");
1227    if (!checkSchedLimit())
1228      break;
1229
1230    scheduleMI(SU, IsTopNode);
1231
1232    if (DFSResult) {
1233      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1234      if (!ScheduledTrees.test(SubtreeID)) {
1235        ScheduledTrees.set(SubtreeID);
1236        DFSResult->scheduleTree(SubtreeID);
1237        SchedImpl->scheduleTree(SubtreeID);
1238      }
1239    }
1240
1241    // Notify the scheduling strategy after updating the DAG.
1242    SchedImpl->schedNode(SU, IsTopNode);
1243
1244    updateQueues(SU, IsTopNode);
1245  }
1246  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1247
1248  placeDebugValues();
1249
1250  LLVM_DEBUG({
1251    dbgs() << "*** Final schedule for "
1252           << printMBBReference(*begin()->getParent()) << " ***\n";
1253    dumpSchedule();
1254    dbgs() << '\n';
1255  });
1256}
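// The doc comment above notes that implementing MachineSchedStrategy is
// normally sufficient for a new scheduling algorithm. A minimal, hypothetical
// bottom-up strategy, restricted to the pure virtual hooks and ignoring
// latency and register pressure entirely, might look like this sketch:
#if 0
class SketchStrategy : public MachineSchedStrategy {
  // Bottom-ready nodes: all of their successors have already been scheduled.
  std::vector<SUnit *> Ready;

public:
  void initialize(ScheduleDAGMI *DAG) override {}

  // Returning nullptr terminates the main loop in schedule().
  SUnit *pickNode(bool &IsTopNode) override {
    if (Ready.empty())
      return nullptr;
    IsTopNode = false; // Always extend the bottom of the scheduled zone.
    SUnit *SU = Ready.back();
    Ready.pop_back();
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override { Ready.push_back(SU); }
};
#endif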
1257
1258/// Build the DAG and setup three register pressure trackers.
1259void ScheduleDAGMILive::buildDAGWithRegPressure() {
1260  if (!ShouldTrackPressure) {
1261    RPTracker.reset();
1262    RegionCriticalPSets.clear();
1263    buildSchedGraph(AA);
1264    return;
1265  }
1266
1267  // Initialize the register pressure tracker used by buildSchedGraph.
1268  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1269                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1270
1271  // Account for liveness generated by the region boundary.
1272  if (LiveRegionEnd != RegionEnd)
1273    RPTracker.recede();
1274
1275  // Build the DAG, and compute current register pressure.
1276  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1277
1278  // Initialize top/bottom trackers after computing region pressure.
1279  initRegPressure();
1280}
1281
1282void ScheduleDAGMILive::computeDFSResult() {
1283  if (!DFSResult)
1284    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1285  DFSResult->clear();
1286  ScheduledTrees.clear();
1287  DFSResult->resize(SUnits.size());
1288  DFSResult->compute(SUnits);
1289  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1290}
1291
1292/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1293/// only provides the critical path for single block loops. To handle loops that
1294/// span blocks, we could use the vreg path latencies provided by
1295/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1296/// available for use in the scheduler.
1297///
1298/// The cyclic path estimation identifies a def-use pair that crosses the back
1299/// edge and considers the depth and height of the nodes. For example, consider
1300/// the following instruction sequence where each instruction has unit latency
1301/// and defines an eponymous virtual register:
1302///
1303/// a->b(a,c)->c(b)->d(c)->exit
1304///
1305/// The cyclic critical path is two cycles: b->c->b
1306/// The acyclic critical path is four cycles: a->b->c->d->exit
1307/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1308/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1309/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1310/// LiveInDepth = depth(b) = len(a->b) = 1
1311///
1312/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1313/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1314/// CyclicCriticalPath = min(2, 2) = 2
1315///
1316/// This could be relevant to PostRA scheduling, but is currently implemented
1317/// assuming LiveIntervals.
1318unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1319  // This only applies to single-block loops.
1320  if (!BB->isSuccessor(BB))
1321    return 0;
1322
1323  unsigned MaxCyclicLatency = 0;
1324  // Visit each live out vreg def to find def/use pairs that cross iterations.
1325  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1326    unsigned Reg = P.RegUnit;
1327    if (!Register::isVirtualRegister(Reg))
1328      continue;
1329    const LiveInterval &LI = LIS->getInterval(Reg);
1330    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1331    if (!DefVNI)
1332      continue;
1333
1334    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1335    const SUnit *DefSU = getSUnit(DefMI);
1336    if (!DefSU)
1337      continue;
1338
1339    unsigned LiveOutHeight = DefSU->getHeight();
1340    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1341    // Visit all local users of the vreg def.
1342    for (const VReg2SUnit &V2SU
1343         : make_range(VRegUses.find(Reg), VRegUses.end())) {
1344      SUnit *SU = V2SU.SU;
1345      if (SU == &ExitSU)
1346        continue;
1347
1348      // Only consider uses of the phi.
1349      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1350      if (!LRQ.valueIn()->isPHIDef())
1351        continue;
1352
1353      // Assume that a path spanning two iterations is a cycle, which could
1354      // overestimate in strange cases. This allows cyclic latency to be
1355      // estimated as the minimum slack of the vreg's depth or height.
1356      unsigned CyclicLatency = 0;
1357      if (LiveOutDepth > SU->getDepth())
1358        CyclicLatency = LiveOutDepth - SU->getDepth();
1359
1360      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1361      if (LiveInHeight > LiveOutHeight) {
1362        if (LiveInHeight - LiveOutHeight < CyclicLatency)
1363          CyclicLatency = LiveInHeight - LiveOutHeight;
1364      } else
1365        CyclicLatency = 0;
1366
1367      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1368                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1369      if (CyclicLatency > MaxCyclicLatency)
1370        MaxCyclicLatency = CyclicLatency;
1371    }
1372  }
1373  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1374  return MaxCyclicLatency;
1375}
1376
1377/// Release ExitSU predecessors and setup scheduler queues. Re-position
1378/// the Top RP tracker in case the region beginning has changed.
1379void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1380                                   ArrayRef<SUnit*> BotRoots) {
1381  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1382  if (ShouldTrackPressure) {
1383    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1384    TopRPTracker.setPos(CurrentTop);
1385  }
1386}
1387
1388/// Move an instruction and update register pressure.
1389void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1390  // Move the instruction to its new location in the instruction stream.
1391  MachineInstr *MI = SU->getInstr();
1392
1393  if (IsTopNode) {
1394    assert(SU->isTopReady() && "node still has unscheduled dependencies");
1395    if (&*CurrentTop == MI)
1396      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1397    else {
1398      moveInstruction(MI, CurrentTop);
1399      TopRPTracker.setPos(MI);
1400    }
1401
1402    if (ShouldTrackPressure) {
1403      // Update top scheduled pressure.
1404      RegisterOperands RegOpers;
1405      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1406      if (ShouldTrackLaneMasks) {
1407        // Adjust liveness and add missing dead+read-undef flags.
1408        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1409        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1410      } else {
1411        // Adjust for missing dead-def flags.
1412        RegOpers.detectDeadDefs(*MI, *LIS);
1413      }
1414
1415      TopRPTracker.advance(RegOpers);
1416      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1417      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1418                     TopRPTracker.getRegSetPressureAtPos(), TRI););
1419
1420      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1421    }
1422  } else {
1423    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1424    MachineBasicBlock::iterator priorII =
1425      priorNonDebug(CurrentBottom, CurrentTop);
1426    if (&*priorII == MI)
1427      CurrentBottom = priorII;
1428    else {
1429      if (&*CurrentTop == MI) {
1430        CurrentTop = nextIfDebug(++CurrentTop, priorII);
1431        TopRPTracker.setPos(CurrentTop);
1432      }
1433      moveInstruction(MI, CurrentBottom);
1434      CurrentBottom = MI;
1435      BotRPTracker.setPos(CurrentBottom);
1436    }
1437    if (ShouldTrackPressure) {
1438      RegisterOperands RegOpers;
1439      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1440      if (ShouldTrackLaneMasks) {
1441        // Adjust liveness and add missing dead+read-undef flags.
1442        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1443        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1444      } else {
1445        // Adjust for missing dead-def flags.
1446        RegOpers.detectDeadDefs(*MI, *LIS);
1447      }
1448
1449      if (BotRPTracker.getPos() != CurrentBottom)
1450        BotRPTracker.recedeSkipDebugValues();
1451      SmallVector<RegisterMaskPair, 8> LiveUses;
1452      BotRPTracker.recede(RegOpers, &LiveUses);
1453      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1454      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1455                     BotRPTracker.getRegSetPressureAtPos(), TRI););
1456
1457      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1458      updatePressureDiffs(LiveUses);
1459    }
1460  }
1461}
1462
1463//===----------------------------------------------------------------------===//
1464// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1465//===----------------------------------------------------------------------===//
1466
1467namespace {
1468
1469/// Post-process the DAG to create cluster edges between neighboring
1470/// loads or between neighboring stores.
1471class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1472  struct MemOpInfo {
1473    SUnit *SU;
1474    const MachineOperand *BaseOp;
1475    int64_t Offset;
1476
1477    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1478        : SU(su), BaseOp(Op), Offset(ofs) {}
1479
1480    bool operator<(const MemOpInfo &RHS) const {
1481      if (BaseOp->getType() != RHS.BaseOp->getType())
1482        return BaseOp->getType() < RHS.BaseOp->getType();
1483
1484      if (BaseOp->isReg())
1485        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1486               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1487                               RHS.SU->NodeNum);
1488      if (BaseOp->isFI()) {
1489        const MachineFunction &MF =
1490            *BaseOp->getParent()->getParent()->getParent();
1491        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1492        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1493                              TargetFrameLowering::StackGrowsDown;
1494        // Can't use tuple comparison here since we might need to use a
1495        // different order when the stack grows down.
1496        if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1497          return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1498                                : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1499
1500        if (Offset != RHS.Offset)
1501          return Offset < RHS.Offset;
1502
1503        return SU->NodeNum < RHS.SU->NodeNum;
1504      }
1505
1506      llvm_unreachable("MemOpClusterMutation only supports register or frame "
1507                       "index bases.");
1508    }
1509  };
1510
1511  const TargetInstrInfo *TII;
1512  const TargetRegisterInfo *TRI;
1513  bool IsLoad;
1514
1515public:
1516  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1517                           const TargetRegisterInfo *tri, bool IsLoad)
1518      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1519
1520  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1521
1522protected:
1523  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1524};
1525
1526class StoreClusterMutation : public BaseMemOpClusterMutation {
1527public:
1528  StoreClusterMutation(const TargetInstrInfo *tii,
1529                       const TargetRegisterInfo *tri)
1530      : BaseMemOpClusterMutation(tii, tri, false) {}
1531};
1532
1533class LoadClusterMutation : public BaseMemOpClusterMutation {
1534public:
1535  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1536      : BaseMemOpClusterMutation(tii, tri, true) {}
1537};
1538
1539} // end anonymous namespace
1540
1541namespace llvm {
1542
1543std::unique_ptr<ScheduleDAGMutation>
1544createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1545                             const TargetRegisterInfo *TRI) {
1546  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1547                            : nullptr;
1548}
1549
1550std::unique_ptr<ScheduleDAGMutation>
1551createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1552                              const TargetRegisterInfo *TRI) {
1553  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1554                            : nullptr;
1555}
1556
1557} // end namespace llvm
1558
1559void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1560    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1561  SmallVector<MemOpInfo, 32> MemOpRecords;
1562  for (SUnit *SU : MemOps) {
1563    const MachineOperand *BaseOp;
1564    int64_t Offset;
1565    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1566      MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1567  }
1568  if (MemOpRecords.size() < 2)
1569    return;
1570
1571  llvm::sort(MemOpRecords);
1572  unsigned ClusterLength = 1;
1573  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1574    SUnit *SUa = MemOpRecords[Idx].SU;
1575    SUnit *SUb = MemOpRecords[Idx+1].SU;
1576    if (SUa->NodeNum > SUb->NodeNum)
1577      std::swap(SUa, SUb);
1578    if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1579                                 *MemOpRecords[Idx + 1].BaseOp,
1580                                 ClusterLength) &&
1581        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1582      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1583                        << SUb->NodeNum << ")\n");
1584      // Copy successor edges from SUa to SUb. Interleaving computation
1585      // dependent on SUa can prevent load combining due to register reuse.
1586      // Predecessor edges do not need to be copied from SUb to SUa since nearby
1587      // loads should have effectively the same inputs.
1588      for (const SDep &Succ : SUa->Succs) {
1589        if (Succ.getSUnit() == SUb)
1590          continue;
1591        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1592                          << ")\n");
1593        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1594      }
1595      ++ClusterLength;
1596    } else
1597      ClusterLength = 1;
1598  }
1599}
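// Illustrative sketch (hypothetical operands, not taken from any particular
// target): given three loads from the same base register,
//   SU(3): ld r1, [rbase, #0]
//   SU(7): ld r2, [rbase, #8]
//   SU(5): ld r3, [rbase, #4]
// sorting the MemOpInfo records by (base, offset) yields SU(3), SU(5), SU(7).
// Each adjacent pair is then offered to the target via shouldClusterMemOps;
// for every accepted pair a Cluster edge is added from the lower-numbered SU
// to the higher-numbered one, so the scheduler tends to issue them back to
// back. Acceptance criteria (e.g. a maximum cluster length) are entirely
// target-specific.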
1600
1601/// Callback from DAG postProcessing to create cluster edges for loads or stores.
1602void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1603  // Map DAG NodeNum to a set of dependent MemOps in the store chain.
1604  DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
1605  for (SUnit &SU : DAG->SUnits) {
1606    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1607        (!IsLoad && !SU.getInstr()->mayStore()))
1608      continue;
1609
1610    unsigned ChainPredID = DAG->SUnits.size();
1611    for (const SDep &Pred : SU.Preds) {
1612      if (Pred.isCtrl()) {
1613        ChainPredID = Pred.getSUnit()->NodeNum;
1614        break;
1615      }
1616    }
1617    // Insert the SU into the corresponding store chain.
1618    auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
1619    Chain.push_back(&SU);
1620  }
1621
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChains)
1624    clusterNeighboringMemOps(SCD.second, DAG);
1625}
1626
1627//===----------------------------------------------------------------------===//
1628// CopyConstrain - DAG post-processing to encourage copy elimination.
1629//===----------------------------------------------------------------------===//
1630
1631namespace {
1632
1633/// Post-process the DAG to create weak edges from all uses of a copy to
1634/// the one use that defines the copy's source vreg, most likely an induction
1635/// variable increment.
1636class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643
1644public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648
1649protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651};
1652
1653} // end anonymous namespace
1654
1655namespace llvm {
1656
1657std::unique_ptr<ScheduleDAGMutation>
1658createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659                               const TargetRegisterInfo *TRI) {
1660  return std::make_unique<CopyConstrain>(TII, TRI);
1661}
1662
1663} // end namespace llvm
1664
1665/// constrainLocalCopy handles two possibilities:
1666/// 1) Local src:
1667/// I0:     = dst
1668/// I1: src = ...
1669/// I2:     = dst
1670/// I3: dst = src (copy)
1671/// (create pred->succ edges I0->I1, I2->I1)
1672///
1673/// 2) Local copy:
1674/// I0: dst = src (copy)
1675/// I1:     = dst
1676/// I2: src = ...
1677/// I3:     = dst
1678/// (create pred->succ edges I1->I2, I3->I2)
1679///
1680/// Although the MachineScheduler is currently constrained to single blocks,
1681/// this algorithm should handle extended blocks. An EBB is a set of
1682/// contiguously numbered blocks such that the previous block in the EBB is
1683/// always the single predecessor.
1684void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  Register SrcReg = SrcOp.getReg();
1691  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692    return;
1693
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  Register DstReg = DstOp.getReg();
1696  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1697    return;
1698
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709    LocalReg = DstReg;
1710    GlobalReg = SrcReg;
1711    LocalLI = &LIS->getInterval(LocalReg);
1712    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713      return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with it.
1723  if (GlobalSegment == GlobalLI->end())
1724    return;
1725
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731    ++GlobalSegment;
1732
1733  if (GlobalSegment == GlobalLI->end())
1734    return;
1735
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738    // Two address defs have no hole.
1739    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740                               GlobalSegment->start)) {
1741      return;
1742    }
1743    // If the prior global segment may be defined by the same two-address
1744    // instruction that also defines LocalLI, then we can't make a hole here.
1745    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746                               LocalLI->beginIndex())) {
1747      return;
1748    }
1749    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750    // it would be a disconnected component in the live range.
1751    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752           "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756    return;
1757
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760    return;
1761
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770      continue;
1771    if (Succ.getSUnit() == GlobalSU)
1772      continue;
1773    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774      return;
1775    LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785      continue;
1786    if (Pred.getSUnit() == FirstLocalSU)
1787      continue;
1788    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789      return;
1790    GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796    LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1797                      << GlobalSU->NodeNum << ")\n");
1798    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802    LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1803                      << FirstLocalSU->NodeNum << ")\n");
1804    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806}
1807
1808/// Callback from DAG postProcessing to create weak edges to encourage
1809/// copy elimination.
1810void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816    return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819      *priorNonDebug(DAG->end(), DAG->begin()));
1820
1821  for (SUnit &SU : DAG->SUnits) {
1822    if (!SU.getInstr()->isCopy())
1823      continue;
1824
1825    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827}
1828
1829//===----------------------------------------------------------------------===//
1830// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831// and possibly other custom schedulers.
1832//===----------------------------------------------------------------------===//
1833
1834static const unsigned InvalidCycle = ~0U;
1835
1836SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837
1838/// Given a Count of resource usage and a Latency value, return true if a
1839/// SchedBoundary becomes resource limited.
1840/// If we are checking after scheduling a node, we should return true when
1841/// we just reach the resource limit.
1842static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1843                               unsigned Latency, bool AfterSchedNode) {
1844  int ResCntFactor = (int)(Count - (Latency * LFactor));
1845  if (AfterSchedNode)
1846    return ResCntFactor >= (int)LFactor;
1847  else
1848    return ResCntFactor > (int)LFactor;
1849}
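// Worked example (hypothetical values): with LFactor = 2 and Latency = 10,
// ResCntFactor = Count - 20. When checking right after scheduling a node
// (AfterSchedNode == true) the boundary counts as resource limited once
// Count >= 22, i.e. once resource usage exceeds the scaled latency by at
// least one full cycle's worth of units; before scheduling, Count must
// strictly exceed 22.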
1850
1851void SchedBoundary::reset() {
1852  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1853  // Destroying and reconstructing it is very expensive though. So keep
1854  // invalid, placeholder HazardRecs.
1855  if (HazardRec && HazardRec->isEnabled()) {
1856    delete HazardRec;
1857    HazardRec = nullptr;
1858  }
1859  Available.clear();
1860  Pending.clear();
1861  CheckPending = false;
1862  CurrCycle = 0;
1863  CurrMOps = 0;
1864  MinReadyCycle = std::numeric_limits<unsigned>::max();
1865  ExpectedLatency = 0;
1866  DependentLatency = 0;
1867  RetiredMOps = 0;
1868  MaxExecutedResCount = 0;
1869  ZoneCritResIdx = 0;
1870  IsResourceLimited = false;
1871  ReservedCycles.clear();
1872  ReservedCyclesIndex.clear();
1873#ifndef NDEBUG
1874  // Track the maximum number of stall cycles that could arise either from the
1875  // latency of a DAG edge or the number of cycles that a processor resource is
1876  // reserved (SchedBoundary::ReservedCycles).
1877  MaxObservedStall = 0;
1878#endif
1879  // Reserve a zero-count for invalid CritResIdx.
1880  ExecutedResCounts.resize(1);
1881  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1882}
1883
1884void SchedRemainder::
1885init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1886  reset();
1887  if (!SchedModel->hasInstrSchedModel())
1888    return;
1889  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1890  for (SUnit &SU : DAG->SUnits) {
1891    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1892    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1893      * SchedModel->getMicroOpFactor();
1894    for (TargetSchedModel::ProcResIter
1895           PI = SchedModel->getWriteProcResBegin(SC),
1896           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1897      unsigned PIdx = PI->ProcResourceIdx;
1898      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1899      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1900    }
1901  }
1902}
1903
1904void SchedBoundary::
1905init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1906  reset();
1907  DAG = dag;
1908  SchedModel = smodel;
1909  Rem = rem;
1910  if (SchedModel->hasInstrSchedModel()) {
1911    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1912    ReservedCyclesIndex.resize(ResourceCount);
1913    ExecutedResCounts.resize(ResourceCount);
1914    unsigned NumUnits = 0;
1915
1916    for (unsigned i = 0; i < ResourceCount; ++i) {
1917      ReservedCyclesIndex[i] = NumUnits;
1918      NumUnits += SchedModel->getProcResource(i)->NumUnits;
1919    }
1920
1921    ReservedCycles.resize(NumUnits, InvalidCycle);
1922  }
1923}
1924
1925/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1926/// these "soft stalls" differently than the hard stall cycles based on CPU
1927/// resources and computed by checkHazard(). A fully in-order model
1928/// (MicroOpBufferSize==0) will not make use of this since instructions are not
1929/// available for scheduling until they are ready. However, a weaker in-order
1930/// model may use this for heuristics. For example, if a processor has in-order
1931/// behavior when reading certain resources, this may come into play.
1932unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1933  if (!SU->isUnbuffered)
1934    return 0;
1935
1936  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1937  if (ReadyCycle > CurrCycle)
1938    return ReadyCycle - CurrCycle;
1939  return 0;
1940}
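// Example (hypothetical state): an SU marked isUnbuffered with
// TopReadyCycle = 12 while the top boundary sits at CurrCycle = 10 yields 2
// soft stall cycles. Heuristics can use this to prefer another ready
// instruction instead of stalling an in-order resource.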
1941
1942/// Compute the next cycle at which the given processor resource unit
1943/// can be scheduled.
1944unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1945                                                       unsigned Cycles) {
1946  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1947  // If this resource has never been used, always return cycle zero.
1948  if (NextUnreserved == InvalidCycle)
1949    return 0;
1950  // For bottom-up scheduling add the cycles needed for the current operation.
1951  if (!isTop())
1952    NextUnreserved += Cycles;
1953  return NextUnreserved;
1954}
1955
1956/// Compute the next cycle at which the given processor resource can be
1957/// scheduled.  Returns the next cycle and the index of the processor resource
1958/// instance in the reserved cycles vector.
1959std::pair<unsigned, unsigned>
1960SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1961  unsigned MinNextUnreserved = InvalidCycle;
1962  unsigned InstanceIdx = 0;
1963  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1964  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1965  assert(NumberOfInstances > 0 &&
1966         "Cannot have zero instances of a ProcResource");
1967
1968  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1969       ++I) {
1970    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1971    if (MinNextUnreserved > NextUnreserved) {
1972      InstanceIdx = I;
1973      MinNextUnreserved = NextUnreserved;
1974    }
1975  }
1976  return std::make_pair(MinNextUnreserved, InstanceIdx);
1977}
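// Example (hypothetical reservation state): for a resource kind with two
// units whose ReservedCycles entries are {InvalidCycle, 7}, the first unit
// has never been used, so getNextResourceCycleByInstance returns 0 for it and
// the pair (0, <index of the first unit>) is returned; the instruction can
// use that unit immediately instead of waiting until cycle 7.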
1978
1979/// Does this SU have a hazard within the current instruction group.
1980///
1981/// The scheduler supports two modes of hazard recognition. The first is the
1982/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1983/// supports highly complicated in-order reservation tables
1984/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1985///
1986/// The second is a streamlined mechanism that checks for hazards based on
1987/// simple counters that the scheduler itself maintains. It explicitly checks
1988/// for instruction dispatch limitations, including the number of micro-ops that
1989/// can dispatch per cycle.
1990///
1991/// TODO: Also check whether the SU must start a new group.
1992bool SchedBoundary::checkHazard(SUnit *SU) {
1993  if (HazardRec->isEnabled()
1994      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1995    return true;
1996  }
1997
1998  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1999  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2000    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2001                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2002    return true;
2003  }
2004
2005  if (CurrMOps > 0 &&
2006      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2007       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2008    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2009                      << (isTop() ? "begin" : "end") << " group\n");
2010    return true;
2011  }
2012
2013  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2014    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2015    for (const MCWriteProcResEntry &PE :
2016          make_range(SchedModel->getWriteProcResBegin(SC),
2017                     SchedModel->getWriteProcResEnd(SC))) {
2018      unsigned ResIdx = PE.ProcResourceIdx;
2019      unsigned Cycles = PE.Cycles;
2020      unsigned NRCycle, InstanceIdx;
2021      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2022      if (NRCycle > CurrCycle) {
2023#ifndef NDEBUG
2024        MaxObservedStall = std::max(Cycles, MaxObservedStall);
2025#endif
2026        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2027                          << SchedModel->getResourceName(ResIdx)
2028                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2029                          << "=" << NRCycle << "c\n");
2030        return true;
2031      }
2032    }
2033  }
2034  return false;
2035}
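// Example (hypothetical 4-wide machine): with CurrMOps = 3 micro-ops already
// issued in the current cycle, an SU that decodes to 2 micro-ops would push
// the group to 5, exceeding the issue width of 4, so checkHazard returns true
// and the node is kept in the Pending queue until the cycle advances.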
2036
2037// Find the unscheduled node in ReadySUs with the highest latency.
2038unsigned SchedBoundary::
2039findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2040  SUnit *LateSU = nullptr;
2041  unsigned RemLatency = 0;
2042  for (SUnit *SU : ReadySUs) {
2043    unsigned L = getUnscheduledLatency(SU);
2044    if (L > RemLatency) {
2045      RemLatency = L;
2046      LateSU = SU;
2047    }
2048  }
2049  if (LateSU) {
2050    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2051                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
2052  }
2053  return RemLatency;
2054}
2055
2056// Count resources in this zone and the remaining unscheduled
2057// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2058// resource index, or zero if the zone is issue limited.
2059unsigned SchedBoundary::
2060getOtherResourceCount(unsigned &OtherCritIdx) {
2061  OtherCritIdx = 0;
2062  if (!SchedModel->hasInstrSchedModel())
2063    return 0;
2064
2065  unsigned OtherCritCount = Rem->RemIssueCount
2066    + (RetiredMOps * SchedModel->getMicroOpFactor());
2067  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2068                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2069  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2070       PIdx != PEnd; ++PIdx) {
2071    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2072    if (OtherCount > OtherCritCount) {
2073      OtherCritCount = OtherCount;
2074      OtherCritIdx = PIdx;
2075    }
2076  }
2077  if (OtherCritIdx) {
2078    LLVM_DEBUG(
2079        dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2080               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2081               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2082  }
2083  return OtherCritCount;
2084}
2085
2086void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2087                                unsigned Idx) {
2088  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2089
2090#ifndef NDEBUG
2091  // ReadyCycle was bumped up to the CurrCycle when this node was
2092  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2093  // scheduling, so may now be greater than ReadyCycle.
2094  if (ReadyCycle > CurrCycle)
2095    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2096#endif
2097
2098  if (ReadyCycle < MinReadyCycle)
2099    MinReadyCycle = ReadyCycle;
2100
2101  // Check for interlocks first. For the purpose of other heuristics, an
2102  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2103  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2104  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2105                        checkHazard(SU) || (Available.size() >= ReadyListLimit);
2106
2107  if (!HazardDetected) {
2108    Available.push(SU);
2109
2110    if (InPQueue)
2111      Pending.remove(Pending.begin() + Idx);
2112    return;
2113  }
2114
2115  if (!InPQueue)
2116    Pending.push(SU);
2117}
2118
2119/// Move the boundary of scheduled code by one cycle.
2120void SchedBoundary::bumpCycle(unsigned NextCycle) {
2121  if (SchedModel->getMicroOpBufferSize() == 0) {
2122    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2123           "MinReadyCycle uninitialized");
2124    if (MinReadyCycle > NextCycle)
2125      NextCycle = MinReadyCycle;
2126  }
2127  // Update the current micro-ops, which will issue in the next cycle.
2128  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2129  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2130
2131  // Decrement DependentLatency based on the next cycle.
2132  if ((NextCycle - CurrCycle) > DependentLatency)
2133    DependentLatency = 0;
2134  else
2135    DependentLatency -= (NextCycle - CurrCycle);
2136
2137  if (!HazardRec->isEnabled()) {
2138    // Bypass HazardRec virtual calls.
2139    CurrCycle = NextCycle;
2140  } else {
2141    // Bypass getHazardType calls in case of long latency.
2142    for (; CurrCycle != NextCycle; ++CurrCycle) {
2143      if (isTop())
2144        HazardRec->AdvanceCycle();
2145      else
2146        HazardRec->RecedeCycle();
2147    }
2148  }
2149  CheckPending = true;
2150  IsResourceLimited =
2151      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2152                         getScheduledLatency(), true);
2153
2154  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2155                    << '\n');
2156}
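// Worked example (hypothetical widths): advancing from CurrCycle = 5 to
// NextCycle = 7 on a 4-issue machine gives DecMOps = 4 * 2 = 8, so a CurrMOps
// of 3 resets to 0, and DependentLatency shrinks by the 2 cycles advanced
// (clearing it entirely if it was 2 or less).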
2157
2158void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2159  ExecutedResCounts[PIdx] += Count;
2160  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2161    MaxExecutedResCount = ExecutedResCounts[PIdx];
2162}
2163
2164/// Add the given processor resource to this scheduled zone.
2165///
2166/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2167/// during which this resource is consumed.
2168///
2169/// \return the next cycle at which the instruction may execute without
2170/// oversubscribing resources.
2171unsigned SchedBoundary::
2172countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2173  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2174  unsigned Count = Factor * Cycles;
2175  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2176                    << Cycles << "x" << Factor << "u\n");
2177
2178  // Update Executed resources counts.
2179  incExecutedResources(PIdx, Count);
2180  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2181  Rem->RemainingCounts[PIdx] -= Count;
2182
2183  // Check if this resource exceeds the current critical resource. If so, it
2184  // becomes the critical resource.
2185  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2186    ZoneCritResIdx = PIdx;
2187    LLVM_DEBUG(dbgs() << "  *** Critical resource "
2188                      << SchedModel->getResourceName(PIdx) << ": "
2189                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2190                      << "c\n");
2191  }
2192  // For reserved resources, record the highest cycle using the resource.
2193  unsigned NextAvailable, InstanceIdx;
2194  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2195  if (NextAvailable > CurrCycle) {
2196    LLVM_DEBUG(dbgs() << "  Resource conflict: "
2197                      << SchedModel->getResourceName(PIdx)
2198                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
2199                      << " reserved until @" << NextAvailable << "\n");
2200  }
2201  return NextAvailable;
2202}
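// Worked example (hypothetical model): a resource consumed for Cycles = 3
// with a per-resource scaling factor of 2 (factors are chosen so that counts
// of resources with different unit counts stay comparable) contributes
// Count = 6 to its executed total and removes 6 from its remaining count.
// If that executed total now exceeds the current critical resource's count,
// this resource becomes the new ZoneCritResIdx.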
2203
2204/// Move the boundary of scheduled code by one SUnit.
2205void SchedBoundary::bumpNode(SUnit *SU) {
2206  // Update the reservation table.
2207  if (HazardRec->isEnabled()) {
2208    if (!isTop() && SU->isCall) {
2209      // Calls are scheduled with their preceding instructions. For bottom-up
2210      // scheduling, clear the pipeline state before emitting.
2211      HazardRec->Reset();
2212    }
2213    HazardRec->EmitInstruction(SU);
2214    // Scheduling an instruction may have made pending instructions available.
2215    CheckPending = true;
2216  }
2217  // checkHazard should prevent scheduling multiple instructions per cycle that
2218  // exceed the issue width.
2219  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2220  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2221  assert(
2222      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2223      "Cannot schedule this instruction's MicroOps in the current cycle.");
2224
2225  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2226  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2227
2228  unsigned NextCycle = CurrCycle;
2229  switch (SchedModel->getMicroOpBufferSize()) {
2230  case 0:
2231    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2232    break;
2233  case 1:
2234    if (ReadyCycle > NextCycle) {
2235      NextCycle = ReadyCycle;
2236      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2237    }
2238    break;
2239  default:
2240    // We don't currently model the OOO reorder buffer, so consider all
2241    // scheduled MOps to be "retired". We do loosely model in-order resource
2242    // latency. If this instruction uses an in-order resource, account for any
2243    // likely stall cycles.
2244    if (SU->isUnbuffered && ReadyCycle > NextCycle)
2245      NextCycle = ReadyCycle;
2246    break;
2247  }
2248  RetiredMOps += IncMOps;
2249
2250  // Update resource counts and critical resource.
2251  if (SchedModel->hasInstrSchedModel()) {
2252    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2253    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2254    Rem->RemIssueCount -= DecRemIssue;
2255    if (ZoneCritResIdx) {
2256      // Scale scheduled micro-ops for comparing with the critical resource.
2257      unsigned ScaledMOps =
2258        RetiredMOps * SchedModel->getMicroOpFactor();
2259
2260      // If scaled micro-ops are now more than the previous critical resource by
2261      // a full cycle, then micro-ops issue becomes critical.
2262      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2263          >= (int)SchedModel->getLatencyFactor()) {
2264        ZoneCritResIdx = 0;
2265        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2266                          << ScaledMOps / SchedModel->getLatencyFactor()
2267                          << "c\n");
2268      }
2269    }
2270    for (TargetSchedModel::ProcResIter
2271           PI = SchedModel->getWriteProcResBegin(SC),
2272           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2273      unsigned RCycle =
2274        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2275      if (RCycle > NextCycle)
2276        NextCycle = RCycle;
2277    }
2278    if (SU->hasReservedResource) {
2279      // For reserved resources, record the highest cycle using the resource.
2280      // For top-down scheduling, this is the cycle in which we schedule this
2281      // instruction plus the number of cycles the operation reserves the
2282      // resource. For bottom-up, it is simply the instruction's cycle.
2283      for (TargetSchedModel::ProcResIter
2284             PI = SchedModel->getWriteProcResBegin(SC),
2285             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2286        unsigned PIdx = PI->ProcResourceIdx;
2287        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2288          unsigned ReservedUntil, InstanceIdx;
2289          std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2290          if (isTop()) {
2291            ReservedCycles[InstanceIdx] =
2292                std::max(ReservedUntil, NextCycle + PI->Cycles);
2293          } else
2294            ReservedCycles[InstanceIdx] = NextCycle;
2295        }
2296      }
2297    }
2298  }
2299  // Update ExpectedLatency and DependentLatency.
2300  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2301  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2302  if (SU->getDepth() > TopLatency) {
2303    TopLatency = SU->getDepth();
2304    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2305                      << SU->NodeNum << ") " << TopLatency << "c\n");
2306  }
2307  if (SU->getHeight() > BotLatency) {
2308    BotLatency = SU->getHeight();
2309    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2310                      << SU->NodeNum << ") " << BotLatency << "c\n");
2311  }
2312  // If we stall for any reason, bump the cycle.
2313  if (NextCycle > CurrCycle)
2314    bumpCycle(NextCycle);
2315  else
2316    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2317    // resource limited. If a stall occurred, bumpCycle does this.
2318    IsResourceLimited =
2319        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2320                           getScheduledLatency(), true);
2321
2322  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2323  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2324  // one cycle.  Since we commonly reach the max MOps here, opportunistically
2325  // bump the cycle to avoid uselessly checking everything in the readyQ.
2326  CurrMOps += IncMOps;
2327
2328  // Bump the cycle count for issue group constraints.
2329  // This must be done after NextCycle has been adjusted for all other stalls.
2330  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2331  // currCycle to X.
2332  if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
2333      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2334    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2335                      << " group\n");
2336    bumpCycle(++NextCycle);
2337  }
2338
2339  while (CurrMOps >= SchedModel->getIssueWidth()) {
2340    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2341                      << CurrCycle << '\n');
2342    bumpCycle(++NextCycle);
2343  }
2344  LLVM_DEBUG(dumpScheduledState());
2345}
2346
2347/// Release pending ready nodes into the available queue. This makes them
2348/// visible to heuristics.
2349void SchedBoundary::releasePending() {
2350  // If the available queue is empty, it is safe to reset MinReadyCycle.
2351  if (Available.empty())
2352    MinReadyCycle = std::numeric_limits<unsigned>::max();
2353
2354  // Check to see if any of the pending instructions are ready to issue.  If
2355  // so, add them to the available queue.
2356  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2357    SUnit *SU = *(Pending.begin() + I);
2358    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2359
2360    if (ReadyCycle < MinReadyCycle)
2361      MinReadyCycle = ReadyCycle;
2362
2363    if (Available.size() >= ReadyListLimit)
2364      break;
2365
2366    releaseNode(SU, ReadyCycle, true, I);
2367    if (E != Pending.size()) {
2368      --I;
2369      --E;
2370    }
2371  }
2372  CheckPending = false;
2373}
2374
2375/// Remove SU from the ready set for this boundary.
2376void SchedBoundary::removeReady(SUnit *SU) {
2377  if (Available.isInQueue(SU))
2378    Available.remove(Available.find(SU));
2379  else {
2380    assert(Pending.isInQueue(SU) && "bad ready count");
2381    Pending.remove(Pending.find(SU));
2382  }
2383}
2384
2385/// If this queue only has one ready candidate, return it. As a side effect,
2386/// defer any nodes that now hit a hazard, and advance the cycle until at least
2387/// one node is ready. If multiple instructions are ready, return NULL.
2388SUnit *SchedBoundary::pickOnlyChoice() {
2389  if (CheckPending)
2390    releasePending();
2391
2392  if (CurrMOps > 0) {
2393    // Defer any ready instrs that now have a hazard.
2394    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2395      if (checkHazard(*I)) {
2396        Pending.push(*I);
2397        I = Available.remove(I);
2398        continue;
2399      }
2400      ++I;
2401    }
2402  }
2403  for (unsigned i = 0; Available.empty(); ++i) {
2404//  FIXME: Re-enable assert once PR20057 is resolved.
2405//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2406//           "permanent hazard");
2407    (void)i;
2408    bumpCycle(CurrCycle + 1);
2409    releasePending();
2410  }
2411
2412  LLVM_DEBUG(Pending.dump());
2413  LLVM_DEBUG(Available.dump());
2414
2415  if (Available.size() == 1)
2416    return *Available.begin();
2417  return nullptr;
2418}
2419
2420#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2421// This is useful information to dump after bumpNode.
2422// Note that the Queue contents are more useful before pickNodeFromQueue.
2423LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2424  unsigned ResFactor;
2425  unsigned ResCount;
2426  if (ZoneCritResIdx) {
2427    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2428    ResCount = getResourceCount(ZoneCritResIdx);
2429  } else {
2430    ResFactor = SchedModel->getMicroOpFactor();
2431    ResCount = RetiredMOps * ResFactor;
2432  }
2433  unsigned LFactor = SchedModel->getLatencyFactor();
2434  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2435         << "  Retired: " << RetiredMOps;
2436  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2437  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2438         << ResCount / ResFactor << " "
2439         << SchedModel->getResourceName(ZoneCritResIdx)
2440         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2441         << (IsResourceLimited ? "  - Resource" : "  - Latency")
2442         << " limited.\n";
2443}
2444#endif
2445
2446//===----------------------------------------------------------------------===//
2447// GenericScheduler - Generic implementation of MachineSchedStrategy.
2448//===----------------------------------------------------------------------===//
2449
2450void GenericSchedulerBase::SchedCandidate::
2451initResourceDelta(const ScheduleDAGMI *DAG,
2452                  const TargetSchedModel *SchedModel) {
2453  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2454    return;
2455
2456  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2457  for (TargetSchedModel::ProcResIter
2458         PI = SchedModel->getWriteProcResBegin(SC),
2459         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2460    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2461      ResDelta.CritResources += PI->Cycles;
2462    if (PI->ProcResourceIdx == Policy.DemandResIdx)
2463      ResDelta.DemandedResources += PI->Cycles;
2464  }
2465}
2466
2467/// Compute remaining latency. We need this both to determine whether the
2468/// overall schedule has become latency-limited and whether the instructions
2469/// outside this zone are resource or latency limited.
2470///
2471/// The "dependent" latency is updated incrementally during scheduling as the
2472/// max height/depth of scheduled nodes minus the cycles since it was
2473/// scheduled:
2474///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2475///
2476/// The "independent" latency is the max ready queue depth:
2477///   ILat = max N.depth for N in Available|Pending
2478///
2479/// RemainingLatency is the greater of independent and dependent latency.
2480///
2481/// These computations are expensive, especially in DAGs with many edges, so
2482/// only do them if necessary.
2483static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2484  unsigned RemLatency = CurrZone.getDependentLatency();
2485  RemLatency = std::max(RemLatency,
2486                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
2487  RemLatency = std::max(RemLatency,
2488                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2489  return RemLatency;
2490}
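// Worked example (hypothetical values): with a dependent latency of 6 in the
// current zone, a maximum unscheduled latency of 9 among Available nodes and
// of 4 among Pending nodes, RemainingLatency = max(6, 9, 4) = 9.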
2491
2492/// Returns true if the current cycle plus remaining latency is greater than
2493/// the critical path in the scheduling region.
2494bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2495                                               SchedBoundary &CurrZone,
2496                                               bool ComputeRemLatency,
2497                                               unsigned &RemLatency) const {
2498  // The current cycle is already greater than the critical path, so we are
2499  // already latency limited and don't need to compute the remaining latency.
2500  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2501    return true;
2502
2503  // If we haven't scheduled anything yet, then we aren't latency limited.
2504  if (CurrZone.getCurrCycle() == 0)
2505    return false;
2506
2507  if (ComputeRemLatency)
2508    RemLatency = computeRemLatency(CurrZone);
2509
2510  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2511}
2512
2513/// Set the CandPolicy for a scheduling zone given the current resources and
2514/// latencies inside and outside the zone.
2515void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2516                                     SchedBoundary &CurrZone,
2517                                     SchedBoundary *OtherZone) {
2518  // Apply preemptive heuristics based on the total latency and resources
2519  // inside and outside this zone. Potential stalls should be considered before
2520  // following this policy.
2521
2522  // Compute the critical resource outside the zone.
2523  unsigned OtherCritIdx = 0;
2524  unsigned OtherCount =
2525    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2526
2527  bool OtherResLimited = false;
2528  unsigned RemLatency = 0;
2529  bool RemLatencyComputed = false;
2530  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2531    RemLatency = computeRemLatency(CurrZone);
2532    RemLatencyComputed = true;
2533    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2534                                         OtherCount, RemLatency, false);
2535  }
2536
2537  // Schedule aggressively for latency in PostRA mode. We don't check for
2538  // acyclic latency during PostRA, and highly out-of-order processors will
2539  // skip PostRA scheduling.
2540  if (!OtherResLimited &&
2541      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2542                                       RemLatency))) {
2543    Policy.ReduceLatency |= true;
2544    LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2545                      << " RemainingLatency " << RemLatency << " + "
2546                      << CurrZone.getCurrCycle() << "c > CritPath "
2547                      << Rem.CriticalPath << "\n");
2548  }
2549  // If the same resource is limiting inside and outside the zone, do nothing.
2550  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2551    return;
2552
2553  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2554    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2555           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2556  } if (OtherResLimited) dbgs()
2557                 << "  RemainingLimit: "
2558                 << SchedModel->getResourceName(OtherCritIdx) << "\n";
2559             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2560             << "  Latency limited both directions.\n");
2561
2562  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2563    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2564
2565  if (OtherResLimited)
2566    Policy.DemandResIdx = OtherCritIdx;
2567}
2568
2569#ifndef NDEBUG
2570const char *GenericSchedulerBase::getReasonStr(
2571  GenericSchedulerBase::CandReason Reason) {
2572  switch (Reason) {
2573  case NoCand:         return "NOCAND    ";
2574  case Only1:          return "ONLY1     ";
2575  case PhysReg:        return "PHYS-REG  ";
2576  case RegExcess:      return "REG-EXCESS";
2577  case RegCritical:    return "REG-CRIT  ";
2578  case Stall:          return "STALL     ";
2579  case Cluster:        return "CLUSTER   ";
2580  case Weak:           return "WEAK      ";
2581  case RegMax:         return "REG-MAX   ";
2582  case ResourceReduce: return "RES-REDUCE";
2583  case ResourceDemand: return "RES-DEMAND";
2584  case TopDepthReduce: return "TOP-DEPTH ";
2585  case TopPathReduce:  return "TOP-PATH  ";
2586  case BotHeightReduce:return "BOT-HEIGHT";
2587  case BotPathReduce:  return "BOT-PATH  ";
2588  case NextDefUse:     return "DEF-USE   ";
2589  case NodeOrder:      return "ORDER     ";
2590  };
2591  llvm_unreachable("Unknown reason!");
2592}
2593
2594void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2595  PressureChange P;
2596  unsigned ResIdx = 0;
2597  unsigned Latency = 0;
2598  switch (Cand.Reason) {
2599  default:
2600    break;
2601  case RegExcess:
2602    P = Cand.RPDelta.Excess;
2603    break;
2604  case RegCritical:
2605    P = Cand.RPDelta.CriticalMax;
2606    break;
2607  case RegMax:
2608    P = Cand.RPDelta.CurrentMax;
2609    break;
2610  case ResourceReduce:
2611    ResIdx = Cand.Policy.ReduceResIdx;
2612    break;
2613  case ResourceDemand:
2614    ResIdx = Cand.Policy.DemandResIdx;
2615    break;
2616  case TopDepthReduce:
2617    Latency = Cand.SU->getDepth();
2618    break;
2619  case TopPathReduce:
2620    Latency = Cand.SU->getHeight();
2621    break;
2622  case BotHeightReduce:
2623    Latency = Cand.SU->getHeight();
2624    break;
2625  case BotPathReduce:
2626    Latency = Cand.SU->getDepth();
2627    break;
2628  }
2629  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2630  if (P.isValid())
2631    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2632           << ":" << P.getUnitInc() << " ";
2633  else
2634    dbgs() << "      ";
2635  if (ResIdx)
2636    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2637  else
2638    dbgs() << "         ";
2639  if (Latency)
2640    dbgs() << " " << Latency << " cycles ";
2641  else
2642    dbgs() << "          ";
2643  dbgs() << '\n';
2644}
2645#endif
2646
2647namespace llvm {
2648/// Return true if this heuristic determines order.
2649bool tryLess(int TryVal, int CandVal,
2650             GenericSchedulerBase::SchedCandidate &TryCand,
2651             GenericSchedulerBase::SchedCandidate &Cand,
2652             GenericSchedulerBase::CandReason Reason) {
2653  if (TryVal < CandVal) {
2654    TryCand.Reason = Reason;
2655    return true;
2656  }
2657  if (TryVal > CandVal) {
2658    if (Cand.Reason > Reason)
2659      Cand.Reason = Reason;
2660    return true;
2661  }
2662  return false;
2663}
2664
2665bool tryGreater(int TryVal, int CandVal,
2666                GenericSchedulerBase::SchedCandidate &TryCand,
2667                GenericSchedulerBase::SchedCandidate &Cand,
2668                GenericSchedulerBase::CandReason Reason) {
2669  if (TryVal > CandVal) {
2670    TryCand.Reason = Reason;
2671    return true;
2672  }
2673  if (TryVal < CandVal) {
2674    if (Cand.Reason > Reason)
2675      Cand.Reason = Reason;
2676    return true;
2677  }
2678  return false;
2679}
2680
2681bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2682                GenericSchedulerBase::SchedCandidate &Cand,
2683                SchedBoundary &Zone) {
2684  if (Zone.isTop()) {
2685    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2686      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2687                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2688        return true;
2689    }
2690    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2691                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2692      return true;
2693  } else {
2694    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2695      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2696                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2697        return true;
2698    }
2699    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2700                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2701      return true;
2702  }
2703  return false;
2704}
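// Usage sketch (schematic, mirroring how the generic strategies chain these
// helpers): callers try heuristics in priority order and stop at the first
// one that returns true, e.g.
//
//   if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
//               Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand,
//               GenericSchedulerBase::Stall))
//     return;
//   if (tryLatency(TryCand, Cand, Zone))
//     return;
//
// tryLess/tryGreater return false only on a tie, so lower-priority heuristics
// run only when every earlier comparison was inconclusive, and the CandReason
// ordering decides which criterion ultimately labels the pick.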
2705} // end namespace llvm
2706
2707static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2708  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2709                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2710}
2711
2712static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2713  tracePick(Cand.Reason, Cand.AtTop);
2714}
2715
2716void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2717  assert(dag->hasVRegLiveness() &&
2718         "(PreRA)GenericScheduler needs vreg liveness");
2719  DAG = static_cast<ScheduleDAGMILive*>(dag);
2720  SchedModel = DAG->getSchedModel();
2721  TRI = DAG->TRI;
2722
2723  Rem.init(DAG, SchedModel);
2724  Top.init(DAG, SchedModel, &Rem);
2725  Bot.init(DAG, SchedModel, &Rem);
2726
2727  // Initialize resource counts.
2728
2729  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2730  // are disabled, then these HazardRecs will be disabled.
2731  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2732  if (!Top.HazardRec) {
2733    Top.HazardRec =
2734        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2735            Itin, DAG);
2736  }
2737  if (!Bot.HazardRec) {
2738    Bot.HazardRec =
2739        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2740            Itin, DAG);
2741  }
2742  TopCand.SU = nullptr;
2743  BotCand.SU = nullptr;
2744}
2745
2746/// Initialize the per-region scheduling policy.
2747void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2748                                  MachineBasicBlock::iterator End,
2749                                  unsigned NumRegionInstrs) {
2750  const MachineFunction &MF = *Begin->getMF();
2751  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2752
2753  // Avoid setting up the register pressure tracker for small regions to save
2754  // compile time. As a rough heuristic, only track pressure when the number of
2755  // schedulable instructions exceeds half the integer register file.
2756  RegionPolicy.ShouldTrackPressure = true;
2757  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2758    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2759    if (TLI->isTypeLegal(LegalIntVT)) {
2760      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2761        TLI->getRegClassFor(LegalIntVT));
2762      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2763    }
2764  }
2765
2766  // For generic targets, we default to bottom-up, because it's simpler and more
2767  // compile-time optimizations have been implemented in that direction.
2768  RegionPolicy.OnlyBottomUp = true;
2769
2770  // Allow the subtarget to override default policy.
2771  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2772
2773  // After subtarget overrides, apply command line options.
2774  if (!EnableRegPressure) {
2775    RegionPolicy.ShouldTrackPressure = false;
2776    RegionPolicy.ShouldTrackLaneMasks = false;
2777  }
2778
2779  // Check if -misched-topdown/bottomup can force or unforce scheduling direction.
2780  // e.g. -misched-bottomup=false allows scheduling in both directions.
2781  assert((!ForceTopDown || !ForceBottomUp) &&
2782         "-misched-topdown incompatible with -misched-bottomup");
2783  if (ForceBottomUp.getNumOccurrences() > 0) {
2784    RegionPolicy.OnlyBottomUp = ForceBottomUp;
2785    if (RegionPolicy.OnlyBottomUp)
2786      RegionPolicy.OnlyTopDown = false;
2787  }
2788  if (ForceTopDown.getNumOccurrences() > 0) {
2789    RegionPolicy.OnlyTopDown = ForceTopDown;
2790    if (RegionPolicy.OnlyTopDown)
2791      RegionPolicy.OnlyBottomUp = false;
2792  }
2793}
2794
2795void GenericScheduler::dumpPolicy() const {
2796  // Cannot completely remove virtual function even in release mode.
2797#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2798  dbgs() << "GenericScheduler RegionPolicy: "
2799         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2800         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2801         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2802         << "\n";
2803#endif
2804}
2805
2806/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2807/// critical path by more cycles than it takes to drain the instruction buffer.
2808/// We estimate an upper bound on in-flight instructions as:
2809///
2810/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2811/// InFlightIterations = AcyclicPath / CyclesPerIteration
2812/// InFlightResources = InFlightIterations * LoopResources
2813///
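/// For illustration (hypothetical numbers): with CyclicPath = 10 cycles,
/// AcyclicPath = 40 cycles, and 8 micro-ops issued per iteration, roughly 4
/// iterations (about 32 micro-ops) would be in flight, so a micro-op buffer
/// smaller than that would mark the loop acyclic latency limited.
///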
2814/// TODO: Check execution resources in addition to IssueCount.
2815void GenericScheduler::checkAcyclicLatency() {
2816  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2817    return;
2818
2819  // Scaled number of cycles per loop iteration.
2820  unsigned IterCount =
2821    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2822             Rem.RemIssueCount);
2823  // Scaled acyclic critical path.
2824  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2825  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2826  unsigned InFlightCount =
2827    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2828  unsigned BufferLimit =
2829    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2830
2831  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2832
2833  LLVM_DEBUG(
2834      dbgs() << "IssueCycles="
2835             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2836             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2837             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2838             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2839             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2840      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2841}
2842
2843void GenericScheduler::registerRoots() {
2844  Rem.CriticalPath = DAG->ExitSU.getDepth();
2845
2846  // Some roots may not feed into ExitSU. Check all of them in case.
2847  for (const SUnit *SU : Bot.Available) {
2848    if (SU->getDepth() > Rem.CriticalPath)
2849      Rem.CriticalPath = SU->getDepth();
2850  }
2851  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2852  if (DumpCriticalPathLength) {
2853    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2854  }
2855
2856  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2857    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2858    checkAcyclicLatency();
2859  }
2860}
2861
2862namespace llvm {
2863bool tryPressure(const PressureChange &TryP,
2864                 const PressureChange &CandP,
2865                 GenericSchedulerBase::SchedCandidate &TryCand,
2866                 GenericSchedulerBase::SchedCandidate &Cand,
2867                 GenericSchedulerBase::CandReason Reason,
2868                 const TargetRegisterInfo *TRI,
2869                 const MachineFunction &MF) {
2870  // If one candidate decreases pressure and the other increases it, prefer the
2871  // one that decreases. Invalid candidates have UnitInc==0.
2872  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2873                 Reason)) {
2874    return true;
2875  }
2876  // Do not compare the magnitude of pressure changes between top and bottom
2877  // boundary.
2878  if (Cand.AtTop != TryCand.AtTop)
2879    return false;
2880
2881  // If both candidates affect the same set in the same boundary, go with the
2882  // smallest increase.
2883  unsigned TryPSet = TryP.getPSetOrMax();
2884  unsigned CandPSet = CandP.getPSetOrMax();
2885  if (TryPSet == CandPSet) {
2886    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2887                   Reason);
2888  }
2889
2890  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2891                                 std::numeric_limits<int>::max();
2892
2893  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2894                                   std::numeric_limits<int>::max();
2895
2896  // If the candidates are decreasing pressure, reverse priority.
2897  if (TryP.getUnitInc() < 0)
2898    std::swap(TryRank, CandRank);
2899  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2900}
2901
2902unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2903  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2904}
2905
2906/// Minimize physical register live ranges. Regalloc wants them adjacent to
2907/// their physreg def/use.
2908///
2909/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2910/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2911/// with the operation that produces or consumes the physreg. We'll do this when
2912/// regalloc has support for parallel copies.
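///
/// For example, a COPY that reads or writes a physical register (such as an
/// argument or return value in a fixed register) is nudged so that it ends up
/// adjacent to the instruction that defines or uses that physreg.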
2913int biasPhysReg(const SUnit *SU, bool isTop) {
2914  const MachineInstr *MI = SU->getInstr();
2915
2916  if (MI->isCopy()) {
2917    unsigned ScheduledOper = isTop ? 1 : 0;
2918    unsigned UnscheduledOper = isTop ? 0 : 1;
2919    // If we have already scheduled the physreg producer/consumer, immediately
2920    // schedule the copy.
2921    if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
2922      return 1;
2923    // If the physreg is at the boundary, defer it. Otherwise schedule it
2924    // immediately to free the dependent. We can hoist the copy later.
2925    bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2926    if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
2927      return AtBoundary ? -1 : 1;
2928  }
2929
2930  if (MI->isMoveImmediate()) {
2931    // If we have a move immediate and all successors have been assigned, bias
2932    // towards scheduling this later. Make sure all register defs are to
2933    // physical registers.
2934    bool DoBias = true;
2935    for (const MachineOperand &Op : MI->defs()) {
2936      if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
2937        DoBias = false;
2938        break;
2939      }
2940    }
2941
2942    if (DoBias)
2943      return isTop ? -1 : 1;
2944  }
2945
2946  return 0;
2947}
2948} // end namespace llvm
2949
2950void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2951                                     bool AtTop,
2952                                     const RegPressureTracker &RPTracker,
2953                                     RegPressureTracker &TempTracker) {
2954  Cand.SU = SU;
2955  Cand.AtTop = AtTop;
2956  if (DAG->isTrackingPressure()) {
2957    if (AtTop) {
2958      TempTracker.getMaxDownwardPressureDelta(
2959        Cand.SU->getInstr(),
2960        Cand.RPDelta,
2961        DAG->getRegionCriticalPSets(),
2962        DAG->getRegPressure().MaxSetPressure);
2963    } else {
2964      if (VerifyScheduling) {
2965        TempTracker.getMaxUpwardPressureDelta(
2966          Cand.SU->getInstr(),
2967          &DAG->getPressureDiff(Cand.SU),
2968          Cand.RPDelta,
2969          DAG->getRegionCriticalPSets(),
2970          DAG->getRegPressure().MaxSetPressure);
2971      } else {
2972        RPTracker.getUpwardPressureDelta(
2973          Cand.SU->getInstr(),
2974          DAG->getPressureDiff(Cand.SU),
2975          Cand.RPDelta,
2976          DAG->getRegionCriticalPSets(),
2977          DAG->getRegPressure().MaxSetPressure);
2978      }
2979    }
2980  }
2981  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2982             << "  Try  SU(" << Cand.SU->NodeNum << ") "
2983             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2984             << Cand.RPDelta.Excess.getUnitInc() << "\n");
2985}
2986
2987/// Apply a set of heuristics to a new candidate. Heuristics are currently
2988/// hierarchical. This may be more efficient than a graduated cost model because
2989/// we don't need to evaluate all aspects of the model for each node in the
2990/// queue. But it's really done to make the heuristics easier to debug and
2991/// statistically analyze.
2992///
2993/// \param Cand provides the policy and current best candidate.
2994/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2995/// \param Zone describes the scheduled zone that we are extending, or nullptr
2996///             if Cand is from a different zone than TryCand.
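///
/// Roughly in priority order, the checks below consider: physreg copy bias,
/// excess pressure, critical-set pressure, latency for acyclic-limited loops,
/// stall cycles, instruction clustering, weak edges, overall max pressure,
/// resource balance, remaining latency, and finally the original instruction
/// order.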
2997void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2998                                    SchedCandidate &TryCand,
2999                                    SchedBoundary *Zone) const {
3000  // Initialize the candidate if needed.
3001  if (!Cand.isValid()) {
3002    TryCand.Reason = NodeOrder;
3003    return;
3004  }
3005
3006  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3007  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3008                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3009    return;
3010
3011  // Avoid exceeding the target's limit.
3012  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3013                                               Cand.RPDelta.Excess,
3014                                               TryCand, Cand, RegExcess, TRI,
3015                                               DAG->MF))
3016    return;
3017
3018  // Avoid increasing the max critical pressure in the scheduled region.
3019  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3020                                               Cand.RPDelta.CriticalMax,
3021                                               TryCand, Cand, RegCritical, TRI,
3022                                               DAG->MF))
3023    return;
3024
3025  // We only compare a subset of features when comparing nodes between
3026  // Top and Bottom boundary. Some properties are simply incomparable; in many
3027  // other instances we should only override the other boundary if something
3028  // is a clearly good pick on one boundary. Skip heuristics that are more
3029  // "tie-breaking" in nature.
3030  bool SameBoundary = Zone != nullptr;
3031  if (SameBoundary) {
3032    // For loops that are acyclic path limited, aggressively schedule for
3033    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3034    // heuristics to take precedence.
3035    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3036        tryLatency(TryCand, Cand, *Zone))
3037      return;
3038
3039    // Prioritize instructions that read unbuffered resources by stall cycles.
3040    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3041                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3042      return;
3043  }
3044
3045  // Keep clustered nodes together to encourage downstream peephole
3046  // optimizations which may reduce resource requirements.
3047  //
3048  // This is a best effort to set things up for a post-RA pass. Optimizations
3049  // like generating loads of multiple registers should ideally be done within
3050  // the scheduler pass by combining the loads during DAG postprocessing.
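  // For example, keeping clustered memory operations adjacent makes it easier
  // for a later pass to form load/store pairs on targets that support them.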
3051  const SUnit *CandNextClusterSU =
3052    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3053  const SUnit *TryCandNextClusterSU =
3054    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3055  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3056                 Cand.SU == CandNextClusterSU,
3057                 TryCand, Cand, Cluster))
3058    return;
3059
3060  if (SameBoundary) {
3061    // Weak edges are for clustering and other constraints.
3062    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3063                getWeakLeft(Cand.SU, Cand.AtTop),
3064                TryCand, Cand, Weak))
3065      return;
3066  }
3067
3068  // Avoid increasing the max pressure of the entire region.
3069  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3070                                               Cand.RPDelta.CurrentMax,
3071                                               TryCand, Cand, RegMax, TRI,
3072                                               DAG->MF))
3073    return;
3074
3075  if (SameBoundary) {
3076    // Avoid critical resource consumption and balance the schedule.
3077    TryCand.initResourceDelta(DAG, SchedModel);
3078    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3079                TryCand, Cand, ResourceReduce))
3080      return;
3081    if (tryGreater(TryCand.ResDelta.DemandedResources,
3082                   Cand.ResDelta.DemandedResources,
3083                   TryCand, Cand, ResourceDemand))
3084      return;
3085
3086    // Avoid serializing long latency dependence chains.
3087    // For acyclic path limited loops, latency was already checked above.
3088    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3089        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3090      return;
3091
3092    // Fall through to original instruction order.
3093    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3094        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3095      TryCand.Reason = NodeOrder;
3096    }
3097  }
3098}
3099
3100/// Pick the best candidate from the queue.
3101///
3102/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3103/// DAG building. To adjust for the current scheduling location we need to
3104/// maintain the number of vreg uses remaining to be top-scheduled.
3105void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3106                                         const CandPolicy &ZonePolicy,
3107                                         const RegPressureTracker &RPTracker,
3108                                         SchedCandidate &Cand) {
3109  // getMaxPressureDelta temporarily modifies the tracker.
3110  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3111
3112  ReadyQueue &Q = Zone.Available;
3113  for (SUnit *SU : Q) {
3114
3115    SchedCandidate TryCand(ZonePolicy);
3116    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3117    // Pass SchedBoundary only when comparing nodes from the same boundary.
3118    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3119    tryCandidate(Cand, TryCand, ZoneArg);
3120    if (TryCand.Reason != NoCand) {
3121      // Initialize resource delta if needed in case future heuristics query it.
3122      if (TryCand.ResDelta == SchedResourceDelta())
3123        TryCand.initResourceDelta(DAG, SchedModel);
3124      Cand.setBest(TryCand);
3125      LLVM_DEBUG(traceCandidate(Cand));
3126    }
3127  }
3128}
3129
3130/// Pick the best candidate node from either the top or bottom queue.
3131SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3132  // Schedule as far as possible in the direction of no choice. This is most
3133  // efficient, but also provides the best heuristics for CriticalPSets.
3134  if (SUnit *SU = Bot.pickOnlyChoice()) {
3135    IsTopNode = false;
3136    tracePick(Only1, false);
3137    return SU;
3138  }
3139  if (SUnit *SU = Top.pickOnlyChoice()) {
3140    IsTopNode = true;
3141    tracePick(Only1, true);
3142    return SU;
3143  }
3144  // Set the bottom-up policy based on the state of the current bottom zone and
3145  // the instructions outside the zone, including the top zone.
3146  CandPolicy BotPolicy;
3147  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3148  // Set the top-down policy based on the state of the current top zone and
3149  // the instructions outside the zone, including the bottom zone.
3150  CandPolicy TopPolicy;
3151  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3152
3153  // See if BotCand is still valid (because we previously scheduled from Top).
3154  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3155  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3156      BotCand.Policy != BotPolicy) {
3157    BotCand.reset(CandPolicy());
3158    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3159    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3160  } else {
3161    LLVM_DEBUG(traceCandidate(BotCand));
3162#ifndef NDEBUG
3163    if (VerifyScheduling) {
3164      SchedCandidate TCand;
3165      TCand.reset(CandPolicy());
3166      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3167      assert(TCand.SU == BotCand.SU &&
3168             "Last pick result should correspond to re-picking right now");
3169    }
3170#endif
3171  }
3172
3173  // Check if the top Q has a better candidate.
3174  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3175  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3176      TopCand.Policy != TopPolicy) {
3177    TopCand.reset(CandPolicy());
3178    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3179    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3180  } else {
3181    LLVM_DEBUG(traceCandidate(TopCand));
3182#ifndef NDEBUG
3183    if (VerifyScheduling) {
3184      SchedCandidate TCand;
3185      TCand.reset(CandPolicy());
3186      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3187      assert(TCand.SU == TopCand.SU &&
3188           "Last pick result should correspond to re-picking right now");
3189    }
3190#endif
3191  }
3192
3193  // Pick best from BotCand and TopCand.
3194  assert(BotCand.isValid());
3195  assert(TopCand.isValid());
3196  SchedCandidate Cand = BotCand;
3197  TopCand.Reason = NoCand;
3198  tryCandidate(Cand, TopCand, nullptr);
3199  if (TopCand.Reason != NoCand) {
3200    Cand.setBest(TopCand);
3201    LLVM_DEBUG(traceCandidate(Cand));
3202  }
3203
3204  IsTopNode = Cand.AtTop;
3205  tracePick(Cand);
3206  return Cand.SU;
3207}
3208
3209/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3210SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3211  if (DAG->top() == DAG->bottom()) {
3212    assert(Top.Available.empty() && Top.Pending.empty() &&
3213           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3214    return nullptr;
3215  }
3216  SUnit *SU;
3217  do {
3218    if (RegionPolicy.OnlyTopDown) {
3219      SU = Top.pickOnlyChoice();
3220      if (!SU) {
3221        CandPolicy NoPolicy;
3222        TopCand.reset(NoPolicy);
3223        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3224        assert(TopCand.Reason != NoCand && "failed to find a candidate");
3225        tracePick(TopCand);
3226        SU = TopCand.SU;
3227      }
3228      IsTopNode = true;
3229    } else if (RegionPolicy.OnlyBottomUp) {
3230      SU = Bot.pickOnlyChoice();
3231      if (!SU) {
3232        CandPolicy NoPolicy;
3233        BotCand.reset(NoPolicy);
3234        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3235        assert(BotCand.Reason != NoCand && "failed to find a candidate");
3236        tracePick(BotCand);
3237        SU = BotCand.SU;
3238      }
3239      IsTopNode = false;
3240    } else {
3241      SU = pickNodeBidirectional(IsTopNode);
3242    }
3243  } while (SU->isScheduled);
3244
3245  if (SU->isTopReady())
3246    Top.removeReady(SU);
3247  if (SU->isBottomReady())
3248    Bot.removeReady(SU);
3249
3250  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3251                    << *SU->getInstr());
3252  return SU;
3253}
3254
3255void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3256  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3257  if (!isTop)
3258    ++InsertPos;
3259  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3260
3261  // Find already scheduled copies with a single physreg dependence and move
3262  // them just above the scheduled instruction.
3263  for (SDep &Dep : Deps) {
3264    if (Dep.getKind() != SDep::Data ||
3265        !Register::isPhysicalRegister(Dep.getReg()))
3266      continue;
3267    SUnit *DepSU = Dep.getSUnit();
3268    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3269      continue;
3270    MachineInstr *Copy = DepSU->getInstr();
3271    if (!Copy->isCopy() && !Copy->isMoveImmediate())
3272      continue;
3273    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
3274               DAG->dumpNode(*Dep.getSUnit()));
3275    DAG->moveInstruction(Copy, InsertPos);
3276  }
3277}
3278
3279/// Update the scheduler's state after scheduling a node. This is the same node
3280/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3281/// update it's state based on the current cycle before MachineSchedStrategy
3282/// does.
3283///
3284/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3285/// them here. See comments in biasPhysReg.
3286void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3287  if (IsTopNode) {
3288    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3289    Top.bumpNode(SU);
3290    if (SU->hasPhysRegUses)
3291      reschedulePhysReg(SU, true);
3292  } else {
3293    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3294    Bot.bumpNode(SU);
3295    if (SU->hasPhysRegDefs)
3296      reschedulePhysReg(SU, false);
3297  }
3298}
3299
3300/// Create the standard converging machine scheduler. This will be used as the
3301/// default scheduler if the target does not set a default.
3302ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3303  ScheduleDAGMILive *DAG =
3304      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3305  // Register DAG post-processors.
3306  //
3307  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3308  // data and pass it to later mutations. Have a single mutation that gathers
3309  // the interesting nodes in one pass.
3310  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3311  return DAG;
3312}
3313
3314static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3315  return createGenericSchedLive(C);
3316}
3317
3318static MachineSchedRegistry
3319GenericSchedRegistry("converge", "Standard converging scheduler.",
3320                     createConvergingSched);
3321
3322//===----------------------------------------------------------------------===//
3323// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3324//===----------------------------------------------------------------------===//
3325
3326void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3327  DAG = Dag;
3328  SchedModel = DAG->getSchedModel();
3329  TRI = DAG->TRI;
3330
3331  Rem.init(DAG, SchedModel);
3332  Top.init(DAG, SchedModel, &Rem);
3333  BotRoots.clear();
3334
3335  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3336  // or are disabled, then these HazardRecs will be disabled.
3337  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3338  if (!Top.HazardRec) {
3339    Top.HazardRec =
3340        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3341            Itin, DAG);
3342  }
3343}
3344
3345void PostGenericScheduler::registerRoots() {
3346  Rem.CriticalPath = DAG->ExitSU.getDepth();
3347
3348  // Some roots may not feed into ExitSU. Check all of them in case.
3349  for (const SUnit *SU : BotRoots) {
3350    if (SU->getDepth() > Rem.CriticalPath)
3351      Rem.CriticalPath = SU->getDepth();
3352  }
3353  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3354  if (DumpCriticalPathLength) {
3355    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3356  }
3357}
3358
3359/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3360///
3361/// \param Cand provides the policy and current best candidate.
3362/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
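///
/// The heuristics below are a subset of the pre-RA ones: stall cycles,
/// clustering, resource balance, remaining latency, and finally the original
/// instruction order.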
3363void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3364                                        SchedCandidate &TryCand) {
3365  // Initialize the candidate if needed.
3366  if (!Cand.isValid()) {
3367    TryCand.Reason = NodeOrder;
3368    return;
3369  }
3370
3371  // Prioritize instructions that read unbuffered resources by stall cycles.
3372  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3373              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3374    return;
3375
3376  // Keep clustered nodes together.
3377  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3378                 Cand.SU == DAG->getNextClusterSucc(),
3379                 TryCand, Cand, Cluster))
3380    return;
3381
3382  // Avoid critical resource consumption and balance the schedule.
3383  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3384              TryCand, Cand, ResourceReduce))
3385    return;
3386  if (tryGreater(TryCand.ResDelta.DemandedResources,
3387                 Cand.ResDelta.DemandedResources,
3388                 TryCand, Cand, ResourceDemand))
3389    return;
3390
3391  // Avoid serializing long latency dependence chains.
3392  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3393    return;
3394  }
3395
3396  // Fall through to original instruction order.
3397  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3398    TryCand.Reason = NodeOrder;
3399}
3400
3401void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3402  ReadyQueue &Q = Top.Available;
3403  for (SUnit *SU : Q) {
3404    SchedCandidate TryCand(Cand.Policy);
3405    TryCand.SU = SU;
3406    TryCand.AtTop = true;
3407    TryCand.initResourceDelta(DAG, SchedModel);
3408    tryCandidate(Cand, TryCand);
3409    if (TryCand.Reason != NoCand) {
3410      Cand.setBest(TryCand);
3411      LLVM_DEBUG(traceCandidate(Cand));
3412    }
3413  }
3414}
3415
3416/// Pick the next node to schedule.
3417SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3418  if (DAG->top() == DAG->bottom()) {
3419    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3420    return nullptr;
3421  }
3422  SUnit *SU;
3423  do {
3424    SU = Top.pickOnlyChoice();
3425    if (SU) {
3426      tracePick(Only1, true);
3427    } else {
3428      CandPolicy NoPolicy;
3429      SchedCandidate TopCand(NoPolicy);
3430      // Set the top-down policy based on the state of the current top zone and
3431      // the instructions outside the zone, including the bottom zone.
3432      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3433      pickNodeFromQueue(TopCand);
3434      assert(TopCand.Reason != NoCand && "failed to find a candidate");
3435      tracePick(TopCand);
3436      SU = TopCand.SU;
3437    }
3438  } while (SU->isScheduled);
3439
3440  IsTopNode = true;
3441  Top.removeReady(SU);
3442
3443  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3444                    << *SU->getInstr());
3445  return SU;
3446}
3447
3448/// Called after ScheduleDAGMI has scheduled an instruction and updated
3449/// scheduled/remaining flags in the DAG nodes.
3450void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3451  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3452  Top.bumpNode(SU);
3453}
3454
3455ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3456  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3457                           /*RemoveKillFlags=*/true);
3458}
3459
3460//===----------------------------------------------------------------------===//
3461// ILP Scheduler. Currently for experimental analysis of heuristics.
3462//===----------------------------------------------------------------------===//
3463
3464namespace {
3465
3466/// Order nodes by the ILP metric.
3467struct ILPOrder {
3468  const SchedDFSResult *DFSResult = nullptr;
3469  const BitVector *ScheduledTrees = nullptr;
3470  bool MaximizeILP;
3471
3472  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3473
3474  /// Apply a less-than relation on node priority.
3475  ///
3476  /// (Return true if A comes after B in the Q.)
3477  bool operator()(const SUnit *A, const SUnit *B) const {
3478    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3479    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3480    if (SchedTreeA != SchedTreeB) {
3481      // Unscheduled trees have lower priority.
3482      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3483        return ScheduledTrees->test(SchedTreeB);
3484
3485      // Trees with shallower connections have lower priority.
3486      if (DFSResult->getSubtreeLevel(SchedTreeA)
3487          != DFSResult->getSubtreeLevel(SchedTreeB)) {
3488        return DFSResult->getSubtreeLevel(SchedTreeA)
3489          < DFSResult->getSubtreeLevel(SchedTreeB);
3490      }
3491    }
3492    if (MaximizeILP)
3493      return DFSResult->getILP(A) < DFSResult->getILP(B);
3494    else
3495      return DFSResult->getILP(A) > DFSResult->getILP(B);
3496  }
3497};
3498
3499/// Schedule based on the ILP metric.
3500class ILPScheduler : public MachineSchedStrategy {
3501  ScheduleDAGMILive *DAG = nullptr;
3502  ILPOrder Cmp;
3503
3504  std::vector<SUnit*> ReadyQ;
3505
3506public:
3507  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3508
3509  void initialize(ScheduleDAGMI *dag) override {
3510    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3511    DAG = static_cast<ScheduleDAGMILive*>(dag);
3512    DAG->computeDFSResult();
3513    Cmp.DFSResult = DAG->getDFSResult();
3514    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3515    ReadyQ.clear();
3516  }
3517
3518  void registerRoots() override {
3519    // Restore the heap in ReadyQ with the updated DFS results.
3520    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3521  }
3522
3523  /// Implement MachineSchedStrategy interface.
3524  /// -----------------------------------------
3525
3526  /// Callback to select the highest priority node from the ready Q.
3527  SUnit *pickNode(bool &IsTopNode) override {
3528    if (ReadyQ.empty()) return nullptr;
3529    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3530    SUnit *SU = ReadyQ.back();
3531    ReadyQ.pop_back();
3532    IsTopNode = false;
3533    LLVM_DEBUG(dbgs() << "Pick node "
3534                      << "SU(" << SU->NodeNum << ") "
3535                      << " ILP: " << DAG->getDFSResult()->getILP(SU)
3536                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3537                      << " @"
3538                      << DAG->getDFSResult()->getSubtreeLevel(
3539                             DAG->getDFSResult()->getSubtreeID(SU))
3540                      << '\n'
3541                      << "Scheduling " << *SU->getInstr());
3542    return SU;
3543  }
3544
3545  /// Scheduler callback to notify that a new subtree is scheduled.
3546  void scheduleTree(unsigned SubtreeID) override {
3547    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3548  }
3549
3550  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3551  /// DFSResults, and resort the priority Q.
3552  void schedNode(SUnit *SU, bool IsTopNode) override {
3553    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3554  }
3555
3556  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3557
3558  void releaseBottomNode(SUnit *SU) override {
3559    ReadyQ.push_back(SU);
3560    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3561  }
3562};
3563
3564} // end anonymous namespace
3565
3566static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3567  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3568}
3569static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3570  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3571}
3572
3573static MachineSchedRegistry ILPMaxRegistry(
3574  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3575static MachineSchedRegistry ILPMinRegistry(
3576  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3577
3578//===----------------------------------------------------------------------===//
3579// Machine Instruction Shuffler for Correctness Testing
3580//===----------------------------------------------------------------------===//
3581
3582#ifndef NDEBUG
3583namespace {
3584
3585/// Apply a less-than relation on the node order, which corresponds to the
3586/// instruction order prior to scheduling. IsReverse implements greater-than.
3587template<bool IsReverse>
3588struct SUnitOrder {
3589  bool operator()(SUnit *A, SUnit *B) const {
3590    if (IsReverse)
3591      return A->NodeNum > B->NodeNum;
3592    else
3593      return A->NodeNum < B->NodeNum;
3594  }
3595};
3596
3597/// Reorder instructions as much as possible.
3598class InstructionShuffler : public MachineSchedStrategy {
3599  bool IsAlternating;
3600  bool IsTopDown;
3601
3602  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3603  // gives nodes with a higher number higher priority, causing the latest
3604  // instructions to be scheduled first.
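  // (PriorityQueue pops the largest element under the given comparator, so a
  // less-than order pops the highest NodeNum first.)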
3605  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3606    TopQ;
3607
3608  // When scheduling bottom-up, use greater-than as the queue priority.
3609  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3610    BottomQ;
3611
3612public:
3613  InstructionShuffler(bool alternate, bool topdown)
3614    : IsAlternating(alternate), IsTopDown(topdown) {}
3615
3616  void initialize(ScheduleDAGMI*) override {
3617    TopQ.clear();
3618    BottomQ.clear();
3619  }
3620
3621  /// Implement MachineSchedStrategy interface.
3622  /// -----------------------------------------
3623
3624  SUnit *pickNode(bool &IsTopNode) override {
3625    SUnit *SU;
3626    if (IsTopDown) {
3627      do {
3628        if (TopQ.empty()) return nullptr;
3629        SU = TopQ.top();
3630        TopQ.pop();
3631      } while (SU->isScheduled);
3632      IsTopNode = true;
3633    } else {
3634      do {
3635        if (BottomQ.empty()) return nullptr;
3636        SU = BottomQ.top();
3637        BottomQ.pop();
3638      } while (SU->isScheduled);
3639      IsTopNode = false;
3640    }
3641    if (IsAlternating)
3642      IsTopDown = !IsTopDown;
3643    return SU;
3644  }
3645
3646  void schedNode(SUnit *SU, bool IsTopNode) override {}
3647
3648  void releaseTopNode(SUnit *SU) override {
3649    TopQ.push(SU);
3650  }
3651  void releaseBottomNode(SUnit *SU) override {
3652    BottomQ.push(SU);
3653  }
3654};
3655
3656} // end anonymous namespace
3657
3658static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3659  bool Alternate = !ForceTopDown && !ForceBottomUp;
3660  bool TopDown = !ForceBottomUp;
3661  assert((TopDown || !ForceTopDown) &&
3662         "-misched-topdown incompatible with -misched-bottomup");
3663  return new ScheduleDAGMILive(
3664      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3665}
3666
3667static MachineSchedRegistry ShufflerRegistry(
3668  "shuffle", "Shuffle machine instructions alternating directions",
3669  createInstructionShuffler);
3670#endif // !NDEBUG
3671
3672//===----------------------------------------------------------------------===//
3673// GraphWriter support for ScheduleDAGMILive.
3674//===----------------------------------------------------------------------===//
3675
3676#ifndef NDEBUG
3677namespace llvm {
3678
3679template<> struct GraphTraits<
3680  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3681
3682template<>
3683struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3684  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3685
3686  static std::string getGraphName(const ScheduleDAG *G) {
3687    return G->MF.getName();
3688  }
3689
3690  static bool renderGraphFromBottomUp() {
3691    return true;
3692  }
3693
3694  static bool isNodeHidden(const SUnit *Node) {
3695    if (ViewMISchedCutoff == 0)
3696      return false;
3697    return (Node->Preds.size() > ViewMISchedCutoff
3698         || Node->Succs.size() > ViewMISchedCutoff);
3699  }
3700
3701  /// If you want to override the dot attributes printed for a particular
3702  /// edge, override this method.
3703  static std::string getEdgeAttributes(const SUnit *Node,
3704                                       SUnitIterator EI,
3705                                       const ScheduleDAG *Graph) {
3706    if (EI.isArtificialDep())
3707      return "color=cyan,style=dashed";
3708    if (EI.isCtrlDep())
3709      return "color=blue,style=dashed";
3710    return "";
3711  }
3712
3713  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3714    std::string Str;
3715    raw_string_ostream SS(Str);
3716    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3717    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3718      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3719    SS << "SU:" << SU->NodeNum;
3720    if (DFS)
3721      SS << " I:" << DFS->getNumInstrs(SU);
3722    return SS.str();
3723  }
3724
3725  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3726    return G->getGraphNodeLabel(SU);
3727  }
3728
3729  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3730    std::string Str("shape=Mrecord");
3731    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3732    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3733      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3734    if (DFS) {
3735      Str += ",style=filled,fillcolor=\"#";
3736      Str += DOT::getColorString(DFS->getSubtreeID(N));
3737      Str += '"';
3738    }
3739    return Str;
3740  }
3741};
3742
3743} // end namespace llvm
3744#endif // NDEBUG
3745
3746/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3747/// rendered using 'dot'.
3748void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3749#ifndef NDEBUG
3750  ViewGraph(this, Name, false, Title);
3751#else
3752  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3753         << "systems with Graphviz or gv!\n";
3754#endif  // NDEBUG
3755}
3756
3757/// Out-of-line implementation with no arguments is handy for gdb.
3758void ScheduleDAGMI::viewGraph() {
3759  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3760}
3761