//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace llvm {

cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                       cl::desc("Print critical path length to stdout"));

cl::opt<bool> VerifyScheduling(
    "verify-misched", cl::Hidden,
    cl::desc("Verify machine instrs before and after machine scheduling"));

} // end namespace llvm

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
                                        cl::desc("Only schedule this MBB#"));
static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
                               cl::desc("Print schedule DAGs"));
#else
static const bool ViewMISchedDAGs = false;
static const bool PrintDAGs = false;
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // end anonymous namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
    MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);
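
// Targets and experiments can add alternatives to this registry and select
// them with -misched=<name>. A minimal sketch, where MySchedStrategy and
// createMyCustomSched are hypothetical names, not defined in this file:
//
//   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MySchedStrategy>(C));
//   }
//   static MachineSchedRegistry
//   MyCustomSchedRegistry("my-custom", "Run my custom strategy.",
//                         createMyCustomSched);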

static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}
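
// In-tree targets usually provide their default scheduler by overriding
// TargetPassConfig::createMachineScheduler. A hedged sketch of such an
// override, where MyPassConfig is a hypothetical target pass config:
//
//   ScheduleDAGInstrs *
//   MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG =
//         new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }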

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    LLVM_DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  LLVM_DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}
/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, the isCall check enforces
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}

/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace

using MBBRegionsVector = SmallVector<SchedRegion, 16>;

static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for (MachineBasicBlock::iterator RegionEnd = MBB->end();
       RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (; I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugInstr()) {
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
      }
    }

    // It's possible we found a scheduling region that only has debug
    // instructions. Don't bother scheduling these.
    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}
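
// For example (illustrative pseudo-MIR, not taken from any real target), a
// block of the form
//
//   %1 = LOAD ...
//   %2 = ADD %1, ...
//   CALL @f
//   %3 = SUB %2, ...
//
// yields two regions split at the CALL, which is a scheduling boundary: one
// covering the LOAD/ADD pair and one covering the SUB. The CALL itself is
// never reordered.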

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // will be processed (MBB) top-down if initialized with true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    for (MBBRegionsVector::iterator R = MBBRegions.begin();
         R != MBBRegions.end(); ++R) {
      MachineBasicBlock::iterator I = R->RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
      unsigned NumRegionInstrs = R->NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
                        << " " << MBB->getName() << "\n  From: " << *I
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals.
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}
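
// In debug builds the cutoff above makes scheduling decisions bisectable: for
// example, "llc -misched-cutoff=N ..." stops reordering after N instructions,
// so a binary search over N can isolate the first decision that breaks a
// test case.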

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}
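
// Mutations are appended with ScheduleDAGMI::addMutation (typically from the
// target's createMachineScheduler hook) and run here in insertion order, after
// the DAG is built but before any node is scheduled. A minimal sketch of a
// custom mutation, where MyClusterMutation is a hypothetical name:
//
//   struct MyClusterMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAG) override {
//       // e.g. add weak cluster edges between related SUnits here.
//     }
//   };
//   DAG->addMutation(std::make_unique<MyClusterMutation>());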

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == std::prev(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      dumpNode(*SU);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (!MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    Register Reg = MO.getReg();
    if (!Register::isVirtualRegister(Reg))
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool FoundDef = false;
      for (const MachineOperand &MO2 : MI.operands()) {
        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
          FoundDef = true;
          break;
        }
      }
      if (FoundDef)
        continue;
    }

    // Record this local VReg use.
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}
// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
995276479Sdimvoid ScheduleDAGMILive::initRegPressure() {
996314564Sdim  VRegUses.clear();
997314564Sdim  VRegUses.setUniverse(MRI.getNumVirtRegs());
998314564Sdim  for (SUnit &SU : SUnits)
999314564Sdim    collectVRegUses(SU);
1000314564Sdim
1001309124Sdim  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1002309124Sdim                    ShouldTrackLaneMasks, false);
1003309124Sdim  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1004309124Sdim                    ShouldTrackLaneMasks, false);
1005239462Sdim
1006239462Sdim  // Close the RPTracker to finalize live ins.
1007239462Sdim  RPTracker.closeRegion();
1008239462Sdim
1009341825Sdim  LLVM_DEBUG(RPTracker.dump());
1010239462Sdim
1011239462Sdim  // Initialize the live ins and live outs.
1012239462Sdim  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1013239462Sdim  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1014239462Sdim
1015239462Sdim  // Close one end of the tracker so we can call
1016239462Sdim  // getMaxUpward/DownwardPressureDelta before advancing across any
1017239462Sdim  // instructions. This converts currently live regs into live ins/outs.
1018239462Sdim  TopRPTracker.closeTop();
1019239462Sdim  BotRPTracker.closeBottom();
1020239462Sdim
1021261991Sdim  BotRPTracker.initLiveThru(RPTracker);
1022261991Sdim  if (!BotRPTracker.getLiveThru().empty()) {
1023261991Sdim    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1024341825Sdim    LLVM_DEBUG(dbgs() << "Live Thru: ";
1025341825Sdim               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1026261991Sdim  };
1027261991Sdim
1028261991Sdim  // For each live out vreg reduce the pressure change associated with other
1029261991Sdim  // uses of the same vreg below the live-out reaching def.
1030261991Sdim  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1031261991Sdim
1032239462Sdim  // Account for liveness generated by the region boundary.
1033261991Sdim  if (LiveRegionEnd != RegionEnd) {
1034309124Sdim    SmallVector<RegisterMaskPair, 8> LiveUses;
1035261991Sdim    BotRPTracker.recede(&LiveUses);
1036261991Sdim    updatePressureDiffs(LiveUses);
1037261991Sdim  }
1038239462Sdim
1039341825Sdim  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1040341825Sdim             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1041341825Sdim             dbgs() << "Bottom Pressure:\n";
1042341825Sdim             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1043296417Sdim
1044327952Sdim  assert((BotRPTracker.getPos() == RegionEnd ||
1045341825Sdim          (RegionEnd->isDebugInstr() &&
1046327952Sdim           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1047327952Sdim         "Can't find the region bottom");
1048239462Sdim
1049239462Sdim  // Cache the list of excess pressure sets in this region. This will also track
1050239462Sdim  // the max pressure in the scheduled code for these sets.
1051239462Sdim  RegionCriticalPSets.clear();
1052249423Sdim  const std::vector<unsigned> &RegionPressure =
1053249423Sdim    RPTracker.getPressure().MaxSetPressure;
1054239462Sdim  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1055261991Sdim    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1056261991Sdim    if (RegionPressure[i] > Limit) {
1057341825Sdim      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1058341825Sdim                        << " Actual " << RegionPressure[i] << "\n");
1059261991Sdim      RegionCriticalPSets.push_back(PressureChange(i));
1060261991Sdim    }
1061239462Sdim  }
1062341825Sdim  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1063341825Sdim             for (const PressureChange &RCPS
1064341825Sdim                  : RegionCriticalPSets) dbgs()
1065341825Sdim             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1066341825Sdim             dbgs() << "\n");
1067239462Sdim}
1068239462Sdim
1069276479Sdimvoid ScheduleDAGMILive::
1070261991SdimupdateScheduledPressure(const SUnit *SU,
1071261991Sdim                        const std::vector<unsigned> &NewMaxPressure) {
1072261991Sdim  const PressureDiff &PDiff = getPressureDiff(SU);
1073261991Sdim  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1074321369Sdim  for (const PressureChange &PC : PDiff) {
1075321369Sdim    if (!PC.isValid())
1076261991Sdim      break;
1077321369Sdim    unsigned ID = PC.getPSet();
1078261991Sdim    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1079261991Sdim      ++CritIdx;
1080261991Sdim    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1081261991Sdim      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1082321369Sdim          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1083261991Sdim        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1084261991Sdim    }
1085261991Sdim    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1086261991Sdim    if (NewMaxPressure[ID] >= Limit - 2) {
1087341825Sdim      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1088341825Sdim                        << NewMaxPressure[ID]
1089341825Sdim                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1090341825Sdim                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1091341825Sdim                        << " livethru)\n");
1092261991Sdim    }
1093239462Sdim  }
1094261991Sdim}
1095261991Sdim
1096261991Sdim/// Update the PressureDiff array for liveness after scheduling this
1097261991Sdim/// instruction.
1098309124Sdimvoid ScheduleDAGMILive::updatePressureDiffs(
1099309124Sdim    ArrayRef<RegisterMaskPair> LiveUses) {
1100309124Sdim  for (const RegisterMaskPair &P : LiveUses) {
1101309124Sdim    unsigned Reg = P.RegUnit;
1102261991Sdim    /// FIXME: Currently assuming single-use physregs.
1103360784Sdim    if (!Register::isVirtualRegister(Reg))
1104261991Sdim      continue;
1105261991Sdim
1106309124Sdim    if (ShouldTrackLaneMasks) {
1107309124Sdim      // If the register has just become live then other uses won't change
1108309124Sdim      // this fact anymore => decrement pressure.
1109309124Sdim      // If the register has just become dead then other uses make it come
1110309124Sdim      // back to life => increment pressure.
1111314564Sdim      bool Decrement = P.LaneMask.any();
1112309124Sdim
1113309124Sdim      for (const VReg2SUnit &V2SU
1114309124Sdim           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1115309124Sdim        SUnit &SU = *V2SU.SU;
1116309124Sdim        if (SU.isScheduled || &SU == &ExitSU)
1117309124Sdim          continue;
1118309124Sdim
1119309124Sdim        PressureDiff &PDiff = getPressureDiff(&SU);
1120309124Sdim        PDiff.addPressureChange(Reg, Decrement, &MRI);
1121341825Sdim        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1122341825Sdim                          << printReg(Reg, TRI) << ':'
1123341825Sdim                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1124341825Sdim                   dbgs() << "              to "; PDiff.dump(*TRI););
1125309124Sdim      }
1126309124Sdim    } else {
1127314564Sdim      assert(P.LaneMask.any());
1128341825Sdim      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1129309124Sdim      // This may be called before CurrentBottom has been initialized. However,
1130309124Sdim      // BotRPTracker must have a valid position. We want the value live into the
1131309124Sdim      // instruction or live out of the block, so ask for the previous
1132309124Sdim      // instruction's live-out.
1133309124Sdim      const LiveInterval &LI = LIS->getInterval(Reg);
1134309124Sdim      VNInfo *VNI;
1135309124Sdim      MachineBasicBlock::const_iterator I =
1136309124Sdim        nextIfDebug(BotRPTracker.getPos(), BB->end());
1137309124Sdim      if (I == BB->end())
1138309124Sdim        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1139309124Sdim      else {
1140309124Sdim        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1141309124Sdim        VNI = LRQ.valueIn();
1142309124Sdim      }
1143309124Sdim      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1144309124Sdim      assert(VNI && "No live value at use.");
1145309124Sdim      for (const VReg2SUnit &V2SU
1146309124Sdim           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1147309124Sdim        SUnit *SU = V2SU.SU;
1148309124Sdim        // If this use comes before the reaching def, it cannot be a last use,
1149309124Sdim        // so decrease its pressure change.
1150309124Sdim        if (!SU->isScheduled && SU != &ExitSU) {
1151309124Sdim          LiveQueryResult LRQ =
1152309124Sdim              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1153309124Sdim          if (LRQ.valueIn() == VNI) {
1154309124Sdim            PressureDiff &PDiff = getPressureDiff(SU);
1155309124Sdim            PDiff.addPressureChange(Reg, true, &MRI);
1156341825Sdim            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1157341825Sdim                              << *SU->getInstr();
1158341825Sdim                       dbgs() << "              to "; PDiff.dump(*TRI););
1159309124Sdim          }
1160296417Sdim        }
1161251662Sdim      }
1162261991Sdim    }
1163261991Sdim  }
1164239462Sdim}
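
// Illustrative sketch of the bookkeeping above (register name and pressure
// weight are hypothetical, not from a real target): scheduling a use of %v
// bottom-up makes %v live, so each remaining unscheduled use of %v can no
// longer be its last use, and that use's PressureDiff entry for %v's
// pressure set is decremented with the same call the loop above makes:
//
//   PressureDiff &PDiff = getPressureDiff(&OtherUseSU); // hypothetical SU
//   PDiff.addPressureChange(Reg, /*Decrement=*/true, &MRI); // e.g. GPR: -1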
1165239462Sdim
1166344779Sdimvoid ScheduleDAGMILive::dump() const {
1167344779Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168344779Sdim  if (EntrySU.getInstr() != nullptr)
1169344779Sdim    dumpNodeAll(EntrySU);
1170344779Sdim  for (const SUnit &SU : SUnits) {
1171344779Sdim    dumpNodeAll(SU);
1172344779Sdim    if (ShouldTrackPressure) {
1173344779Sdim      dbgs() << "  Pressure Diff      : ";
1174344779Sdim      getPressureDiff(&SU).dump(*TRI);
1175344779Sdim    }
1176344779Sdim    dbgs() << "  Single Issue       : ";
1177344779Sdim    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1178344779Sdim        SchedModel.mustEndGroup(SU.getInstr()))
1179344779Sdim      dbgs() << "true;";
1180344779Sdim    else
1181344779Sdim      dbgs() << "false;";
1182344779Sdim    dbgs() << '\n';
1183344779Sdim  }
1184344779Sdim  if (ExitSU.getInstr() != nullptr)
1185344779Sdim    dumpNodeAll(ExitSU);
1186344779Sdim#endif
1187344779Sdim}
1188344779Sdim
1189243830Sdim/// schedule - Called back from MachineScheduler::runOnMachineFunction
1190243830Sdim/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1191243830Sdim/// only includes instructions that have DAG nodes, not scheduling boundaries.
1192243830Sdim///
1193243830Sdim/// This is a skeletal driver, with all the functionality pushed into helpers,
1194296417Sdim/// so that it can be easily extended by experimental schedulers. Generally,
1195243830Sdim/// implementing MachineSchedStrategy should be sufficient to implement a new
1196243830Sdim/// scheduling algorithm. However, if a scheduler further subclasses
1197276479Sdim/// ScheduleDAGMILive then it will want to override this virtual method in order
1198276479Sdim/// to update any specialized state.
1199276479Sdimvoid ScheduleDAGMILive::schedule() {
1200341825Sdim  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1201341825Sdim  LLVM_DEBUG(SchedImpl->dumpPolicy());
1202243830Sdim  buildDAGWithRegPressure();
1203243830Sdim
1204243830Sdim  postprocessDAG();
1205243830Sdim
1206249423Sdim  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1207249423Sdim  findRootsAndBiasEdges(TopRoots, BotRoots);
1208249423Sdim
1209249423Sdim  // Initialize the strategy before modifying the DAG.
1210249423Sdim  // This may initialize a DFSResult to be used for queue priority.
1211249423Sdim  SchedImpl->initialize(this);
1212249423Sdim
1213344779Sdim  LLVM_DEBUG(dump());
1214344779Sdim  if (PrintDAGs) dump();
1215243830Sdim  if (ViewMISchedDAGs) viewGraph();
1216243830Sdim
1217249423Sdim  // Initialize ready queues now that the DAG and priority data are finalized.
1218249423Sdim  initQueues(TopRoots, BotRoots);
1219243830Sdim
1220243830Sdim  bool IsTopNode = false;
1221296417Sdim  while (true) {
1222341825Sdim    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1223296417Sdim    SUnit *SU = SchedImpl->pickNode(IsTopNode);
1224296417Sdim    if (!SU) break;
1225296417Sdim
1226243830Sdim    assert(!SU->isScheduled && "Node already scheduled");
1227243830Sdim    if (!checkSchedLimit())
1228243830Sdim      break;
1229243830Sdim
1230243830Sdim    scheduleMI(SU, IsTopNode);
1231243830Sdim
1232276479Sdim    if (DFSResult) {
1233276479Sdim      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1234276479Sdim      if (!ScheduledTrees.test(SubtreeID)) {
1235276479Sdim        ScheduledTrees.set(SubtreeID);
1236276479Sdim        DFSResult->scheduleTree(SubtreeID);
1237276479Sdim        SchedImpl->scheduleTree(SubtreeID);
1238276479Sdim      }
1239276479Sdim    }
1240276479Sdim
1241276479Sdim    // Notify the scheduling strategy after updating the DAG.
1242276479Sdim    SchedImpl->schedNode(SU, IsTopNode);
1243288943Sdim
1244288943Sdim    updateQueues(SU, IsTopNode);
1245243830Sdim  }
1246243830Sdim  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1247243830Sdim
1248243830Sdim  placeDebugValues();
1249243830Sdim
1250341825Sdim  LLVM_DEBUG({
1251327952Sdim    dbgs() << "*** Final schedule for "
1252327952Sdim           << printMBBReference(*begin()->getParent()) << " ***\n";
1253327952Sdim    dumpSchedule();
1254327952Sdim    dbgs() << '\n';
1255327952Sdim  });
1256243830Sdim}
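
// A minimal sketch of the MachineSchedStrategy contract driven by the loop
// above. The overridden methods are the interface's required hooks; the
// trivial bodies are hypothetical filler, and a real strategy must return
// each unscheduled SUnit exactly once before answering null from pickNode:
//
//   struct TrivialStrategy : public MachineSchedStrategy {
//     void initialize(ScheduleDAGMI *DAG) override { /* set up queues */ }
//     SUnit *pickNode(bool &IsTopNode) override {
//       IsTopNode = true; // always pick from the top zone
//       return nullptr;   // null terminates the scheduling loop above
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//     void releaseTopNode(SUnit *SU) override {}
//     void releaseBottomNode(SUnit *SU) override {}
//   };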
1257243830Sdim
1258243830Sdim/// Build the DAG and setup three register pressure trackers.
1259276479Sdimvoid ScheduleDAGMILive::buildDAGWithRegPressure() {
1260261991Sdim  if (!ShouldTrackPressure) {
1261261991Sdim    RPTracker.reset();
1262261991Sdim    RegionCriticalPSets.clear();
1263261991Sdim    buildSchedGraph(AA);
1264261991Sdim    return;
1265261991Sdim  }
1266261991Sdim
1267243830Sdim  // Initialize the register pressure tracker used by buildSchedGraph.
1268261991Sdim  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1269309124Sdim                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1270243830Sdim
1271243830Sdim  // Account for liveness generated by the region boundary.
1272243830Sdim  if (LiveRegionEnd != RegionEnd)
1273243830Sdim    RPTracker.recede();
1274243830Sdim
1275243830Sdim  // Build the DAG, and compute current register pressure.
1276309124Sdim  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1277243830Sdim
1278243830Sdim  // Initialize top/bottom trackers after computing region pressure.
1279243830Sdim  initRegPressure();
1280243830Sdim}
1281243830Sdim
1282276479Sdimvoid ScheduleDAGMILive::computeDFSResult() {
1283249423Sdim  if (!DFSResult)
1284249423Sdim    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1285249423Sdim  DFSResult->clear();
1286249423Sdim  ScheduledTrees.clear();
1287249423Sdim  DFSResult->resize(SUnits.size());
1288249423Sdim  DFSResult->compute(SUnits);
1289249423Sdim  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1290249423Sdim}
1291239462Sdim
1292261991Sdim/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1293261991Sdim/// only provides the critical path for single-block loops. To handle loops that
1294261991Sdim/// span blocks, we could use the vreg path latencies provided by
1295261991Sdim/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1296261991Sdim/// available for use in the scheduler.
1297261991Sdim///
1298261991Sdim/// The cyclic path estimation identifies a def-use pair that crosses the back
1299261991Sdim/// edge and considers the depth and height of the nodes. For example, consider
1300261991Sdim/// the following instruction sequence where each instruction has unit latency
1301261991Sdim/// and defines an eponymous virtual register:
1302261991Sdim///
1303261991Sdim/// a->b(a,c)->c(b)->d(c)->exit
1304261991Sdim///
1305261991Sdim/// The cyclic critical path is two cycles: b->c->b
1306261991Sdim/// The acyclic critical path is four cycles: a->b->c->d->exit
1307261991Sdim/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1308261991Sdim/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1309261991Sdim/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1310261991Sdim/// LiveInDepth = depth(b) = len(a->b) = 1
1311261991Sdim///
1312261991Sdim/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1313261991Sdim/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1314261991Sdim/// CyclicCriticalPath = min(2, 2) = 2
1315276479Sdim///
1316276479Sdim/// This could be relevant to PostRA scheduling, but is currently implemented
1317276479Sdim/// assuming LiveIntervals.
1318276479Sdimunsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1319261991Sdim  // This only applies to single-block loops.
1320261991Sdim  if (!BB->isSuccessor(BB))
1321261991Sdim    return 0;
1322261991Sdim
1323261991Sdim  unsigned MaxCyclicLatency = 0;
1324261991Sdim  // Visit each live out vreg def to find def/use pairs that cross iterations.
1325309124Sdim  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1326309124Sdim    unsigned Reg = P.RegUnit;
1327360784Sdim    if (!Register::isVirtualRegister(Reg))
1328360784Sdim      continue;
1329261991Sdim    const LiveInterval &LI = LIS->getInterval(Reg);
1330261991Sdim    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1331261991Sdim    if (!DefVNI)
1332261991Sdim      continue;
1333261991Sdim
1334261991Sdim    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1335261991Sdim    const SUnit *DefSU = getSUnit(DefMI);
1336261991Sdim    if (!DefSU)
1337261991Sdim      continue;
1338261991Sdim
1339261991Sdim    unsigned LiveOutHeight = DefSU->getHeight();
1340261991Sdim    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1341261991Sdim    // Visit all local users of the vreg def.
1342296417Sdim    for (const VReg2SUnit &V2SU
1343296417Sdim         : make_range(VRegUses.find(Reg), VRegUses.end())) {
1344296417Sdim      SUnit *SU = V2SU.SU;
1345296417Sdim      if (SU == &ExitSU)
1346261991Sdim        continue;
1347261991Sdim
1348261991Sdim      // Only consider uses of the phi.
1349309124Sdim      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1350261991Sdim      if (!LRQ.valueIn()->isPHIDef())
1351261991Sdim        continue;
1352261991Sdim
1353261991Sdim      // Assume that a path spanning two iterations is a cycle, which could
1354261991Sdim      // overestimate in strange cases. This allows cyclic latency to be
1355261991Sdim      // estimated as the minimum slack of the vreg's depth or height.
1356261991Sdim      unsigned CyclicLatency = 0;
1357296417Sdim      if (LiveOutDepth > SU->getDepth())
1358296417Sdim        CyclicLatency = LiveOutDepth - SU->getDepth();
1359261991Sdim
1360296417Sdim      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1361261991Sdim      if (LiveInHeight > LiveOutHeight) {
1362261991Sdim        if (LiveInHeight - LiveOutHeight < CyclicLatency)
1363261991Sdim          CyclicLatency = LiveInHeight - LiveOutHeight;
1364309124Sdim      } else
1365261991Sdim        CyclicLatency = 0;
1366261991Sdim
1367341825Sdim      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1368341825Sdim                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1369261991Sdim      if (CyclicLatency > MaxCyclicLatency)
1370261991Sdim        MaxCyclicLatency = CyclicLatency;
1371261991Sdim    }
1372261991Sdim  }
1373341825Sdim  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1374261991Sdim  return MaxCyclicLatency;
1375261991Sdim}
1376261991Sdim
1377309124Sdim/// Release ExitSU predecessors and setup scheduler queues. Re-position
1378309124Sdim/// the Top RP tracker in case the region beginning has changed.
1379309124Sdimvoid ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1380309124Sdim                                   ArrayRef<SUnit*> BotRoots) {
1381309124Sdim  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1382309124Sdim  if (ShouldTrackPressure) {
1383309124Sdim    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1384309124Sdim    TopRPTracker.setPos(CurrentTop);
1385309124Sdim  }
1386309124Sdim}
1387309124Sdim
1388243830Sdim/// Move an instruction and update register pressure.
1389276479Sdimvoid ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1390243830Sdim  // Move the instruction to its new location in the instruction stream.
1391243830Sdim  MachineInstr *MI = SU->getInstr();
1392234285Sdim
1393243830Sdim  if (IsTopNode) {
1394243830Sdim    assert(SU->isTopReady() && "node still has unscheduled dependencies");
1395243830Sdim    if (&*CurrentTop == MI)
1396243830Sdim      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1397243830Sdim    else {
1398243830Sdim      moveInstruction(MI, CurrentTop);
1399243830Sdim      TopRPTracker.setPos(MI);
1400243830Sdim    }
1401239462Sdim
1402261991Sdim    if (ShouldTrackPressure) {
1403261991Sdim      // Update top scheduled pressure.
1404309124Sdim      RegisterOperands RegOpers;
1405309124Sdim      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1406309124Sdim      if (ShouldTrackLaneMasks) {
1407309124Sdim        // Adjust liveness and add missing dead+read-undef flags.
1408309124Sdim        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1409309124Sdim        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1410309124Sdim      } else {
1411309124Sdim        // Adjust for missing dead-def flags.
1412309124Sdim        RegOpers.detectDeadDefs(*MI, *LIS);
1413309124Sdim      }
1414309124Sdim
1415309124Sdim      TopRPTracker.advance(RegOpers);
1416261991Sdim      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1417341825Sdim      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1418341825Sdim                     TopRPTracker.getRegSetPressureAtPos(), TRI););
1419296417Sdim
1420261991Sdim      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1421261991Sdim    }
1422309124Sdim  } else {
1423243830Sdim    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1424243830Sdim    MachineBasicBlock::iterator priorII =
1425243830Sdim      priorNonDebug(CurrentBottom, CurrentTop);
1426243830Sdim    if (&*priorII == MI)
1427243830Sdim      CurrentBottom = priorII;
1428234285Sdim    else {
1429243830Sdim      if (&*CurrentTop == MI) {
1430243830Sdim        CurrentTop = nextIfDebug(++CurrentTop, priorII);
1431243830Sdim        TopRPTracker.setPos(CurrentTop);
1432234285Sdim      }
1433243830Sdim      moveInstruction(MI, CurrentBottom);
1434243830Sdim      CurrentBottom = MI;
1435341825Sdim      BotRPTracker.setPos(CurrentBottom);
1436234285Sdim    }
1437261991Sdim    if (ShouldTrackPressure) {
1438309124Sdim      RegisterOperands RegOpers;
1439309124Sdim      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1440309124Sdim      if (ShouldTrackLaneMasks) {
1441309124Sdim        // Adjust liveness and add missing dead+read-undef flags.
1442309124Sdim        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1443309124Sdim        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1444309124Sdim      } else {
1445309124Sdim        // Adjust for missing dead-def flags.
1446309124Sdim        RegOpers.detectDeadDefs(*MI, *LIS);
1447309124Sdim      }
1448309124Sdim
1449327952Sdim      if (BotRPTracker.getPos() != CurrentBottom)
1450327952Sdim        BotRPTracker.recedeSkipDebugValues();
1451309124Sdim      SmallVector<RegisterMaskPair, 8> LiveUses;
1452309124Sdim      BotRPTracker.recede(RegOpers, &LiveUses);
1453261991Sdim      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1454341825Sdim      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1455341825Sdim                     BotRPTracker.getRegSetPressureAtPos(), TRI););
1456296417Sdim
1457261991Sdim      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1458261991Sdim      updatePressureDiffs(LiveUses);
1459261991Sdim    }
1460234285Sdim  }
1461243830Sdim}
1462239462Sdim
1463234285Sdim//===----------------------------------------------------------------------===//
1464309124Sdim// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1465249423Sdim//===----------------------------------------------------------------------===//
1466249423Sdim
1467249423Sdimnamespace {
1468321369Sdim
1469341825Sdim/// Post-process the DAG to create cluster edges between neighboring
1470309124Sdim/// loads or between neighboring stores.
1471309124Sdimclass BaseMemOpClusterMutation : public ScheduleDAGMutation {
1472309124Sdim  struct MemOpInfo {
1473249423Sdim    SUnit *SU;
1474353358Sdim    const MachineOperand *BaseOp;
1475309124Sdim    int64_t Offset;
1476321369Sdim
1477353358Sdim    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1478344779Sdim        : SU(su), BaseOp(Op), Offset(ofs) {}
1479276479Sdim
1480344779Sdim    bool operator<(const MemOpInfo &RHS) const {
1481344779Sdim      if (BaseOp->getType() != RHS.BaseOp->getType())
1482344779Sdim        return BaseOp->getType() < RHS.BaseOp->getType();
1483344779Sdim
1484344779Sdim      if (BaseOp->isReg())
1485344779Sdim        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1486344779Sdim               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1487344779Sdim                               RHS.SU->NodeNum);
1488344779Sdim      if (BaseOp->isFI()) {
1489344779Sdim        const MachineFunction &MF =
1490344779Sdim            *BaseOp->getParent()->getParent()->getParent();
1491344779Sdim        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1492344779Sdim        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1493344779Sdim                              TargetFrameLowering::StackGrowsDown;
1494344779Sdim        // Can't use tuple comparison here since we might need to use a
1495344779Sdim        // different order when the stack grows down.
1496344779Sdim        if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1497344779Sdim          return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1498344779Sdim                                : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1499344779Sdim
1500344779Sdim        if (Offset != RHS.Offset)
1501360784Sdim          return Offset < RHS.Offset;
1502344779Sdim
1503344779Sdim        return SU->NodeNum < RHS.SU->NodeNum;
1504344779Sdim      }
1505344779Sdim
1506344779Sdim      llvm_unreachable("MemOpClusterMutation only supports register or frame "
1507344779Sdim                       "index bases.");
1508276479Sdim    }
1509249423Sdim  };
1510249423Sdim
1511249423Sdim  const TargetInstrInfo *TII;
1512249423Sdim  const TargetRegisterInfo *TRI;
1513309124Sdim  bool IsLoad;
1514309124Sdim
1515249423Sdimpublic:
1516309124Sdim  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1517309124Sdim                           const TargetRegisterInfo *tri, bool IsLoad)
1518309124Sdim      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1519249423Sdim
1520309124Sdim  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1521309124Sdim
1522249423Sdimprotected:
1523353358Sdim  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1524249423Sdim};
1525309124Sdim
1526309124Sdimclass StoreClusterMutation : public BaseMemOpClusterMutation {
1527309124Sdimpublic:
1528309124Sdim  StoreClusterMutation(const TargetInstrInfo *tii,
1529309124Sdim                       const TargetRegisterInfo *tri)
1530309124Sdim      : BaseMemOpClusterMutation(tii, tri, false) {}
1531309124Sdim};
1532309124Sdim
1533309124Sdimclass LoadClusterMutation : public BaseMemOpClusterMutation {
1534309124Sdimpublic:
1535309124Sdim  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1536309124Sdim      : BaseMemOpClusterMutation(tii, tri, true) {}
1537309124Sdim};
1538249423Sdim
1539321369Sdim} // end anonymous namespace
1540321369Sdim
1541314564Sdimnamespace llvm {
1542314564Sdim
1543314564Sdimstd::unique_ptr<ScheduleDAGMutation>
1544314564SdimcreateLoadClusterDAGMutation(const TargetInstrInfo *TII,
1545314564Sdim                             const TargetRegisterInfo *TRI) {
1546360784Sdim  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1547314564Sdim                            : nullptr;
1548314564Sdim}
1549314564Sdim
1550314564Sdimstd::unique_ptr<ScheduleDAGMutation>
1551314564SdimcreateStoreClusterDAGMutation(const TargetInstrInfo *TII,
1552314564Sdim                              const TargetRegisterInfo *TRI) {
1553360784Sdim  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1554314564Sdim                            : nullptr;
1555314564Sdim}
1556314564Sdim
1557321369Sdim} // end namespace llvm
1558314564Sdim
1559309124Sdimvoid BaseMemOpClusterMutation::clusterNeighboringMemOps(
1560353358Sdim    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1561309124Sdim  SmallVector<MemOpInfo, 32> MemOpRecords;
1562321369Sdim  for (SUnit *SU : MemOps) {
1563353358Sdim    const MachineOperand *BaseOp;
1564309124Sdim    int64_t Offset;
1565344779Sdim    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1566344779Sdim      MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1567249423Sdim  }
1568309124Sdim  if (MemOpRecords.size() < 2)
1569249423Sdim    return;
1570309124Sdim
1571344779Sdim  llvm::sort(MemOpRecords);
1572249423Sdim  unsigned ClusterLength = 1;
1573309124Sdim  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1574309124Sdim    SUnit *SUa = MemOpRecords[Idx].SU;
1575309124Sdim    SUnit *SUb = MemOpRecords[Idx+1].SU;
1576360784Sdim    if (SUa->NodeNum > SUb->NodeNum)
1577360784Sdim      std::swap(SUa, SUb);
1578344779Sdim    if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1579344779Sdim                                 *MemOpRecords[Idx + 1].BaseOp,
1580309124Sdim                                 ClusterLength) &&
1581309124Sdim        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1582341825Sdim      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1583341825Sdim                        << SUb->NodeNum << ")\n");
1584249423Sdim      // Copy successor edges from SUa to SUb. Interleaving computation
1585249423Sdim      // dependent on SUa can prevent load combining due to register reuse.
1586249423Sdim      // Predecessor edges do not need to be copied from SUb to SUa since nearby
1587249423Sdim      // loads should have effectively the same inputs.
1588321369Sdim      for (const SDep &Succ : SUa->Succs) {
1589321369Sdim        if (Succ.getSUnit() == SUb)
1590249423Sdim          continue;
1591341825Sdim        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1592341825Sdim                          << ")\n");
1593321369Sdim        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1594249423Sdim      }
1595249423Sdim      ++ClusterLength;
1596309124Sdim    } else
1597249423Sdim      ClusterLength = 1;
1598249423Sdim  }
1599249423Sdim}
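
// Worked example (hypothetical SUnits and offsets): suppose MemOpRecords
// sorts to loads at [%base+0], [%base+4], [%base+8], [%base+64], all off
// one base register. If the target's shouldClusterMemOps() accepts the
// 0->4 and 4->8 pairs but rejects the 8->64 gap, the loop above adds
// cluster edges over the first three loads (ClusterLength grows 1 -> 2 ->
// 3) and then resets ClusterLength to 1 at the 64-offset load.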
1600249423Sdim
1601341825Sdim/// Callback from DAG postProcessing to create cluster edges for loads or stores.
1602353358Sdimvoid BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1603360784Sdim  // Map each chain's head NodeNum to the memory ops belonging to that chain.
1604360784Sdim  DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
1605321369Sdim  for (SUnit &SU : DAG->SUnits) {
1606321369Sdim    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1607321369Sdim        (!IsLoad && !SU.getInstr()->mayStore()))
1608249423Sdim      continue;
1609309124Sdim
1610249423Sdim    unsigned ChainPredID = DAG->SUnits.size();
1611321369Sdim    for (const SDep &Pred : SU.Preds) {
1612321369Sdim      if (Pred.isCtrl()) {
1613321369Sdim        ChainPredID = Pred.getSUnit()->NodeNum;
1614249423Sdim        break;
1615249423Sdim      }
1616249423Sdim    }
1617360784Sdim    // Insert the SU into the corresponding store chain.
1618360784Sdim    auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
1619360784Sdim    Chain.push_back(&SU);
1620249423Sdim  }
1621309124Sdim
1622249423Sdim  // Iterate over the store chains.
1623360784Sdim  for (auto &SCD : StoreChains)
1624360784Sdim    clusterNeighboringMemOps(SCD.second, DAG);
1625249423Sdim}
1626249423Sdim
1627249423Sdim//===----------------------------------------------------------------------===//
1628321369Sdim// CopyConstrain - DAG post-processing to encourage copy elimination.
1629249423Sdim//===----------------------------------------------------------------------===//
1630249423Sdim
1631249423Sdimnamespace {
1632249423Sdim
1633341825Sdim/// Post-process the DAG to create weak edges from all uses of a copy to
1634251662Sdim/// the one use that defines the copy's source vreg, most likely an induction
1635251662Sdim/// variable increment.
1636251662Sdimclass CopyConstrain : public ScheduleDAGMutation {
1637251662Sdim  // Transient state.
1638251662Sdim  SlotIndex RegionBeginIdx;
1639327952Sdim
1640251662Sdim  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641251662Sdim  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642251662Sdim  SlotIndex RegionEndIdx;
1643321369Sdim
1644251662Sdimpublic:
1645251662Sdim  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646251662Sdim
1647309124Sdim  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648251662Sdim
1649251662Sdimprotected:
1650276479Sdim  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651251662Sdim};
1652251662Sdim
1653321369Sdim} // end anonymous namespace
1654321369Sdim
1655314564Sdimnamespace llvm {
1656314564Sdim
1657314564Sdimstd::unique_ptr<ScheduleDAGMutation>
1658314564SdimcreateCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659321369Sdim                               const TargetRegisterInfo *TRI) {
1660360784Sdim  return std::make_unique<CopyConstrain>(TII, TRI);
1661314564Sdim}
1662314564Sdim
1663321369Sdim} // end namespace llvm
1664314564Sdim
1665251662Sdim/// constrainLocalCopy handles two possibilities:
1666251662Sdim/// 1) Local src:
1667251662Sdim/// I0:     = dst
1668251662Sdim/// I1: src = ...
1669251662Sdim/// I2:     = dst
1670251662Sdim/// I3: dst = src (copy)
1671251662Sdim/// (create pred->succ edges I0->I1, I2->I1)
1672251662Sdim///
1673251662Sdim/// 2) Local copy:
1674251662Sdim/// I0: dst = src (copy)
1675251662Sdim/// I1:     = dst
1676251662Sdim/// I2: src = ...
1677251662Sdim/// I3:     = dst
1678251662Sdim/// (create pred->succ edges I1->I2, I3->I2)
1679251662Sdim///
1680251662Sdim/// Although the MachineScheduler is currently constrained to single blocks,
1681251662Sdim/// this algorithm should handle extended blocks. An EBB is a set of
1682251662Sdim/// contiguously numbered blocks such that the previous block in the EBB is
1683251662Sdim/// always the single predecessor.
1684276479Sdimvoid CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685251662Sdim  LiveIntervals *LIS = DAG->getLIS();
1686251662Sdim  MachineInstr *Copy = CopySU->getInstr();
1687251662Sdim
1688251662Sdim  // Check for pure vreg copies.
1689309124Sdim  const MachineOperand &SrcOp = Copy->getOperand(1);
1690360784Sdim  Register SrcReg = SrcOp.getReg();
1691360784Sdim  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692251662Sdim    return;
1693251662Sdim
1694309124Sdim  const MachineOperand &DstOp = Copy->getOperand(0);
1695360784Sdim  Register DstReg = DstOp.getReg();
1696360784Sdim  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1697251662Sdim    return;
1698251662Sdim
1699251662Sdim  // Check if either the dest or source is local. If it's live across a back
1700251662Sdim  // edge, it's not local. Note that if both vregs are live across the back
1701251662Sdim  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702288943Sdim  // If both the copy's source and dest are local live intervals, then we
1703288943Sdim  // should treat the dest as the global for the purpose of adding
1704288943Sdim  // constraints. This adds edges from source's other uses to the copy.
1705288943Sdim  unsigned LocalReg = SrcReg;
1706288943Sdim  unsigned GlobalReg = DstReg;
1707251662Sdim  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708251662Sdim  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709288943Sdim    LocalReg = DstReg;
1710288943Sdim    GlobalReg = SrcReg;
1711251662Sdim    LocalLI = &LIS->getInterval(LocalReg);
1712251662Sdim    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713251662Sdim      return;
1714251662Sdim  }
1715251662Sdim  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716251662Sdim
1717251662Sdim  // Find the global segment after the start of the local LI.
1718251662Sdim  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719251662Sdim  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720251662Sdim  // local live range. We could create edges from other global uses to the local
1721251662Sdim  // start, but the coalescer should have already eliminated these cases, so
1722251662Sdim  // don't bother dealing with it.
1723251662Sdim  if (GlobalSegment == GlobalLI->end())
1724251662Sdim    return;
1725251662Sdim
1726251662Sdim  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727251662Sdim  // returned the next global segment. But if GlobalSegment overlaps with
1728341825Sdim  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729251662Sdim  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730251662Sdim  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731251662Sdim    ++GlobalSegment;
1732251662Sdim
1733251662Sdim  if (GlobalSegment == GlobalLI->end())
1734251662Sdim    return;
1735251662Sdim
1736251662Sdim  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737251662Sdim  if (GlobalSegment != GlobalLI->begin()) {
1738251662Sdim    // Two address defs have no hole.
1739276479Sdim    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740251662Sdim                               GlobalSegment->start)) {
1741251662Sdim      return;
1742251662Sdim    }
1743261991Sdim    // If the prior global segment may be defined by the same two-address
1744261991Sdim    // instruction that also defines LocalLI, then we can't make a hole here.
1745276479Sdim    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746261991Sdim                               LocalLI->beginIndex())) {
1747261991Sdim      return;
1748261991Sdim    }
1749251662Sdim    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750251662Sdim    // it would be a disconnected component in the live range.
1751276479Sdim    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752251662Sdim           "Disconnected LRG within the scheduling region.");
1753251662Sdim  }
1754251662Sdim  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755251662Sdim  if (!GlobalDef)
1756251662Sdim    return;
1757251662Sdim
1758251662Sdim  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759251662Sdim  if (!GlobalSU)
1760251662Sdim    return;
1761251662Sdim
1762251662Sdim  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763251662Sdim  // constraining the uses of the last local def to precede GlobalDef.
1764251662Sdim  SmallVector<SUnit*,8> LocalUses;
1765251662Sdim  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766251662Sdim  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767251662Sdim  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768321369Sdim  for (const SDep &Succ : LastLocalSU->Succs) {
1769321369Sdim    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770251662Sdim      continue;
1771321369Sdim    if (Succ.getSUnit() == GlobalSU)
1772251662Sdim      continue;
1773321369Sdim    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774251662Sdim      return;
1775321369Sdim    LocalUses.push_back(Succ.getSUnit());
1776251662Sdim  }
1777251662Sdim  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778251662Sdim  // to precede the start of LocalLI.
1779251662Sdim  SmallVector<SUnit*,8> GlobalUses;
1780251662Sdim  MachineInstr *FirstLocalDef =
1781251662Sdim    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782251662Sdim  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783321369Sdim  for (const SDep &Pred : GlobalSU->Preds) {
1784321369Sdim    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785251662Sdim      continue;
1786321369Sdim    if (Pred.getSUnit() == FirstLocalSU)
1787251662Sdim      continue;
1788321369Sdim    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789251662Sdim      return;
1790321369Sdim    GlobalUses.push_back(Pred.getSUnit());
1791251662Sdim  }
1792341825Sdim  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793251662Sdim  // Add the weak edges.
1794251662Sdim  for (SmallVectorImpl<SUnit*>::const_iterator
1795251662Sdim         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796341825Sdim    LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1797341825Sdim                      << GlobalSU->NodeNum << ")\n");
1798251662Sdim    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799251662Sdim  }
1800251662Sdim  for (SmallVectorImpl<SUnit*>::const_iterator
1801251662Sdim         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802341825Sdim    LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1803341825Sdim                      << FirstLocalSU->NodeNum << ")\n");
1804251662Sdim    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805251662Sdim  }
1806251662Sdim}
1807251662Sdim
1808341825Sdim/// Callback from DAG postProcessing to create weak edges to encourage
1809251662Sdim/// copy elimination.
1810309124Sdimvoid CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811309124Sdim  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812276479Sdim  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813276479Sdim
1814251662Sdim  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815251662Sdim  if (FirstPos == DAG->end())
1816251662Sdim    return;
1817309124Sdim  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818251662Sdim  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819309124Sdim      *priorNonDebug(DAG->end(), DAG->begin()));
1820251662Sdim
1821321369Sdim  for (SUnit &SU : DAG->SUnits) {
1822321369Sdim    if (!SU.getInstr()->isCopy())
1823251662Sdim      continue;
1824251662Sdim
1825321369Sdim    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826251662Sdim  }
1827251662Sdim}
1828251662Sdim
1829251662Sdim//===----------------------------------------------------------------------===//
1830276479Sdim// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831276479Sdim// and possibly other custom schedulers.
1832234285Sdim//===----------------------------------------------------------------------===//
1833234285Sdim
1834276479Sdimstatic const unsigned InvalidCycle = ~0U;
1835239462Sdim
1836276479SdimSchedBoundary::~SchedBoundary() { delete HazardRec; }
1837239462Sdim
1838327952Sdim/// Given a Count of resource usage and a Latency value, return true if a
1839327952Sdim/// SchedBoundary becomes resource limited.
1840353358Sdim/// If we are checking after scheduling a node, we should return true when
1841353358Sdim/// we just reach the resource limit.
1842327952Sdimstatic bool checkResourceLimit(unsigned LFactor, unsigned Count,
1843353358Sdim                               unsigned Latency, bool AfterSchedNode) {
1844353358Sdim  int ResCntFactor = (int)(Count - (Latency * LFactor));
1845353358Sdim  if (AfterSchedNode)
1846353358Sdim    return ResCntFactor >= (int)LFactor;
1847353358Sdim  else
1848353358Sdim    return ResCntFactor > (int)LFactor;
1849327952Sdim}
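
// Numeric sketch (hypothetical values): with LFactor == 2, Count == 10 and
// Latency == 4, ResCntFactor = 10 - 4*2 = 2, so:
//
//   checkResourceLimit(2, 10, 4, /*AfterSchedNode=*/true);  // 2 >= 2: true
//   checkResourceLimit(2, 10, 4, /*AfterSchedNode=*/false); // 2 > 2: false
//
// i.e. after scheduling a node, just reaching the limit counts; before, the
// limit must be strictly exceeded.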
1850327952Sdim
1851276479Sdimvoid SchedBoundary::reset() {
1852276479Sdim  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1853276479Sdim  // Destroying and reconstructing it is very expensive though. So keep
1854276479Sdim  // invalid, placeholder HazardRecs.
1855276479Sdim  if (HazardRec && HazardRec->isEnabled()) {
1856276479Sdim    delete HazardRec;
1857276479Sdim    HazardRec = nullptr;
1858276479Sdim  }
1859276479Sdim  Available.clear();
1860276479Sdim  Pending.clear();
1861276479Sdim  CheckPending = false;
1862276479Sdim  CurrCycle = 0;
1863276479Sdim  CurrMOps = 0;
1864321369Sdim  MinReadyCycle = std::numeric_limits<unsigned>::max();
1865276479Sdim  ExpectedLatency = 0;
1866276479Sdim  DependentLatency = 0;
1867276479Sdim  RetiredMOps = 0;
1868276479Sdim  MaxExecutedResCount = 0;
1869276479Sdim  ZoneCritResIdx = 0;
1870276479Sdim  IsResourceLimited = false;
1871276479Sdim  ReservedCycles.clear();
1872353358Sdim  ReservedCyclesIndex.clear();
1873243830Sdim#ifndef NDEBUG
1874276479Sdim  // Track the maximum number of stall cycles that could arise either from the
1875276479Sdim  // latency of a DAG edge or the number of cycles that a processor resource is
1876276479Sdim  // reserved (SchedBoundary::ReservedCycles).
1877276479Sdim  MaxObservedStall = 0;
1878243830Sdim#endif
1879276479Sdim  // Reserve a zero-count for invalid CritResIdx.
1880276479Sdim  ExecutedResCounts.resize(1);
1881276479Sdim  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1882276479Sdim}
1883239462Sdim
1884276479Sdimvoid SchedRemainder::
1885243830Sdiminit(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1886243830Sdim  reset();
1887243830Sdim  if (!SchedModel->hasInstrSchedModel())
1888243830Sdim    return;
1889243830Sdim  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1890321369Sdim  for (SUnit &SU : DAG->SUnits) {
1891321369Sdim    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1892321369Sdim    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1893261991Sdim      * SchedModel->getMicroOpFactor();
1894243830Sdim    for (TargetSchedModel::ProcResIter
1895243830Sdim           PI = SchedModel->getWriteProcResBegin(SC),
1896243830Sdim           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1897243830Sdim      unsigned PIdx = PI->ProcResourceIdx;
1898243830Sdim      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1899243830Sdim      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1900243830Sdim    }
1901243830Sdim  }
1902243830Sdim}
1903243830Sdim
1904276479Sdimvoid SchedBoundary::
1905243830Sdiminit(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1906243830Sdim  reset();
1907243830Sdim  DAG = dag;
1908243830Sdim  SchedModel = smodel;
1909243830Sdim  Rem = rem;
1910276479Sdim  if (SchedModel->hasInstrSchedModel()) {
1911353358Sdim    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1912353358Sdim    ReservedCyclesIndex.resize(ResourceCount);
1913353358Sdim    ExecutedResCounts.resize(ResourceCount);
1914353358Sdim    unsigned NumUnits = 0;
1915353358Sdim
1916353358Sdim    for (unsigned i = 0; i < ResourceCount; ++i) {
1917353358Sdim      ReservedCyclesIndex[i] = NumUnits;
1918353358Sdim      NumUnits += SchedModel->getProcResource(i)->NumUnits;
1919353358Sdim    }
1920353358Sdim
1921353358Sdim    ReservedCycles.resize(NumUnits, InvalidCycle);
1922261991Sdim  }
1923261991Sdim}
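
// Layout sketch (hypothetical machine model): three resource kinds with
// NumUnits {1, 2, 3} yield ReservedCyclesIndex = {0, 1, 3} and a flat
// ReservedCycles vector of six slots, all InvalidCycle initially:
//
//   kind 0 -> slot [0]; kind 1 -> slots [1,2]; kind 2 -> slots [3,4,5]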
1924261991Sdim
1925276479Sdim/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1926276479Sdim/// these "soft stalls" differently than the hard stall cycles based on CPU
1927276479Sdim/// resources and computed by checkHazard(). A fully in-order model
1928276479Sdim/// (MicroOpBufferSize==0) will not make use of this since instructions are not
1929276479Sdim/// available for scheduling until they are ready. However, a weaker in-order
1930276479Sdim/// model may use this for heuristics. For example, if a processor has in-order
1931276479Sdim/// behavior when reading certain resources, this may come into play.
1932276479Sdimunsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1933276479Sdim  if (!SU->isUnbuffered)
1934276479Sdim    return 0;
1935249423Sdim
1936276479Sdim  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1937276479Sdim  if (ReadyCycle > CurrCycle)
1938276479Sdim    return ReadyCycle - CurrCycle;
1939276479Sdim  return 0;
1940239462Sdim}
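
// Usage sketch: a strategy may compare candidates on these soft stalls.
// The accessor is the one defined above; the candidate variables and the
// helper are hypothetical:
//
//   unsigned TryStall  = Zone.getLatencyStallCycles(TryCand.SU);
//   unsigned CandStall = Zone.getLatencyStallCycles(Cand.SU);
//   if (TryStall != CandStall)
//     pickLessStalled(TryStall < CandStall); // hypothetical helper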
1941239462Sdim
1942353358Sdim/// Compute the next cycle at which the given processor resource unit
1943353358Sdim/// can be scheduled.
1944353358Sdimunsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1945353358Sdim                                                       unsigned Cycles) {
1946353358Sdim  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1947276479Sdim  // If this resource has never been used, always return cycle zero.
1948276479Sdim  if (NextUnreserved == InvalidCycle)
1949276479Sdim    return 0;
1950276479Sdim  // For bottom-up scheduling add the cycles needed for the current operation.
1951276479Sdim  if (!isTop())
1952276479Sdim    NextUnreserved += Cycles;
1953276479Sdim  return NextUnreserved;
1954239462Sdim}
1955234285Sdim
1956353358Sdim/// Compute the next cycle at which the given processor resource can be
1957353358Sdim/// scheduled.  Returns the next cycle and the index of the processor resource
1958353358Sdim/// instance in the reserved cycles vector.
1959353358Sdimstd::pair<unsigned, unsigned>
1960353358SdimSchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1961353358Sdim  unsigned MinNextUnreserved = InvalidCycle;
1962353358Sdim  unsigned InstanceIdx = 0;
1963353358Sdim  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1964353358Sdim  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1965353358Sdim  assert(NumberOfInstances > 0 &&
1966353358Sdim         "Cannot have zero instances of a ProcResource");
1967353358Sdim
1968353358Sdim  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1969353358Sdim       ++I) {
1970353358Sdim    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1971353358Sdim    if (MinNextUnreserved > NextUnreserved) {
1972353358Sdim      InstanceIdx = I;
1973353358Sdim      MinNextUnreserved = NextUnreserved;
1974353358Sdim    }
1975353358Sdim  }
1976353358Sdim  return std::make_pair(MinNextUnreserved, InstanceIdx);
1977353358Sdim}
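
// Illustrative sketch (hypothetical state): a resource kind with two units
// whose ReservedCycles slots hold {7, 3}. With Cycles == 1, a top-down
// zone sees candidate cycles {7, 3} and a bottom-up zone sees {8, 4};
// either way the second unit wins:
//
//   std::tie(NextCycle, InstanceIdx) = getNextResourceCycle(PIdx, 1);
//   // top-down: NextCycle == 3; bottom-up: NextCycle == 4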
1978353358Sdim
1979239462Sdim/// Does this SU have a hazard within the current instruction group.
1980239462Sdim///
1981239462Sdim/// The scheduler supports two modes of hazard recognition. The first is the
1982239462Sdim/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1983239462Sdim/// supports highly complicated in-order reservation tables
1984341825Sdim/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1985239462Sdim///
1986239462Sdim/// The second is a streamlined mechanism that checks for hazards based on
1987239462Sdim/// simple counters that the scheduler itself maintains. It explicitly checks
1988239462Sdim/// for instruction dispatch limitations, including the number of micro-ops that
1989239462Sdim/// can dispatch per cycle.
1990239462Sdim///
1991239462Sdim/// TODO: Also check whether the SU must start a new group.
1992276479Sdimbool SchedBoundary::checkHazard(SUnit *SU) {
1993276479Sdim  if (HazardRec->isEnabled()
1994276479Sdim      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1995276479Sdim    return true;
1996276479Sdim  }
1997321369Sdim
1998243830Sdim  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1999261991Sdim  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2000341825Sdim    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2001341825Sdim                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2002239462Sdim    return true;
2003243830Sdim  }
2004321369Sdim
2005321369Sdim  if (CurrMOps > 0 &&
2006321369Sdim      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2007321369Sdim       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2008341825Sdim    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2009341825Sdim                      << (isTop() ? "begin" : "end") << " group\n");
2010321369Sdim    return true;
2011321369Sdim  }
2012321369Sdim
2013276479Sdim  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2014276479Sdim    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2015327952Sdim    for (const MCWriteProcResEntry &PE :
2016327952Sdim          make_range(SchedModel->getWriteProcResBegin(SC),
2017327952Sdim                     SchedModel->getWriteProcResEnd(SC))) {
2018327952Sdim      unsigned ResIdx = PE.ProcResourceIdx;
2019327952Sdim      unsigned Cycles = PE.Cycles;
2020353358Sdim      unsigned NRCycle, InstanceIdx;
2021353358Sdim      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2022276479Sdim      if (NRCycle > CurrCycle) {
2023276479Sdim#ifndef NDEBUG
2024327952Sdim        MaxObservedStall = std::max(Cycles, MaxObservedStall);
2025276479Sdim#endif
2026341825Sdim        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2027353358Sdim                          << SchedModel->getResourceName(ResIdx)
2028353358Sdim                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2029353358Sdim                          << "=" << NRCycle << "c\n");
2030276479Sdim        return true;
2031276479Sdim      }
2032276479Sdim    }
2033276479Sdim  }
2034239462Sdim  return false;
2035239462Sdim}
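
// Dispatch-group sketch (hypothetical widths): with IssueWidth == 2 and
// CurrMOps == 1, an SU worth two micro-ops fails the check above
// (1 + 2 > 2), so checkHazard() returns true and the node waits for the
// next cycle instead of overflowing the current group.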
2036239462Sdim
2037261991Sdim// Find the unscheduled node in ReadySUs with the highest latency.
2038276479Sdimunsigned SchedBoundary::
2039261991SdimfindMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2040276479Sdim  SUnit *LateSU = nullptr;
2041249423Sdim  unsigned RemLatency = 0;
2042321369Sdim  for (SUnit *SU : ReadySUs) {
2043321369Sdim    unsigned L = getUnscheduledLatency(SU);
2044261991Sdim    if (L > RemLatency) {
2045249423Sdim      RemLatency = L;
2046321369Sdim      LateSU = SU;
2047261991Sdim    }
2048243830Sdim  }
2049261991Sdim  if (LateSU) {
2050341825Sdim    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2051341825Sdim                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
2052249423Sdim  }
2053261991Sdim  return RemLatency;
2054261991Sdim}
2055261991Sdim
2056261991Sdim// Count resources in this zone and the remaining unscheduled
2057261991Sdim// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2058261991Sdim// resource index, or zero if the zone is issue limited.
2059276479Sdimunsigned SchedBoundary::
2060261991SdimgetOtherResourceCount(unsigned &OtherCritIdx) {
2061261991Sdim  OtherCritIdx = 0;
2062261991Sdim  if (!SchedModel->hasInstrSchedModel())
2063261991Sdim    return 0;
2064261991Sdim
2065261991Sdim  unsigned OtherCritCount = Rem->RemIssueCount
2066261991Sdim    + (RetiredMOps * SchedModel->getMicroOpFactor());
2067341825Sdim  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2068341825Sdim                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2069261991Sdim  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2070261991Sdim       PIdx != PEnd; ++PIdx) {
2071261991Sdim    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2072261991Sdim    if (OtherCount > OtherCritCount) {
2073261991Sdim      OtherCritCount = OtherCount;
2074261991Sdim      OtherCritIdx = PIdx;
2075261991Sdim    }
2076249423Sdim  }
2077261991Sdim  if (OtherCritIdx) {
2078341825Sdim    LLVM_DEBUG(
2079341825Sdim        dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2080341825Sdim               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2081341825Sdim               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2082261991Sdim  }
2083261991Sdim  return OtherCritCount;
2084243830Sdim}
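
// Numeric sketch (hypothetical counts): with MicroOpFactor == 2,
// RemIssueCount == 10 (already scaled) and RetiredMOps == 5, the issue
// baseline is 10 + 5*2 == 20 scaled units; any resource whose executed +
// remaining scaled count exceeds 20 becomes OtherCritIdx.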
2085243830Sdim
2086360784Sdimvoid SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2087360784Sdim                                unsigned Idx) {
2088276479Sdim  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2089261991Sdim
2090276479Sdim#ifndef NDEBUG
2091276479Sdim  // ReadyCycle was bumped up to the CurrCycle when this node was
2092276479Sdim  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2093276479Sdim  // scheduling, so may now be greater than ReadyCycle.
2094276479Sdim  if (ReadyCycle > CurrCycle)
2095276479Sdim    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2096276479Sdim#endif
2097261991Sdim
2098239462Sdim  if (ReadyCycle < MinReadyCycle)
2099239462Sdim    MinReadyCycle = ReadyCycle;
2100239462Sdim
2101239462Sdim  // Check for interlocks first. For the purpose of other heuristics, an
2102239462Sdim  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2103261991Sdim  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2104360784Sdim  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2105360784Sdim                        checkHazard(SU) || (Available.size() >= ReadyListLimit);
2106360784Sdim
2107360784Sdim  if (!HazardDetected) {
2108360784Sdim    Available.push(SU);
2109360784Sdim
2110360784Sdim    if (InPQueue)
2111360784Sdim      Pending.remove(Pending.begin() + Idx);
2112360784Sdim    return;
2113360784Sdim  }
2114360784Sdim
2115360784Sdim  if (!InPQueue)
2116239462Sdim    Pending.push(SU);
2117239462Sdim}
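
// Gating sketch (hypothetical cycles): in a fully in-order model
// (MicroOpBufferSize == 0) with CurrCycle == 2, a node whose ReadyCycle is
// 5 is parked in Pending; a node ready at cycle 2 with no hazard and room
// left in Available is pushed straight onto the Available queue.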
2118239462Sdim
2119239462Sdim/// Move the boundary of scheduled code by one cycle.
2120276479Sdimvoid SchedBoundary::bumpCycle(unsigned NextCycle) {
2121261991Sdim  if (SchedModel->getMicroOpBufferSize() == 0) {
2122321369Sdim    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2123321369Sdim           "MinReadyCycle uninitialized");
2124261991Sdim    if (MinReadyCycle > NextCycle)
2125261991Sdim      NextCycle = MinReadyCycle;
2126243830Sdim  }
2127261991Sdim  // Update the current micro-ops, which will issue in the next cycle.
2128261991Sdim  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2129261991Sdim  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2130239462Sdim
2131261991Sdim  // Decrement DependentLatency based on the next cycle.
2132261991Sdim  if ((NextCycle - CurrCycle) > DependentLatency)
2133261991Sdim    DependentLatency = 0;
2134261991Sdim  else
2135261991Sdim    DependentLatency -= (NextCycle - CurrCycle);
2136261991Sdim
2137239462Sdim  if (!HazardRec->isEnabled()) {
2138239462Sdim    // Bypass HazardRec virtual calls.
2139239462Sdim    CurrCycle = NextCycle;
2140309124Sdim  } else {
2141239462Sdim    // Bypass getHazardType calls in case of long latency.
2142239462Sdim    for (; CurrCycle != NextCycle; ++CurrCycle) {
2143239462Sdim      if (isTop())
2144239462Sdim        HazardRec->AdvanceCycle();
2145239462Sdim      else
2146239462Sdim        HazardRec->RecedeCycle();
2147234285Sdim    }
2148239462Sdim  }
2149239462Sdim  CheckPending = true;
2150261991Sdim  IsResourceLimited =
2151327952Sdim      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2152353358Sdim                         getScheduledLatency(), true);
2153239462Sdim
2154341825Sdim  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2155341825Sdim                    << '\n');
2156239462Sdim}
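
// Arithmetic sketch (hypothetical widths): with IssueWidth == 4,
// CurrCycle == 10 and CurrMOps == 6, bumpCycle(12) computes
// DecMOps = 4 * (12 - 10) = 8 >= 6, so CurrMOps drops to 0 and
// DependentLatency shrinks by those two cycles (clamping at zero).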
2157239462Sdim
2158276479Sdimvoid SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2159261991Sdim  ExecutedResCounts[PIdx] += Count;
2160261991Sdim  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2161261991Sdim    MaxExecutedResCount = ExecutedResCounts[PIdx];
2162261991Sdim}
2163261991Sdim
2164243830Sdim/// Add the given processor resource to this scheduled zone.
2165261991Sdim///
2166261991Sdim/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2167261991Sdim/// during which this resource is consumed.
2168261991Sdim///
2169261991Sdim/// \return the next cycle at which the instruction may execute without
2170261991Sdim/// oversubscribing resources.
2171276479Sdimunsigned SchedBoundary::
2172276479SdimcountResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2173243830Sdim  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2174261991Sdim  unsigned Count = Factor * Cycles;
2175341825Sdim  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2176341825Sdim                    << Cycles << "x" << Factor << "u\n");
2177243830Sdim
2178261991Sdim  // Update Executed resources counts.
2179261991Sdim  incExecutedResources(PIdx, Count);
2180243830Sdim  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2181243830Sdim  Rem->RemainingCounts[PIdx] -= Count;
2182243830Sdim
2183261991Sdim  // Check if this resource exceeds the current critical resource. If so, it
2184261991Sdim  // becomes the critical resource.
2185261991Sdim  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2186261991Sdim    ZoneCritResIdx = PIdx;
2187341825Sdim    LLVM_DEBUG(dbgs() << "  *** Critical resource "
2188341825Sdim                      << SchedModel->getResourceName(PIdx) << ": "
2189341825Sdim                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2190341825Sdim                      << "c\n");
2191243830Sdim  }
2192276479Sdim  // For reserved resources, record the highest cycle using the resource.
2193353358Sdim  unsigned NextAvailable, InstanceIdx;
2194353358Sdim  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2195276479Sdim  if (NextAvailable > CurrCycle) {
2196341825Sdim    LLVM_DEBUG(dbgs() << "  Resource conflict: "
2197353358Sdim                      << SchedModel->getResourceName(PIdx)
2198353358Sdim                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2199341825Sdim                      << " reserved until @" << NextAvailable << "\n");
2200276479Sdim  }
2201276479Sdim  return NextAvailable;
2202243830Sdim}
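
// Scaling sketch (hypothetical model): a resource P with Factor == 3
// consumed for Cycles == 2 contributes Count = 3 * 2 = 6, which is added
// to ExecutedResCounts[P] and subtracted from Rem->RemainingCounts[P]; if
// getResourceCount(P) now exceeds the critical count, P becomes the new
// ZoneCritResIdx.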
2203243830Sdim
2204239462Sdim/// Move the boundary of scheduled code by one SUnit.
2205276479Sdimvoid SchedBoundary::bumpNode(SUnit *SU) {
2206239462Sdim  // Update the reservation table.
2207239462Sdim  if (HazardRec->isEnabled()) {
2208239462Sdim    if (!isTop() && SU->isCall) {
2209239462Sdim      // Calls are scheduled with their preceding instructions. For bottom-up
2210239462Sdim      // scheduling, clear the pipeline state before emitting.
2211239462Sdim      HazardRec->Reset();
2212234285Sdim    }
2213239462Sdim    HazardRec->EmitInstruction(SU);
2214353358Sdim    // Scheduling an instruction may have made pending instructions available.
2215353358Sdim    CheckPending = true;
2216239462Sdim  }
2217276479Sdim  // checkHazard should prevent scheduling multiple instructions per cycle that
2218276479Sdim  // exceed the issue width.
2219261991Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2220261991Sdim  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2221276479Sdim  assert(
2222276479Sdim      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2223276479Sdim      "Cannot schedule this instruction's MicroOps in the current cycle.");
2224276479Sdim
2225261991Sdim  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2226341825Sdim  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2227261991Sdim
2228276479Sdim  unsigned NextCycle = CurrCycle;
  switch (SchedModel->getMicroOpBufferSize()) {
  case 0:
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    break;
  case 1:
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    }
    break;
  default:
    // We don't currently model the OOO reorder buffer, so consider all
    // scheduled MOps to be "retired". We do loosely model in-order resource
    // latency. If this instruction uses an in-order resource, account for any
    // likely stall cycles.
    if (SU->isUnbuffered && ReadyCycle > NextCycle)
      NextCycle = ReadyCycle;
    break;
  }
  RetiredMOps += IncMOps;

  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    Rem->RemIssueCount -= DecRemIssue;
    if (ZoneCritResIdx) {
      // Scale scheduled micro-ops for comparing with the critical resource.
      unsigned ScaledMOps =
        RetiredMOps * SchedModel->getMicroOpFactor();

      // If scaled micro-ops are now more than the previous critical resource by
      // a full cycle, then micro-ops issue becomes critical.
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
          >= (int)SchedModel->getLatencyFactor()) {
        ZoneCritResIdx = 0;
        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
                          << ScaledMOps / SchedModel->getLatencyFactor()
                          << "c\n");
      }
    }
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned RCycle =
        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
      if (RCycle > NextCycle)
        NextCycle = RCycle;
    }
    if (SU->hasReservedResource) {
      // For reserved resources, record the highest cycle using the resource.
      // For top-down scheduling, this is the cycle in which we schedule this
      // instruction plus the number of cycles the operation reserves the
      // resource. For bottom-up, it is simply the instruction's cycle.
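      // For example (hypothetical numbers): an operation that reserves a
      // unit for 3 cycles and issues at cycle 10 marks that unit busy until
      // cycle 13 when scheduling top-down; scheduling bottom-up, cycle 10
      // itself is recorded as the last cycle in which the unit is used.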
      for (TargetSchedModel::ProcResIter
             PI = SchedModel->getWriteProcResBegin(SC),
             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
        unsigned PIdx = PI->ProcResourceIdx;
        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
          unsigned ReservedUntil, InstanceIdx;
          std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
          if (isTop()) {
            ReservedCycles[InstanceIdx] =
                std::max(ReservedUntil, NextCycle + PI->Cycles);
          } else {
            ReservedCycles[InstanceIdx] = NextCycle;
          }
        }
      }
    }
  }
  // Update ExpectedLatency and DependentLatency.
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  if (SU->getDepth() > TopLatency) {
    TopLatency = SU->getDepth();
    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
                      << SU->NodeNum << ") " << TopLatency << "c\n");
  }
  if (SU->getHeight() > BotLatency) {
    BotLatency = SU->getHeight();
    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
                      << SU->NodeNum << ") " << BotLatency << "c\n");
  }
  // If we stall for any reason, bump the cycle.
  if (NextCycle > CurrCycle)
    bumpCycle(NextCycle);
  else
    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    // resource limited. If a stall occurred, bumpCycle does this.
    IsResourceLimited =
        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
                           getScheduledLatency(), true);

  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
  // one cycle. Since we commonly reach the max MOps here, opportunistically
  // bump the cycle to avoid uselessly checking everything in the readyQ.
  CurrMOps += IncMOps;

  // Bump the cycle count for issue group constraints.
  // This must be done after NextCycle has been adjusted for all other stalls.
  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
  // CurrCycle to X.
  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
                      << " group\n");
    bumpCycle(++NextCycle);
  }

  while (CurrMOps >= SchedModel->getIssueWidth()) {
    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
                      << CurrCycle << '\n');
    bumpCycle(++NextCycle);
  }
  LLVM_DEBUG(dumpScheduledState());
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
    SUnit *SU = *(Pending.begin() + I);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (Available.size() >= ReadyListLimit)
      break;

    releaseNode(SU, ReadyCycle, true, I);
    if (E != Pending.size()) {
      --I;
      --E;
    }
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return nullptr.
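///
/// For example, if every instruction in Available now triggers a hazard, each
/// one is moved back to Pending and the cycle is bumped until releasePending
/// makes at least one node available again.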
SUnit *SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (CurrMOps > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
//  FIXME: Re-enable assert once PR20057 is resolved.
//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
//           "permanent hazard");
    (void)i;
    bumpCycle(CurrCycle + 1);
    releasePending();
  }

  LLVM_DEBUG(Pending.dump());
  LLVM_DEBUG(Available.dump());

  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
  unsigned ResFactor;
  unsigned ResCount;
  if (ZoneCritResIdx) {
    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    ResCount = getResourceCount(ZoneCritResIdx);
  } else {
    ResFactor = SchedModel->getMicroOpFactor();
    ResCount = RetiredMOps * ResFactor;
  }
  unsigned LFactor = SchedModel->getLatencyFactor();
  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
         << "  Retired: " << RetiredMOps;
  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
         << ResCount / ResFactor << " "
         << SchedModel->getResourceName(ZoneCritResIdx)
         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
         << (IsResourceLimited ? "  - Resource" : "  - Latency")
         << " limited.\n";
}
#endif

//===----------------------------------------------------------------------===//
// GenericScheduler - Generic implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//

void GenericSchedulerBase::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Compute remaining latency. We need this both to determine whether the
/// overall schedule has become latency-limited and whether the instructions
/// outside this zone are resource or latency limited.
///
/// The "dependent" latency is updated incrementally during scheduling as the
/// max height/depth of scheduled nodes minus the cycles since it was
/// scheduled:
///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
///
/// The "independent" latency is the max ready queue depth:
///   ILat = max N.depth for N in Available|Pending
///
/// RemainingLatency is the greater of independent and dependent latency.
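///
/// For example (hypothetical values): if DependentLatency is 7 cycles, the
/// deepest node in Available has depth 9, and the deepest node in Pending has
/// depth 5, then RemainingLatency = max(7, 9, 5) = 9 cycles.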
///
/// These computations are expensive, especially in DAGs with many edges, so
/// only do them if necessary.
static unsigned computeRemLatency(SchedBoundary &CurrZone) {
  unsigned RemLatency = CurrZone.getDependentLatency();
  RemLatency = std::max(RemLatency,
                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
  RemLatency = std::max(RemLatency,
                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
  return RemLatency;
}

/// Returns true if the current cycle plus remaining latency is greater than
/// the critical path in the scheduling region.
bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
                                               SchedBoundary &CurrZone,
                                               bool ComputeRemLatency,
                                               unsigned &RemLatency) const {
  // If the current cycle is already greater than the critical path, we are
  // already latency limited and don't need to compute the remaining latency.
  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
    return true;

  // If we haven't scheduled anything yet, then we aren't latency limited.
  if (CurrZone.getCurrCycle() == 0)
    return false;

  if (ComputeRemLatency)
    RemLatency = computeRemLatency(CurrZone);

  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
}
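
// For example (hypothetical values): with CurrCycle = 10c, RemLatency = 8c,
// and a critical path of 15c, shouldReduceLatency returns true because
// 10 + 8 > 15, so the zone should schedule aggressively for latency.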

/// Set the CandPolicy for a scheduling zone, given the current resources and
/// latencies inside and outside the zone.
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
                                     SchedBoundary &CurrZone,
                                     SchedBoundary *OtherZone) {
  // Apply preemptive heuristics based on the total latency and resources
  // inside and outside this zone. Potential stalls should be considered before
  // following this policy.

  // Compute the critical resource outside the zone.
  unsigned OtherCritIdx = 0;
  unsigned OtherCount =
    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;

  bool OtherResLimited = false;
  unsigned RemLatency = 0;
  bool RemLatencyComputed = false;
  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
    RemLatency = computeRemLatency(CurrZone);
    RemLatencyComputed = true;
    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
                                         OtherCount, RemLatency, false);
  }

  // Schedule aggressively for latency in PostRA mode. We don't check for
  // acyclic latency during PostRA, and highly out-of-order processors will
  // skip PostRA scheduling.
  if (!OtherResLimited &&
      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
                                       RemLatency))) {
    Policy.ReduceLatency |= true;
    LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
                      << " RemainingLatency " << RemLatency << " + "
                      << CurrZone.getCurrCycle() << "c > CritPath "
                      << Rem.CriticalPath << "\n");
  }
  // If the same resource is limiting inside and outside the zone, do nothing.
  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
    return;

  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
  } if (OtherResLimited) dbgs()
                 << "  RemainingLimit: "
                 << SchedModel->getResourceName(OtherCritIdx) << "\n";
             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
             << "  Latency limited both directions.\n");

  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();

  if (OtherResLimited)
    Policy.DemandResIdx = OtherCritIdx;
}

#ifndef NDEBUG
const char *GenericSchedulerBase::getReasonStr(
  GenericSchedulerBase::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case Only1:          return "ONLY1     ";
  case PhysReg:        return "PHYS-REG  ";
  case RegExcess:      return "REG-EXCESS";
  case RegCritical:    return "REG-CRIT  ";
  case Stall:          return "STALL     ";
  case Cluster:        return "CLUSTER   ";
  case Weak:           return "WEAK      ";
  case RegMax:         return "REG-MAX   ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
  PressureChange P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case RegExcess:
    P = Cand.RPDelta.Excess;
    break;
  case RegCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case RegMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") "
         << getReasonStr(Cand.Reason);
  if (P.isValid())
    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
           << ":" << P.getUnitInc() << " ";
  else
    dbgs() << "      ";
  if (ResIdx)
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "         ";
  if (Latency)
    dbgs() << " " << Latency << " cycles ";
  else
    dbgs() << "          ";
  dbgs() << '\n';
}
#endif

namespace llvm {
/// Return true if this heuristic determines order.
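/// Both outcomes count as determining order: if TryVal wins, TryCand is
/// marked with \p Reason; if CandVal wins, Cand keeps its pick but its
/// Reason is upgraded when \p Reason has higher priority. A false return
/// means a tie, deferring to the next heuristic. tryGreater below follows
/// the same protocol with the comparison reversed.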
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone) {
  if (Zone.isTop()) {
    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
        return true;
    }
    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
      return true;
  } else {
    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
        return true;
    }
    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
      return true;
  }
  return false;
}
} // end namespace llvm

static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');
}

static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
  tracePick(Cand.Reason, Cand.AtTop);
}

void GenericScheduler::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() &&
         "(PreRA)GenericScheduler needs vreg liveness");
  DAG = static_cast<ScheduleDAGMILive*>(dag);
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;

  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  if (!Top.HazardRec) {
    Top.HazardRec =
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
            Itin, DAG);
  }
  if (!Bot.HazardRec) {
    Bot.HazardRec =
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
            Itin, DAG);
  }
  TopCand.SU = nullptr;
  BotCand.SU = nullptr;
}

/// Initialize the per-region scheduling policy.
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
                                  MachineBasicBlock::iterator End,
                                  unsigned NumRegionInstrs) {
  const MachineFunction &MF = *Begin->getMF();
  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();

  // Avoid setting up the register pressure tracker for small regions to save
  // compile time. As a rough heuristic, only track pressure when the number of
  // schedulable instructions exceeds half the integer register file.
  RegionPolicy.ShouldTrackPressure = true;
  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
    if (TLI->isTypeLegal(LegalIntVT)) {
      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
        TLI->getRegClassFor(LegalIntVT));
      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
    }
  }
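
  // For example (hypothetical target): with 32 allocatable registers for the
  // widest legal integer type, pressure tracking is enabled only for regions
  // of more than 16 instructions.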

  // For generic targets, we default to bottom-up, because it's simpler and more
  // compile-time optimizations have been implemented in that direction.
  RegionPolicy.OnlyBottomUp = true;

  // Allow the subtarget to override default policy.
  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);

  // After subtarget overrides, apply command line options.
  if (!EnableRegPressure) {
    RegionPolicy.ShouldTrackPressure = false;
    RegionPolicy.ShouldTrackLaneMasks = false;
  }

  // Check whether -misched-topdown/bottomup can force or unforce scheduling
  // direction. e.g. -misched-bottomup=false allows scheduling in both
  // directions.
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  if (ForceBottomUp.getNumOccurrences() > 0) {
    RegionPolicy.OnlyBottomUp = ForceBottomUp;
    if (RegionPolicy.OnlyBottomUp)
      RegionPolicy.OnlyTopDown = false;
  }
  if (ForceTopDown.getNumOccurrences() > 0) {
    RegionPolicy.OnlyTopDown = ForceTopDown;
    if (RegionPolicy.OnlyTopDown)
      RegionPolicy.OnlyBottomUp = false;
  }
}

void GenericScheduler::dumpPolicy() const {
  // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "GenericScheduler RegionPolicy: "
         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
         << "\n";
#endif
}

/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
/// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bound on in-flight instructions as:
///
/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
/// InFlightIterations = AcyclicPath / CyclesPerIteration
/// InFlightResources = InFlightIterations * LoopResources
///
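/// For example (hypothetical values, with unit factors): CyclicCritPath = 5c,
/// CriticalPath = 20c, RemIssueCount = 10 uops, and a 16 uop buffer give
/// IterCount = max(5, 10) = 10 and InFlightCount = (20 * 10) / 10 = 20 uops,
/// and 20 > 16, so the loop is marked acyclic latency limited.
///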
/// TODO: Check execution resources in addition to IssueCount.
void GenericScheduler::checkAcyclicLatency() {
  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    return;

  // Scaled number of cycles per loop iteration.
  unsigned IterCount =
    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
             Rem.RemIssueCount);
  // Scaled acyclic critical path.
  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
  unsigned InFlightCount =
    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
  unsigned BufferLimit =
    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();

  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;

  LLVM_DEBUG(
      dbgs() << "IssueCycles="
             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
}

void GenericScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();

  // Some roots may not feed into ExitSU. Check all of them just in case.
  for (const SUnit *SU : Bot.Available) {
    if (SU->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = SU->getDepth();
  }
  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
  if (DumpCriticalPathLength) {
    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
  }

  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
    checkAcyclicLatency();
  }
}

namespace llvm {
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF) {
  // If one candidate decreases and the other increases, go with it.
  // Invalid candidates have UnitInc==0.
  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
                 Reason)) {
    return true;
  }
  // Do not compare the magnitude of pressure changes between top and bottom
  // boundary.
  if (Cand.AtTop != TryCand.AtTop)
    return false;

  // If both candidates affect the same set in the same boundary, go with the
  // smallest increase.
  unsigned TryPSet = TryP.getPSetOrMax();
  unsigned CandPSet = CandP.getPSetOrMax();
  if (TryPSet == CandPSet) {
    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
                   Reason);
  }

  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
                                 std::numeric_limits<int>::max();

  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
                                   std::numeric_limits<int>::max();

  // If the candidates are decreasing pressure, reverse priority.
  if (TryP.getUnitInc() < 0)
    std::swap(TryRank, CandRank);
  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}
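
// For example (hypothetical values): if TryP lowers a pressure set
// (UnitInc = -1) while CandP raises one (UnitInc = +2), the decreasing
// candidate wins outright; if both raise the same set in the same boundary,
// the smaller increase wins.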

unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Minimize physical register live ranges. Regalloc wants them adjacent to
/// their physreg def/use.
///
/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
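///
/// For example (hypothetical): once the instruction producing or consuming
/// the physreg has been scheduled, the dependent copy is biased to issue
/// immediately next to it; a copy sitting at the zone boundary with its
/// physreg operand still unscheduled is deferred instead.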
int biasPhysReg(const SUnit *SU, bool isTop) {
  const MachineInstr *MI = SU->getInstr();

  if (MI->isCopy()) {
    unsigned ScheduledOper = isTop ? 1 : 0;
    unsigned UnscheduledOper = isTop ? 0 : 1;
    // If we have already scheduled the physreg producer/consumer, immediately
    // schedule the copy.
    if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
      return 1;
    // If the physreg is at the boundary, defer it. Otherwise schedule it
    // immediately to free the dependent. We can hoist the copy later.
    bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
    if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
      return AtBoundary ? -1 : 1;
  }

  if (MI->isMoveImmediate()) {
    // If we have a move immediate and all successors have been assigned, bias
    // towards scheduling this later. Make sure all register defs are to
    // physical registers.
    bool DoBias = true;
    for (const MachineOperand &Op : MI->defs()) {
      if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
        DoBias = false;
        break;
      }
    }

    if (DoBias)
      return isTop ? -1 : 1;
  }

  return 0;
}
} // end namespace llvm

void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     RegPressureTracker &TempTracker) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;
  if (DAG->isTrackingPressure()) {
    if (AtTop) {
      TempTracker.getMaxDownwardPressureDelta(
        Cand.SU->getInstr(),
        Cand.RPDelta,
        DAG->getRegionCriticalPSets(),
        DAG->getRegPressure().MaxSetPressure);
    } else {
      if (VerifyScheduling) {
        TempTracker.getMaxUpwardPressureDelta(
          Cand.SU->getInstr(),
          &DAG->getPressureDiff(Cand.SU),
          Cand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      } else {
        RPTracker.getUpwardPressureDelta(
          Cand.SU->getInstr(),
          DAG->getPressureDiff(Cand.SU),
          Cand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      }
    }
  }
  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
             << "  Try  SU(" << Cand.SU->NodeNum << ") "
             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
             << Cand.RPDelta.Excess.getUnitInc() << "\n");
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
///             if Cand is from a different zone than TryCand.
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                    SchedCandidate &TryCand,
                                    SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Bias PhysReg defs and copies toward their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                               Cand.RPDelta.Excess,
                                               TryCand, Cand, RegExcess, TRI,
                                               DAG->MF))
    return;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
                                               Cand.RPDelta.CriticalMax,
                                               TryCand, Cand, RegCritical, TRI,
                                               DAG->MF))
    return;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary. Some properties are simply incomparable; in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU,
                 TryCand, Cand, Cluster))
    return;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop),
                TryCand, Cand, Weak))
      return;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
                                               Cand.RPDelta.CurrentMax,
                                               TryCand, Cand, RegMax, TRI,
                                               DAG->MF))
    return;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources,
                   TryCand, Cand, ResourceDemand))
      return;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }
}

/// Pick the best candidate from the queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    tracePick(Only1, false);
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    tracePick(Only1, true);
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  assert(BotCand.isValid());
  assert(TopCand.isValid());
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
    LLVM_DEBUG(traceCandidate(Cand));
  }

  IsTopNode = Cand.AtTop;
  tracePick(Cand);
  return Cand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand);
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand);
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  if (!isTop)
    ++InsertPos;
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above the scheduled instruction.
  for (SDep &Dep : Deps) {
    if (Dep.getKind() != SDep::Data ||
        !Register::isPhysicalRegister(Dep.getReg()))
      continue;
    SUnit *DepSU = Dep.getSUnit();
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
      continue;
    MachineInstr *Copy = DepSU->getInstr();
    if (!Copy->isCopy() && !Copy->isMoveImmediate())
      continue;
    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
               DAG->dumpNode(*Dep.getSUnit()));
    DAG->moveInstruction(Copy, InsertPos);
  }
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
/// update its state based on the current cycle before MachineSchedStrategy
/// does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
/// them here. See comments in biasPhysReg.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    Top.bumpNode(SU);
    if (SU->hasPhysRegUses)
      reschedulePhysReg(SU, true);
  } else {
    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    Bot.bumpNode(SU);
    if (SU->hasPhysRegDefs)
      reschedulePhysReg(SU, false);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
  // Register DAG post-processors.
  //
  // FIXME: extend the mutation API to allow earlier mutations to instantiate
  // data and pass it to later mutations. Have a single mutation that gathers
  // the interesting nodes in one pass.
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  return createGenericSchedLive(C);
}

static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
                     createConvergingSched);
3322234285Sdim//===----------------------------------------------------------------------===//
3323276479Sdim// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3324276479Sdim//===----------------------------------------------------------------------===//
3325276479Sdim
3326276479Sdimvoid PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3327276479Sdim  DAG = Dag;
3328276479Sdim  SchedModel = DAG->getSchedModel();
3329276479Sdim  TRI = DAG->TRI;
3330276479Sdim
3331276479Sdim  Rem.init(DAG, SchedModel);
3332276479Sdim  Top.init(DAG, SchedModel, &Rem);
3333276479Sdim  BotRoots.clear();
3334276479Sdim
3335276479Sdim  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3336276479Sdim  // or are disabled, then these HazardRecs will be disabled.
3337276479Sdim  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3338276479Sdim  if (!Top.HazardRec) {
3339276479Sdim    Top.HazardRec =
3340280031Sdim        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3341280031Sdim            Itin, DAG);
3342276479Sdim  }
3343276479Sdim}

void PostGenericScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();

  // Some roots may not feed into ExitSU. Check all of them just in case.
  for (const SUnit *SU : BotRoots) {
    if (SU->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = SU->getDepth();
  }
  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
  if (DumpCriticalPathLength) {
    errs() << "Critical Path(PGS-RR): " << Rem.CriticalPath << "\n";
  }
}

/// Apply a set of heuristics to a new candidate for PostRA scheduling.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                                        SchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(),
                 TryCand, Cand, Cluster))
    return;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return;
  }

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;
}
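// To make the precedence concrete with made-up numbers: a candidate stalling
// 0 cycles beats one stalling 2 at the Stall check, even if the 2-cycle
// candidate is the next cluster successor. Cluster, resource balance, and
// latency only break ties among candidates equal on all earlier criteria,
// and original node order is the final fallback.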

void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  ReadyQueue &Q = Top.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = SU;
    TryCand.AtTop = true;
    TryCand.initResourceDelta(DAG, SchedModel);
    tryCandidate(Cand, TryCand);
    if (TryCand.Reason != NoCand) {
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

/// Pick the next node to schedule.
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    SU = Top.pickOnlyChoice();
    if (SU) {
      tracePick(Only1, true);
    } else {
      CandPolicy NoPolicy;
      SchedCandidate TopCand(NoPolicy);
      // Set the top-down policy based on the state of the current top zone and
      // the instructions outside the zone, including the bottom zone.
      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
      pickNodeFromQueue(TopCand);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      tracePick(TopCand);
      SU = TopCand.SU;
    }
  } while (SU->isScheduled);

  IsTopNode = true;
  Top.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

/// Called after ScheduleDAGMI has scheduled an instruction and updated
/// scheduled/remaining flags in the DAG nodes.
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  Top.bumpNode(SU);
}

ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
                           /*RemoveKillFlags=*/true);
}
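// This DAG drives the separate post-RA machine scheduler pass. Since it runs
// after register allocation there is no liveness to preserve, so a plain
// ScheduleDAGMI suffices and kill flags are dropped (RemoveKillFlags). It is
// exercised when a target opts into post-RA machine scheduling (or, assuming
// the enable-post-misched option declared earlier in this file, when that
// flag forces it on).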

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {

/// Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult = nullptr;
  const BitVector *ScheduledTrees = nullptr;
  bool MaximizeILP;

  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

  /// Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};
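// A worked example of the comparator (values are illustrative): with
// MaximizeILP set and A, B in the same subtree, getILP(A) == 2 and
// getILP(B) == 5 make operator()(A, B) return true, so B is the "greater"
// element and surfaces first from the ReadyQ max-heap below.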

/// Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGMILive *DAG = nullptr;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;

public:
  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}

  void initialize(ScheduleDAGMI *dag) override {
    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    DAG = static_cast<ScheduleDAGMILive*>(dag);
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  void registerRoots() override {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty()) return nullptr;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    LLVM_DEBUG(dbgs() << "Pick node "
                      << "SU(" << SU->NodeNum << ") "
                      << " ILP: " << DAG->getDFSResult()->getILP(SU)
                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
                      << " @"
                      << DAG->getDFSResult()->getSubtreeLevel(
                             DAG->getDFSResult()->getSubtreeID(SU))
                      << '\n'
                      << "Scheduling " << *SU->getInstr());
    return SU;
  }

  /// Scheduler callback to notify that a new subtree is scheduled. Resort the
  /// priority Q with the updated tree state.
  void scheduleTree(unsigned SubtreeID) override {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. SchedDFSResult is computed for
  /// bottom-up scheduling only, so nothing to update here beyond asserting
  /// the direction; the heap is resorted in scheduleTree().
  void schedNode(SUnit *SU, bool IsTopNode) override {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }

  void releaseBottomNode(SUnit *SU) override {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};

} // end anonymous namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
}

static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
}

static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
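// Like any registered strategy, these are selectable by name via the
// -misched=<name> option, e.g.:
//   llc -misched=ilpmax foo.ll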

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {

/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    TopQ;

  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    BottomQ;

public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  void initialize(ScheduleDAGMI*) override {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  SUnit *pickNode(bool &IsTopNode) override {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return nullptr;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    } else {
      do {
        if (BottomQ.empty()) return nullptr;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}

  void releaseTopNode(SUnit *SU) override {
    TopQ.push(SU);
  }
  void releaseBottomNode(SUnit *SU) override {
    BottomQ.push(SU);
  }
};

} // end anonymous namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMILive(
      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
}

static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
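// In asserts builds this strategy can be selected with -misched=shuffle,
// e.g.:
//   llc -misched=shuffle foo.ll
// Shuffling is useful for flushing out dependencies the DAG fails to model:
// if correctness relies on an ordering the DAG does not enforce, the
// shuffler will likely break it.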
#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMILive.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node) {
    if (ViewMISchedCutoff == 0)
      return false;
    return (Node->Preds.size() > ViewMISchedCutoff
         || Node->Succs.size() > ViewMISchedCutoff);
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    SS << "SU:" << SU->NodeNum;
    if (DFS)
      SS << " I:" << DFS->getNumInstrs(SU);
    return SS.str();
  }

  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    std::string Str("shape=Mrecord");
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};

} // end namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
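// For example, from a debugger stopped inside the scheduler (a sketch; "DAG"
// stands for whatever ScheduleDAGMI pointer is in scope):
//   (gdb) call DAG->viewGraph()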