1234285Sdim//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2234285Sdim//
3234285Sdim//                     The LLVM Compiler Infrastructure
4234285Sdim//
5234285Sdim// This file is distributed under the University of Illinois Open Source
6234285Sdim// License. See LICENSE.TXT for details.
7234285Sdim//
8234285Sdim//===----------------------------------------------------------------------===//
9234285Sdim//
10234285Sdim// MachineScheduler schedules machine instructions after phi elimination. It
11234285Sdim// preserves LiveIntervals so it can be invoked before register allocation.
12234285Sdim//
13234285Sdim//===----------------------------------------------------------------------===//
14234285Sdim
15234285Sdim#define DEBUG_TYPE "misched"
16234285Sdim
17249423Sdim#include "llvm/CodeGen/MachineScheduler.h"
18249423Sdim#include "llvm/ADT/OwningPtr.h"
19249423Sdim#include "llvm/ADT/PriorityQueue.h"
20249423Sdim#include "llvm/Analysis/AliasAnalysis.h"
21234285Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h"
22249423Sdim#include "llvm/CodeGen/MachineDominators.h"
23249423Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
24263508Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
25234285Sdim#include "llvm/CodeGen/Passes.h"
26239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h"
27249423Sdim#include "llvm/CodeGen/ScheduleDFS.h"
28239462Sdim#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
29234285Sdim#include "llvm/Support/CommandLine.h"
30234285Sdim#include "llvm/Support/Debug.h"
31234285Sdim#include "llvm/Support/ErrorHandling.h"
32249423Sdim#include "llvm/Support/GraphWriter.h"
33234285Sdim#include "llvm/Support/raw_ostream.h"
34263508Sdim#include "llvm/Target/TargetInstrInfo.h"
35234285Sdim#include <queue>
36234285Sdim
37234285Sdimusing namespace llvm;
38234285Sdim
namespace llvm {
// Exported (non-static, in namespace llvm) so scheduling strategies defined in
// other files can consult these direction overrides.
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

// Debug-only knob for bisecting scheduler miscompiles; ~0U means "no cutoff".
// Enforced by ScheduleDAGMI::checkSchedLimit().
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
// The DAG viewer is compiled out of release builds.
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}
void ScheduleDAGMutation::anchor() {}
78263508Sdim
79234285Sdim//===----------------------------------------------------------------------===//
80234285Sdim// Machine Instruction Scheduling Pass and Registry
81234285Sdim//===----------------------------------------------------------------------===//
82234285Sdim
/// Construct a context with all analysis pointers null; they are populated by
/// MachineScheduler::runOnMachineFunction. RegClassInfo is owned by the
/// context: allocated here and released in the destructor.
MachineSchedContext::MachineSchedContext():
    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}
91239462Sdim
namespace {
/// MachineScheduler runs after coalescing and before register allocation.
/// It derives from MachineSchedContext so the per-function analysis pointers
/// it collects are directly visible to the scheduler it instantiates.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo

protected:
  /// Select the scheduler implementation for the current function.
  ScheduleDAGInstrs *createMachineScheduler();
};
} // namespace

char MachineScheduler::ID = 0;

// Opaque pass identifier exported so other passes can name this pass.
char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
130234285Sdim
/// Declare required and preserved analyses. The scheduler needs up-to-date
/// liveness (SlotIndexes/LiveIntervals) and preserves it, which is what allows
/// it to run before register allocation.
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
143234285Sdim
MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  // Returning null means "no override"; see createMachineScheduler().
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C);
166234285Sdim
167239462Sdim
168239462Sdim/// Decrement this iterator until reaching the top or a non-debug instr.
169263508Sdimstatic MachineBasicBlock::const_iterator
170263508SdimpriorNonDebug(MachineBasicBlock::const_iterator I,
171263508Sdim              MachineBasicBlock::const_iterator Beg) {
172239462Sdim  assert(I != Beg && "reached the top of the region, cannot decrement");
173239462Sdim  while (--I != Beg) {
174239462Sdim    if (!I->isDebugValue())
175239462Sdim      break;
176239462Sdim  }
177239462Sdim  return I;
178239462Sdim}
179239462Sdim
180263508Sdim/// Non-const version.
/// Non-const version. Delegates to the const overload and strips constness
/// from the resulting instruction pointer.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return const_cast<MachineInstr*>(
    &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
}
187263508Sdim
188239462Sdim/// If this iterator is a debug value, increment until reaching the End or a
189239462Sdim/// non-debug instruction.
190263508Sdimstatic MachineBasicBlock::const_iterator
191263508SdimnextIfDebug(MachineBasicBlock::const_iterator I,
192263508Sdim            MachineBasicBlock::const_iterator End) {
193239462Sdim  for(; I != End; ++I) {
194239462Sdim    if (!I->isDebugValue())
195239462Sdim      break;
196239462Sdim  }
197239462Sdim  return I;
198239462Sdim}
199239462Sdim
/// Non-const version of nextIfDebug. Delegates to the const overload, then
/// converts the result back to a mutable bundle_iterator.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  // Cast the return value to nonconst MachineInstr, then cast to an
  // instr_iterator, which does not check for null, finally return a
  // bundle_iterator.
  return MachineBasicBlock::instr_iterator(
    const_cast<MachineInstr*>(
      &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
}
211263508Sdim
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
///
/// Selection priority: command-line -misched override, then the target's
/// default for this function, then the generic scheduler.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSched(this);
}
227263508Sdim
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    //
    // RemainingInstrs is decremented once per instruction carved into a
    // region and must reach exactly zero when the block is exhausted.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      unsigned NumRegionInstrs = 0;
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
            << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " RegionInstrs: " << NumRegionInstrs
            << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}
346234285Sdim
/// Pass printing hook; intentionally a no-op for this pass.
void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}
350234285Sdim
351243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
352243830Sdimvoid ReadyQueue::dump() {
353263508Sdim  dbgs() << Name << ": ";
354243830Sdim  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
355243830Sdim    dbgs() << Queue[i]->NodeNum << " ";
356243830Sdim  dbgs() << "\n";
357243830Sdim}
358243830Sdim#endif
359234285Sdim
360234285Sdim//===----------------------------------------------------------------------===//
361234285Sdim// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
362234285Sdim// preservation.
363234285Sdim//===----------------------------------------------------------------------===//
364234285Sdim
/// Destroy owned helpers: the optional DFS result, every registered DAG
/// mutation, and the scheduling strategy implementation.
ScheduleDAGMI::~ScheduleDAGMI() {
  delete DFSResult;
  DeleteContainerPointers(Mutations);
  delete SchedImpl;
}
370249423Sdim
/// True if an edge from PredSU to SuccSU can be added without creating a
/// cycle; edges into ExitSU are always acyclic.
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}
374251662Sdim
375249423Sdimbool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
376249423Sdim  if (SuccSU != &ExitSU) {
377249423Sdim    // Do not use WillCreateCycle, it assumes SD scheduling.
378249423Sdim    // If Pred is reachable from Succ, then the edge creates a cycle.
379249423Sdim    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
380249423Sdim      return false;
381249423Sdim    Topo.AddPred(SuccSU, PredDep.getSUnit());
382249423Sdim  }
383249423Sdim  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
384249423Sdim  // Return true regardless of whether a new edge needed to be inserted.
385249423Sdim  return true;
386249423Sdim}
387249423Sdim
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  // Weak edges never block scheduling; just record cluster preference.
  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  // Releasing a node whose count is already zero indicates a broken DAG or a
  // double release; fail loudly in debug builds.
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}
413234285Sdim
414234285Sdim/// releaseSuccessors - Call releaseSucc on each of SU's successors.
415234285Sdimvoid ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
416234285Sdim  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
417234285Sdim       I != E; ++I) {
418234285Sdim    releaseSucc(SU, &*I);
419234285Sdim  }
420234285Sdim}
421234285Sdim
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  // Weak edges never block scheduling; just record cluster preference.
  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  // Releasing a node whose count is already zero indicates a broken DAG or a
  // double release; fail loudly in debug builds.
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}
447234285Sdim
448234285Sdim/// releasePredecessors - Call releasePred on each of SU's predecessors.
449234285Sdimvoid ScheduleDAGMI::releasePredecessors(SUnit *SU) {
450234285Sdim  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
451234285Sdim       I != E; ++I) {
452234285Sdim    releasePred(SU, &*I);
453234285Sdim  }
454234285Sdim}
455234285Sdim
/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
///
/// Splices MI before InsertPos, keeps LiveIntervals in sync via handleMove,
/// and maintains the cached RegionBegin across the move.
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

/// Return true if the caller may schedule another instruction. In debug
/// builds this enforces the -misched-cutoff limit; release builds always
/// succeed.
bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    // Collapse the unscheduled zone so the scheduling loop terminates cleanly.
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}
485234285Sdim
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  // Record region bounds and instruction count in the base class.
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);

  // Pressure diffs from a previous region are stale for this one.
  SUPressureDiffs.clear();

  // Let the strategy pick its policy for this region before the DAG is built.
  SchedImpl->initPolicy(begin, end, regioninstrs);

  // Cache the strategy's decision on register pressure tracking.
  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
}
507239462Sdim
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMI::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Compute the set of registers live through the whole region; the top
  // tracker shares the bottom tracker's result.
  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    DEBUG(dbgs() << "Live Thru: ";
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  };

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<unsigned, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      DEBUG(dbgs() << TRI->getRegPressureSetName(i)
            << " Limit " << Limit
            << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].getPSet()) << " ";
        dbgs() << "\n");
}
569239462Sdim
/// After scheduling SU, fold its pressure changes into the cached maxima for
/// the region's critical pressure sets, and (debug builds) report sets that
/// are at or near their limit.
void ScheduleDAGMI::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
       I != E; ++I) {
    if (!I->isValid())
      break;
    unsigned ID = I->getPSet();
    // RegionCriticalPSets is ordered by PSet ID; advance CritIdx to align.
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      // Record a new maximum, clamped to what UnitInc can represent.
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
          && NewMaxPressure[ID] <= INT16_MAX)
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
            << NewMaxPressure[ID] << " > " << Limit << "(+ "
            << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
    }
  }
}
595263508Sdim
/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
  for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
    /// FIXME: Currently assuming single-use physregs.
    unsigned Reg = LiveUses[LUIdx];
    DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
    // Physical registers are skipped; only virtual registers have
    // PressureDiff entries to adjust.
    if (!TRI->isVirtualRegister(Reg))
      continue;

    // This may be called before CurrentBottom has been initialized. However,
    // BotRPTracker must have a valid position. We want the value live into the
    // instruction or live out of the block, so ask for the previous
    // instruction's live-out.
    const LiveInterval &LI = LIS->getInterval(Reg);
    VNInfo *VNI;
    MachineBasicBlock::const_iterator I =
      nextIfDebug(BotRPTracker.getPos(), BB->end());
    if (I == BB->end())
      VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    else {
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
      VNI = LRQ.valueIn();
    }
    // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    assert(VNI && "No live value at use.");
    for (VReg2UseMap::iterator
           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
      SUnit *SU = UI->SU;
      DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
            << *SU->getInstr());
      // If this use comes before the reaching def, it cannot be a last use, so
      // decrease its pressure change.
      if (!SU->isScheduled && SU != &ExitSU) {
        LiveQueryResult LRQ
          = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
        if (LRQ.valueIn() == VNI)
          getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
      }
    }
  }
}
638239462Sdim
639243830Sdim/// schedule - Called back from MachineScheduler::runOnMachineFunction
640243830Sdim/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
641243830Sdim/// only includes instructions that have DAG nodes, not scheduling boundaries.
642243830Sdim///
643243830Sdim/// This is a skeletal driver, with all the functionality pushed into helpers,
644243830Sdim/// so that it can be easilly extended by experimental schedulers. Generally,
645243830Sdim/// implementing MachineSchedStrategy should be sufficient to implement a new
646243830Sdim/// scheduling algorithm. However, if a scheduler further subclasses
647243830Sdim/// ScheduleDAGMI then it will want to override this virtual method in order to
648243830Sdim/// update any specialized state.
649243830Sdimvoid ScheduleDAGMI::schedule() {
650243830Sdim  buildDAGWithRegPressure();
651243830Sdim
652249423Sdim  Topo.InitDAGTopologicalSorting();
653249423Sdim
654243830Sdim  postprocessDAG();
655243830Sdim
656249423Sdim  SmallVector<SUnit*, 8> TopRoots, BotRoots;
657249423Sdim  findRootsAndBiasEdges(TopRoots, BotRoots);
658249423Sdim
659249423Sdim  // Initialize the strategy before modifying the DAG.
660249423Sdim  // This may initialize a DFSResult to be used for queue priority.
661249423Sdim  SchedImpl->initialize(this);
662249423Sdim
663243830Sdim  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
664243830Sdim          SUnits[su].dumpAll(this));
665243830Sdim  if (ViewMISchedDAGs) viewGraph();
666243830Sdim
667249423Sdim  // Initialize ready queues now that the DAG and priority data are finalized.
668249423Sdim  initQueues(TopRoots, BotRoots);
669243830Sdim
670243830Sdim  bool IsTopNode = false;
671243830Sdim  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
672243830Sdim    assert(!SU->isScheduled && "Node already scheduled");
673243830Sdim    if (!checkSchedLimit())
674243830Sdim      break;
675243830Sdim
676243830Sdim    scheduleMI(SU, IsTopNode);
677243830Sdim
678243830Sdim    updateQueues(SU, IsTopNode);
679243830Sdim  }
680243830Sdim  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
681243830Sdim
682243830Sdim  placeDebugValues();
683243830Sdim
684243830Sdim  DEBUG({
685249423Sdim      unsigned BBNum = begin()->getParent()->getNumber();
686243830Sdim      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
687243830Sdim      dumpSchedule();
688243830Sdim      dbgs() << '\n';
689243830Sdim    });
690243830Sdim}
691243830Sdim
692243830Sdim/// Build the DAG and setup three register pressure trackers.
693243830Sdimvoid ScheduleDAGMI::buildDAGWithRegPressure() {
694263508Sdim  if (!ShouldTrackPressure) {
695263508Sdim    RPTracker.reset();
696263508Sdim    RegionCriticalPSets.clear();
697263508Sdim    buildSchedGraph(AA);
698263508Sdim    return;
699263508Sdim  }
700263508Sdim
701243830Sdim  // Initialize the register pressure tracker used by buildSchedGraph.
702263508Sdim  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
703263508Sdim                 /*TrackUntiedDefs=*/true);
704243830Sdim
705243830Sdim  // Account for liveness generate by the region boundary.
706243830Sdim  if (LiveRegionEnd != RegionEnd)
707243830Sdim    RPTracker.recede();
708243830Sdim
709243830Sdim  // Build the DAG, and compute current register pressure.
710263508Sdim  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
711243830Sdim
712243830Sdim  // Initialize top/bottom trackers after computing region pressure.
713243830Sdim  initRegPressure();
714243830Sdim}
715243830Sdim
716243830Sdim/// Apply each ScheduleDAGMutation step in order.
717243830Sdimvoid ScheduleDAGMI::postprocessDAG() {
718243830Sdim  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
719243830Sdim    Mutations[i]->apply(this);
720243830Sdim  }
721243830Sdim}
722243830Sdim
723249423Sdimvoid ScheduleDAGMI::computeDFSResult() {
724249423Sdim  if (!DFSResult)
725249423Sdim    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
726249423Sdim  DFSResult->clear();
727249423Sdim  ScheduledTrees.clear();
728249423Sdim  DFSResult->resize(SUnits.size());
729249423Sdim  DFSResult->compute(SUnits);
730249423Sdim  ScheduledTrees.resize(DFSResult->getNumSubtrees());
731249423Sdim}
732239462Sdim
733249423Sdimvoid ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
734249423Sdim                                          SmallVectorImpl<SUnit*> &BotRoots) {
735239462Sdim  for (std::vector<SUnit>::iterator
736239462Sdim         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
737249423Sdim    SUnit *SU = &(*I);
738249423Sdim    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
739249423Sdim
740249423Sdim    // Order predecessors so DFSResult follows the critical path.
741249423Sdim    SU->biasCriticalPath();
742249423Sdim
743239462Sdim    // A SUnit is ready to top schedule if it has no predecessors.
744249423Sdim    if (!I->NumPredsLeft)
745249423Sdim      TopRoots.push_back(SU);
746239462Sdim    // A SUnit is ready to bottom schedule if it has no successors.
747249423Sdim    if (!I->NumSuccsLeft)
748249423Sdim      BotRoots.push_back(SU);
749239462Sdim  }
750249423Sdim  ExitSU.biasCriticalPath();
751249423Sdim}
752249423Sdim
753263508Sdim/// Compute the max cyclic critical path through the DAG. The scheduling DAG
754263508Sdim/// only provides the critical path for single block loops. To handle loops that
755263508Sdim/// span blocks, we could use the vreg path latencies provided by
756263508Sdim/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
757263508Sdim/// available for use in the scheduler.
758263508Sdim///
759263508Sdim/// The cyclic path estimation identifies a def-use pair that crosses the back
760263508Sdim/// edge and considers the depth and height of the nodes. For example, consider
761263508Sdim/// the following instruction sequence where each instruction has unit latency
762263508Sdim/// and defines an epomymous virtual register:
763263508Sdim///
764263508Sdim/// a->b(a,c)->c(b)->d(c)->exit
765263508Sdim///
766263508Sdim/// The cyclic critical path is a two cycles: b->c->b
767263508Sdim/// The acyclic critical path is four cycles: a->b->c->d->exit
768263508Sdim/// LiveOutHeight = height(c) = len(c->d->exit) = 2
769263508Sdim/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
770263508Sdim/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
771263508Sdim/// LiveInDepth = depth(b) = len(a->b) = 1
772263508Sdim///
773263508Sdim/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
774263508Sdim/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
775263508Sdim/// CyclicCriticalPath = min(2, 2) = 2
776263508Sdimunsigned ScheduleDAGMI::computeCyclicCriticalPath() {
777263508Sdim  // This only applies to single block loop.
778263508Sdim  if (!BB->isSuccessor(BB))
779263508Sdim    return 0;
780263508Sdim
781263508Sdim  unsigned MaxCyclicLatency = 0;
782263508Sdim  // Visit each live out vreg def to find def/use pairs that cross iterations.
783263508Sdim  ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
784263508Sdim  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
785263508Sdim       RI != RE; ++RI) {
786263508Sdim    unsigned Reg = *RI;
787263508Sdim    if (!TRI->isVirtualRegister(Reg))
788263508Sdim        continue;
789263508Sdim    const LiveInterval &LI = LIS->getInterval(Reg);
790263508Sdim    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
791263508Sdim    if (!DefVNI)
792263508Sdim      continue;
793263508Sdim
794263508Sdim    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
795263508Sdim    const SUnit *DefSU = getSUnit(DefMI);
796263508Sdim    if (!DefSU)
797263508Sdim      continue;
798263508Sdim
799263508Sdim    unsigned LiveOutHeight = DefSU->getHeight();
800263508Sdim    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
801263508Sdim    // Visit all local users of the vreg def.
802263508Sdim    for (VReg2UseMap::iterator
803263508Sdim           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
804263508Sdim      if (UI->SU == &ExitSU)
805263508Sdim        continue;
806263508Sdim
807263508Sdim      // Only consider uses of the phi.
808263508Sdim      LiveQueryResult LRQ =
809263508Sdim        LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
810263508Sdim      if (!LRQ.valueIn()->isPHIDef())
811263508Sdim        continue;
812263508Sdim
813263508Sdim      // Assume that a path spanning two iterations is a cycle, which could
814263508Sdim      // overestimate in strange cases. This allows cyclic latency to be
815263508Sdim      // estimated as the minimum slack of the vreg's depth or height.
816263508Sdim      unsigned CyclicLatency = 0;
817263508Sdim      if (LiveOutDepth > UI->SU->getDepth())
818263508Sdim        CyclicLatency = LiveOutDepth - UI->SU->getDepth();
819263508Sdim
820263508Sdim      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
821263508Sdim      if (LiveInHeight > LiveOutHeight) {
822263508Sdim        if (LiveInHeight - LiveOutHeight < CyclicLatency)
823263508Sdim          CyclicLatency = LiveInHeight - LiveOutHeight;
824263508Sdim      }
825263508Sdim      else
826263508Sdim        CyclicLatency = 0;
827263508Sdim
828263508Sdim      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
829263508Sdim            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
830263508Sdim      if (CyclicLatency > MaxCyclicLatency)
831263508Sdim        MaxCyclicLatency = CyclicLatency;
832263508Sdim    }
833263508Sdim  }
834263508Sdim  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
835263508Sdim  return MaxCyclicLatency;
836263508Sdim}
837263508Sdim
838249423Sdim/// Identify DAG roots and setup scheduler queues.
839249423Sdimvoid ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
840249423Sdim                               ArrayRef<SUnit*> BotRoots) {
841249423Sdim  NextClusterSucc = NULL;
842249423Sdim  NextClusterPred = NULL;
843249423Sdim
844249423Sdim  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
845249423Sdim  //
846249423Sdim  // Nodes with unreleased weak edges can still be roots.
847249423Sdim  // Release top roots in forward order.
848249423Sdim  for (SmallVectorImpl<SUnit*>::const_iterator
849249423Sdim         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
850249423Sdim    SchedImpl->releaseTopNode(*I);
851249423Sdim  }
852239462Sdim  // Release bottom roots in reverse order so the higher priority nodes appear
853239462Sdim  // first. This is more natural and slightly more efficient.
854239462Sdim  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
855249423Sdim         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
856239462Sdim    SchedImpl->releaseBottomNode(*I);
857249423Sdim  }
858239462Sdim
859234285Sdim  releaseSuccessors(&EntrySU);
860234285Sdim  releasePredecessors(&ExitSU);
861234285Sdim
862243830Sdim  SchedImpl->registerRoots();
863243830Sdim
864249423Sdim  // Advance past initial DebugValues.
865239462Sdim  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
866263508Sdim  CurrentBottom = RegionEnd;
867249423Sdim
868263508Sdim  if (ShouldTrackPressure) {
869263508Sdim    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
870263508Sdim    TopRPTracker.setPos(CurrentTop);
871263508Sdim  }
872243830Sdim}
873234285Sdim
874243830Sdim/// Move an instruction and update register pressure.
875243830Sdimvoid ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
876243830Sdim  // Move the instruction to its new location in the instruction stream.
877243830Sdim  MachineInstr *MI = SU->getInstr();
878234285Sdim
879243830Sdim  if (IsTopNode) {
880243830Sdim    assert(SU->isTopReady() && "node still has unscheduled dependencies");
881243830Sdim    if (&*CurrentTop == MI)
882243830Sdim      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
883243830Sdim    else {
884243830Sdim      moveInstruction(MI, CurrentTop);
885243830Sdim      TopRPTracker.setPos(MI);
886243830Sdim    }
887239462Sdim
888263508Sdim    if (ShouldTrackPressure) {
889263508Sdim      // Update top scheduled pressure.
890263508Sdim      TopRPTracker.advance();
891263508Sdim      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
892263508Sdim      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
893263508Sdim    }
894243830Sdim  }
895243830Sdim  else {
896243830Sdim    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
897243830Sdim    MachineBasicBlock::iterator priorII =
898243830Sdim      priorNonDebug(CurrentBottom, CurrentTop);
899243830Sdim    if (&*priorII == MI)
900243830Sdim      CurrentBottom = priorII;
901234285Sdim    else {
902243830Sdim      if (&*CurrentTop == MI) {
903243830Sdim        CurrentTop = nextIfDebug(++CurrentTop, priorII);
904243830Sdim        TopRPTracker.setPos(CurrentTop);
905234285Sdim      }
906243830Sdim      moveInstruction(MI, CurrentBottom);
907243830Sdim      CurrentBottom = MI;
908234285Sdim    }
909263508Sdim    if (ShouldTrackPressure) {
910263508Sdim      // Update bottom scheduled pressure.
911263508Sdim      SmallVector<unsigned, 8> LiveUses;
912263508Sdim      BotRPTracker.recede(&LiveUses);
913263508Sdim      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
914263508Sdim      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
915263508Sdim      updatePressureDiffs(LiveUses);
916263508Sdim    }
917234285Sdim  }
918243830Sdim}
919239462Sdim
920243830Sdim/// Update scheduler queues after scheduling an instruction.
921243830Sdimvoid ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
922243830Sdim  // Release dependent instructions for scheduling.
923243830Sdim  if (IsTopNode)
924243830Sdim    releaseSuccessors(SU);
925243830Sdim  else
926243830Sdim    releasePredecessors(SU);
927243830Sdim
928243830Sdim  SU->isScheduled = true;
929243830Sdim
930249423Sdim  if (DFSResult) {
931249423Sdim    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
932249423Sdim    if (!ScheduledTrees.test(SubtreeID)) {
933249423Sdim      ScheduledTrees.set(SubtreeID);
934249423Sdim      DFSResult->scheduleTree(SubtreeID);
935249423Sdim      SchedImpl->scheduleTree(SubtreeID);
936249423Sdim    }
937249423Sdim  }
938249423Sdim
939243830Sdim  // Notify the scheduling strategy after updating the DAG.
940243830Sdim  SchedImpl->schedNode(SU, IsTopNode);
941234285Sdim}
942234285Sdim
943239462Sdim/// Reinsert any remaining debug_values, just like the PostRA scheduler.
944239462Sdimvoid ScheduleDAGMI::placeDebugValues() {
945239462Sdim  // If first instruction was a DBG_VALUE then put it back.
946239462Sdim  if (FirstDbgValue) {
947239462Sdim    BB->splice(RegionBegin, BB, FirstDbgValue);
948239462Sdim    RegionBegin = FirstDbgValue;
949239462Sdim  }
950239462Sdim
951239462Sdim  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
952239462Sdim         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
953239462Sdim    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
954239462Sdim    MachineInstr *DbgValue = P.first;
955239462Sdim    MachineBasicBlock::iterator OrigPrevMI = P.second;
956249423Sdim    if (&*RegionBegin == DbgValue)
957249423Sdim      ++RegionBegin;
958239462Sdim    BB->splice(++OrigPrevMI, BB, DbgValue);
959239462Sdim    if (OrigPrevMI == llvm::prior(RegionEnd))
960239462Sdim      RegionEnd = DbgValue;
961239462Sdim  }
962239462Sdim  DbgValues.clear();
963239462Sdim  FirstDbgValue = NULL;
964239462Sdim}
965239462Sdim
966243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
967243830Sdimvoid ScheduleDAGMI::dumpSchedule() const {
968243830Sdim  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
969243830Sdim    if (SUnit *SU = getSUnit(&(*MI)))
970243830Sdim      SU->dump(this);
971243830Sdim    else
972243830Sdim      dbgs() << "Missing SUnit\n";
973243830Sdim  }
974243830Sdim}
975243830Sdim#endif
976243830Sdim
977234285Sdim//===----------------------------------------------------------------------===//
978249423Sdim// LoadClusterMutation - DAG post-processing to cluster loads.
979249423Sdim//===----------------------------------------------------------------------===//
980249423Sdim
981249423Sdimnamespace {
982249423Sdim/// \brief Post-process the DAG to create cluster edges between neighboring
983249423Sdim/// loads.
984249423Sdimclass LoadClusterMutation : public ScheduleDAGMutation {
985249423Sdim  struct LoadInfo {
986249423Sdim    SUnit *SU;
987249423Sdim    unsigned BaseReg;
988249423Sdim    unsigned Offset;
989249423Sdim    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
990249423Sdim      : SU(su), BaseReg(reg), Offset(ofs) {}
991249423Sdim  };
992249423Sdim  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
993249423Sdim                           const LoadClusterMutation::LoadInfo &RHS);
994249423Sdim
995249423Sdim  const TargetInstrInfo *TII;
996249423Sdim  const TargetRegisterInfo *TRI;
997249423Sdimpublic:
998249423Sdim  LoadClusterMutation(const TargetInstrInfo *tii,
999249423Sdim                      const TargetRegisterInfo *tri)
1000249423Sdim    : TII(tii), TRI(tri) {}
1001249423Sdim
1002249423Sdim  virtual void apply(ScheduleDAGMI *DAG);
1003249423Sdimprotected:
1004249423Sdim  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
1005249423Sdim};
1006249423Sdim} // anonymous
1007249423Sdim
1008249423Sdimbool LoadClusterMutation::LoadInfoLess(
1009249423Sdim  const LoadClusterMutation::LoadInfo &LHS,
1010249423Sdim  const LoadClusterMutation::LoadInfo &RHS) {
1011249423Sdim  if (LHS.BaseReg != RHS.BaseReg)
1012249423Sdim    return LHS.BaseReg < RHS.BaseReg;
1013249423Sdim  return LHS.Offset < RHS.Offset;
1014249423Sdim}
1015249423Sdim
1016249423Sdimvoid LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1017249423Sdim                                                  ScheduleDAGMI *DAG) {
1018249423Sdim  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
1019249423Sdim  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1020249423Sdim    SUnit *SU = Loads[Idx];
1021249423Sdim    unsigned BaseReg;
1022249423Sdim    unsigned Offset;
1023249423Sdim    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1024249423Sdim      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1025249423Sdim  }
1026249423Sdim  if (LoadRecords.size() < 2)
1027249423Sdim    return;
1028249423Sdim  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
1029249423Sdim  unsigned ClusterLength = 1;
1030249423Sdim  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1031249423Sdim    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1032249423Sdim      ClusterLength = 1;
1033249423Sdim      continue;
1034249423Sdim    }
1035249423Sdim
1036249423Sdim    SUnit *SUa = LoadRecords[Idx].SU;
1037249423Sdim    SUnit *SUb = LoadRecords[Idx+1].SU;
1038249423Sdim    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1039249423Sdim        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1040249423Sdim
1041249423Sdim      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1042249423Sdim            << SUb->NodeNum << ")\n");
1043249423Sdim      // Copy successor edges from SUa to SUb. Interleaving computation
1044249423Sdim      // dependent on SUa can prevent load combining due to register reuse.
1045249423Sdim      // Predecessor edges do not need to be copied from SUb to SUa since nearby
1046249423Sdim      // loads should have effectively the same inputs.
1047249423Sdim      for (SUnit::const_succ_iterator
1048249423Sdim             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1049249423Sdim        if (SI->getSUnit() == SUb)
1050249423Sdim          continue;
1051249423Sdim        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1052249423Sdim        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1053249423Sdim      }
1054249423Sdim      ++ClusterLength;
1055249423Sdim    }
1056249423Sdim    else
1057249423Sdim      ClusterLength = 1;
1058249423Sdim  }
1059249423Sdim}
1060249423Sdim
1061249423Sdim/// \brief Callback from DAG postProcessing to create cluster edges for loads.
1062249423Sdimvoid LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
1063249423Sdim  // Map DAG NodeNum to store chain ID.
1064249423Sdim  DenseMap<unsigned, unsigned> StoreChainIDs;
1065249423Sdim  // Map each store chain to a set of dependent loads.
1066249423Sdim  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1067249423Sdim  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1068249423Sdim    SUnit *SU = &DAG->SUnits[Idx];
1069249423Sdim    if (!SU->getInstr()->mayLoad())
1070249423Sdim      continue;
1071249423Sdim    unsigned ChainPredID = DAG->SUnits.size();
1072249423Sdim    for (SUnit::const_pred_iterator
1073249423Sdim           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1074249423Sdim      if (PI->isCtrl()) {
1075249423Sdim        ChainPredID = PI->getSUnit()->NodeNum;
1076249423Sdim        break;
1077249423Sdim      }
1078249423Sdim    }
1079249423Sdim    // Check if this chain-like pred has been seen
1080249423Sdim    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1081249423Sdim    unsigned NumChains = StoreChainDependents.size();
1082249423Sdim    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1083249423Sdim      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1084249423Sdim    if (Result.second)
1085249423Sdim      StoreChainDependents.resize(NumChains + 1);
1086249423Sdim    StoreChainDependents[Result.first->second].push_back(SU);
1087249423Sdim  }
1088249423Sdim  // Iterate over the store chains.
1089249423Sdim  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1090249423Sdim    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1091249423Sdim}
1092249423Sdim
1093249423Sdim//===----------------------------------------------------------------------===//
1094249423Sdim// MacroFusion - DAG post-processing to encourage fusion of macro ops.
1095249423Sdim//===----------------------------------------------------------------------===//
1096249423Sdim
1097249423Sdimnamespace {
1098249423Sdim/// \brief Post-process the DAG to create cluster edges between instructions
1099249423Sdim/// that may be fused by the processor into a single operation.
1100249423Sdimclass MacroFusion : public ScheduleDAGMutation {
1101249423Sdim  const TargetInstrInfo *TII;
1102249423Sdimpublic:
1103249423Sdim  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
1104249423Sdim
1105249423Sdim  virtual void apply(ScheduleDAGMI *DAG);
1106249423Sdim};
1107249423Sdim} // anonymous
1108249423Sdim
1109249423Sdim/// \brief Callback from DAG postProcessing to create cluster edges to encourage
1110249423Sdim/// fused operations.
1111249423Sdimvoid MacroFusion::apply(ScheduleDAGMI *DAG) {
1112249423Sdim  // For now, assume targets can only fuse with the branch.
1113249423Sdim  MachineInstr *Branch = DAG->ExitSU.getInstr();
1114249423Sdim  if (!Branch)
1115249423Sdim    return;
1116249423Sdim
1117249423Sdim  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1118249423Sdim    SUnit *SU = &DAG->SUnits[--Idx];
1119249423Sdim    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1120249423Sdim      continue;
1121249423Sdim
1122249423Sdim    // Create a single weak edge from SU to ExitSU. The only effect is to cause
1123249423Sdim    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
1124249423Sdim    // need to copy predecessor edges from ExitSU to SU, since top-down
1125249423Sdim    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1126249423Sdim    // of SU, we could create an artificial edge from the deepest root, but it
1127249423Sdim    // hasn't been needed yet.
1128249423Sdim    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1129249423Sdim    (void)Success;
1130249423Sdim    assert(Success && "No DAG nodes should be reachable from ExitSU");
1131249423Sdim
1132249423Sdim    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1133249423Sdim    break;
1134249423Sdim  }
1135249423Sdim}
1136249423Sdim
1137249423Sdim//===----------------------------------------------------------------------===//
1138251662Sdim// CopyConstrain - DAG post-processing to encourage copy elimination.
1139251662Sdim//===----------------------------------------------------------------------===//
1140251662Sdim
1141251662Sdimnamespace {
1142251662Sdim/// \brief Post-process the DAG to create weak edges from all uses of a copy to
1143251662Sdim/// the one use that defines the copy's source vreg, most likely an induction
1144251662Sdim/// variable increment.
1145251662Sdimclass CopyConstrain : public ScheduleDAGMutation {
1146251662Sdim  // Transient state.
1147251662Sdim  SlotIndex RegionBeginIdx;
1148251662Sdim  // RegionEndIdx is the slot index of the last non-debug instruction in the
1149251662Sdim  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1150251662Sdim  SlotIndex RegionEndIdx;
1151251662Sdimpublic:
1152251662Sdim  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1153251662Sdim
1154251662Sdim  virtual void apply(ScheduleDAGMI *DAG);
1155251662Sdim
1156251662Sdimprotected:
1157251662Sdim  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
1158251662Sdim};
1159251662Sdim} // anonymous
1160251662Sdim
1161251662Sdim/// constrainLocalCopy handles two possibilities:
1162251662Sdim/// 1) Local src:
1163251662Sdim/// I0:     = dst
1164251662Sdim/// I1: src = ...
1165251662Sdim/// I2:     = dst
1166251662Sdim/// I3: dst = src (copy)
1167251662Sdim/// (create pred->succ edges I0->I1, I2->I1)
1168251662Sdim///
1169251662Sdim/// 2) Local copy:
1170251662Sdim/// I0: dst = src (copy)
1171251662Sdim/// I1:     = dst
1172251662Sdim/// I2: src = ...
1173251662Sdim/// I3:     = dst
1174251662Sdim/// (create pred->succ edges I1->I2, I3->I2)
1175251662Sdim///
1176251662Sdim/// Although the MachineScheduler is currently constrained to single blocks,
1177251662Sdim/// this algorithm should handle extended blocks. An EBB is a set of
1178251662Sdim/// contiguously numbered blocks such that the previous block in the EBB is
1179251662Sdim/// always the single predecessor.
1180251662Sdimvoid CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
1181251662Sdim  LiveIntervals *LIS = DAG->getLIS();
1182251662Sdim  MachineInstr *Copy = CopySU->getInstr();
1183251662Sdim
1184251662Sdim  // Check for pure vreg copies.
1185251662Sdim  unsigned SrcReg = Copy->getOperand(1).getReg();
1186251662Sdim  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1187251662Sdim    return;
1188251662Sdim
1189251662Sdim  unsigned DstReg = Copy->getOperand(0).getReg();
1190251662Sdim  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
1191251662Sdim    return;
1192251662Sdim
1193251662Sdim  // Check if either the dest or source is local. If it's live across a back
1194251662Sdim  // edge, it's not local. Note that if both vregs are live across the back
1195251662Sdim  // edge, we cannot successfully contrain the copy without cyclic scheduling.
1196251662Sdim  unsigned LocalReg = DstReg;
1197251662Sdim  unsigned GlobalReg = SrcReg;
1198251662Sdim  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1199251662Sdim  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1200251662Sdim    LocalReg = SrcReg;
1201251662Sdim    GlobalReg = DstReg;
1202251662Sdim    LocalLI = &LIS->getInterval(LocalReg);
1203251662Sdim    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1204251662Sdim      return;
1205251662Sdim  }
1206251662Sdim  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1207251662Sdim
1208251662Sdim  // Find the global segment after the start of the local LI.
1209251662Sdim  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1210251662Sdim  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1211251662Sdim  // local live range. We could create edges from other global uses to the local
1212251662Sdim  // start, but the coalescer should have already eliminated these cases, so
1213251662Sdim  // don't bother dealing with it.
1214251662Sdim  if (GlobalSegment == GlobalLI->end())
1215251662Sdim    return;
1216251662Sdim
1217251662Sdim  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1218251662Sdim  // returned the next global segment. But if GlobalSegment overlaps with
1219251662Sdim  // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
1220251662Sdim  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1221251662Sdim  if (GlobalSegment->contains(LocalLI->beginIndex()))
1222251662Sdim    ++GlobalSegment;
1223251662Sdim
1224251662Sdim  if (GlobalSegment == GlobalLI->end())
1225251662Sdim    return;
1226251662Sdim
1227251662Sdim  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1228251662Sdim  if (GlobalSegment != GlobalLI->begin()) {
1229251662Sdim    // Two address defs have no hole.
1230251662Sdim    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
1231251662Sdim                               GlobalSegment->start)) {
1232251662Sdim      return;
1233251662Sdim    }
1234263508Sdim    // If the prior global segment may be defined by the same two-address
1235263508Sdim    // instruction that also defines LocalLI, then can't make a hole here.
1236263508Sdim    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
1237263508Sdim                               LocalLI->beginIndex())) {
1238263508Sdim      return;
1239263508Sdim    }
1240251662Sdim    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1241251662Sdim    // it would be a disconnected component in the live range.
1242251662Sdim    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
1243251662Sdim           "Disconnected LRG within the scheduling region.");
1244251662Sdim  }
1245251662Sdim  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1246251662Sdim  if (!GlobalDef)
1247251662Sdim    return;
1248251662Sdim
1249251662Sdim  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1250251662Sdim  if (!GlobalSU)
1251251662Sdim    return;
1252251662Sdim
1253251662Sdim  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1254251662Sdim  // constraining the uses of the last local def to precede GlobalDef.
1255251662Sdim  SmallVector<SUnit*,8> LocalUses;
1256251662Sdim  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1257251662Sdim  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1258251662Sdim  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1259251662Sdim  for (SUnit::const_succ_iterator
1260251662Sdim         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1261251662Sdim       I != E; ++I) {
1262251662Sdim    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1263251662Sdim      continue;
1264251662Sdim    if (I->getSUnit() == GlobalSU)
1265251662Sdim      continue;
1266251662Sdim    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1267251662Sdim      return;
1268251662Sdim    LocalUses.push_back(I->getSUnit());
1269251662Sdim  }
1270251662Sdim  // Open the top of the GlobalLI hole by constraining any earlier global uses
1271251662Sdim  // to precede the start of LocalLI.
1272251662Sdim  SmallVector<SUnit*,8> GlobalUses;
1273251662Sdim  MachineInstr *FirstLocalDef =
1274251662Sdim    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1275251662Sdim  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1276251662Sdim  for (SUnit::const_pred_iterator
1277251662Sdim         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1278251662Sdim    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1279251662Sdim      continue;
1280251662Sdim    if (I->getSUnit() == FirstLocalSU)
1281251662Sdim      continue;
1282251662Sdim    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1283251662Sdim      return;
1284251662Sdim    GlobalUses.push_back(I->getSUnit());
1285251662Sdim  }
1286251662Sdim  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1287251662Sdim  // Add the weak edges.
1288251662Sdim  for (SmallVectorImpl<SUnit*>::const_iterator
1289251662Sdim         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1290251662Sdim    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1291251662Sdim          << GlobalSU->NodeNum << ")\n");
1292251662Sdim    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1293251662Sdim  }
1294251662Sdim  for (SmallVectorImpl<SUnit*>::const_iterator
1295251662Sdim         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1296251662Sdim    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1297251662Sdim          << FirstLocalSU->NodeNum << ")\n");
1298251662Sdim    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1299251662Sdim  }
1300251662Sdim}
1301251662Sdim
1302251662Sdim/// \brief Callback from DAG postProcessing to create weak edges to encourage
1303251662Sdim/// copy elimination.
1304251662Sdimvoid CopyConstrain::apply(ScheduleDAGMI *DAG) {
1305251662Sdim  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1306251662Sdim  if (FirstPos == DAG->end())
1307251662Sdim    return;
1308251662Sdim  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1309251662Sdim  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1310251662Sdim    &*priorNonDebug(DAG->end(), DAG->begin()));
1311251662Sdim
1312251662Sdim  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1313251662Sdim    SUnit *SU = &DAG->SUnits[Idx];
1314251662Sdim    if (!SU->getInstr()->isCopy())
1315251662Sdim      continue;
1316251662Sdim
1317251662Sdim    constrainLocalCopy(SU, DAG);
1318251662Sdim  }
1319251662Sdim}
1320251662Sdim
1321251662Sdim//===----------------------------------------------------------------------===//
1322263508Sdim// GenericScheduler - Implementation of the generic MachineSchedStrategy.
1323234285Sdim//===----------------------------------------------------------------------===//
1324234285Sdim
1325234285Sdimnamespace {
1326263508Sdim/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1327243830Sdim/// the schedule.
1328263508Sdimclass GenericScheduler : public MachineSchedStrategy {
1329239462Sdimpublic:
1330243830Sdim  /// Represent the type of SchedCandidate found within a single queue.
1331243830Sdim  /// pickNodeBidirectional depends on these listed by decreasing priority.
1332243830Sdim  enum CandReason {
1333263508Sdim    NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
1334249423Sdim    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
1335263508Sdim    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
1336239462Sdim
1337243830Sdim#ifndef NDEBUG
1338263508Sdim  static const char *getReasonStr(GenericScheduler::CandReason Reason);
1339243830Sdim#endif
1340239462Sdim
1341243830Sdim  /// Policy for scheduling the next instruction in the candidate's zone.
1342243830Sdim  struct CandPolicy {
1343243830Sdim    bool ReduceLatency;
1344243830Sdim    unsigned ReduceResIdx;
1345243830Sdim    unsigned DemandResIdx;
1346239462Sdim
1347243830Sdim    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1348243830Sdim  };
1349239462Sdim
1350243830Sdim  /// Status of an instruction's critical resource consumption.
1351243830Sdim  struct SchedResourceDelta {
1352243830Sdim    // Count critical resources in the scheduled region required by SU.
1353243830Sdim    unsigned CritResources;
1354239462Sdim
1355243830Sdim    // Count critical resources from another region consumed by SU.
1356243830Sdim    unsigned DemandedResources;
1357239462Sdim
1358243830Sdim    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1359239462Sdim
1360243830Sdim    bool operator==(const SchedResourceDelta &RHS) const {
1361243830Sdim      return CritResources == RHS.CritResources
1362243830Sdim        && DemandedResources == RHS.DemandedResources;
1363243830Sdim    }
1364243830Sdim    bool operator!=(const SchedResourceDelta &RHS) const {
1365243830Sdim      return !operator==(RHS);
1366243830Sdim    }
1367243830Sdim  };
1368239462Sdim
1369263508Sdim  /// Store the state used by GenericScheduler heuristics, required for the
1370239462Sdim  /// lifetime of one invocation of pickNode().
1371239462Sdim  struct SchedCandidate {
1372243830Sdim    CandPolicy Policy;
1373243830Sdim
1374239462Sdim    // The best SUnit candidate.
1375239462Sdim    SUnit *SU;
1376239462Sdim
1377243830Sdim    // The reason for this candidate.
1378243830Sdim    CandReason Reason;
1379243830Sdim
1380263508Sdim    // Set of reasons that apply to multiple candidates.
1381263508Sdim    uint32_t RepeatReasonSet;
1382263508Sdim
1383239462Sdim    // Register pressure values for the best candidate.
1384239462Sdim    RegPressureDelta RPDelta;
1385239462Sdim
1386243830Sdim    // Critical resource consumption of the best candidate.
1387243830Sdim    SchedResourceDelta ResDelta;
1388243830Sdim
1389243830Sdim    SchedCandidate(const CandPolicy &policy)
1390263508Sdim      : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
1391243830Sdim
1392243830Sdim    bool isValid() const { return SU; }
1393243830Sdim
1394243830Sdim    // Copy the status of another candidate without changing policy.
1395243830Sdim    void setBest(SchedCandidate &Best) {
1396243830Sdim      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1397243830Sdim      SU = Best.SU;
1398243830Sdim      Reason = Best.Reason;
1399243830Sdim      RPDelta = Best.RPDelta;
1400243830Sdim      ResDelta = Best.ResDelta;
1401243830Sdim    }
1402243830Sdim
1403263508Sdim    bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
1404263508Sdim    void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
1405263508Sdim
1406243830Sdim    void initResourceDelta(const ScheduleDAGMI *DAG,
1407243830Sdim                           const TargetSchedModel *SchedModel);
1408239462Sdim  };
1409239462Sdim
1410243830Sdim  /// Summarize the unscheduled region.
1411243830Sdim  struct SchedRemainder {
1412243830Sdim    // Critical path through the DAG in expected latency.
1413243830Sdim    unsigned CriticalPath;
1414263508Sdim    unsigned CyclicCritPath;
1415243830Sdim
1416263508Sdim    // Scaled count of micro-ops left to schedule.
1417263508Sdim    unsigned RemIssueCount;
1418263508Sdim
1419263508Sdim    bool IsAcyclicLatencyLimited;
1420263508Sdim
1421243830Sdim    // Unscheduled resources
1422243830Sdim    SmallVector<unsigned, 16> RemainingCounts;
1423243830Sdim
1424243830Sdim    void reset() {
1425243830Sdim      CriticalPath = 0;
1426263508Sdim      CyclicCritPath = 0;
1427263508Sdim      RemIssueCount = 0;
1428263508Sdim      IsAcyclicLatencyLimited = false;
1429243830Sdim      RemainingCounts.clear();
1430243830Sdim    }
1431243830Sdim
1432243830Sdim    SchedRemainder() { reset(); }
1433243830Sdim
1434243830Sdim    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1435243830Sdim  };
1436243830Sdim
1437239462Sdim  /// Each Scheduling boundary is associated with ready queues. It tracks the
1438243830Sdim  /// current cycle in the direction of movement, and maintains the state
1439239462Sdim  /// of "hazards" and other interlocks at the current cycle.
1440239462Sdim  struct SchedBoundary {
1441239462Sdim    ScheduleDAGMI *DAG;
1442243830Sdim    const TargetSchedModel *SchedModel;
1443243830Sdim    SchedRemainder *Rem;
1444239462Sdim
1445239462Sdim    ReadyQueue Available;
1446239462Sdim    ReadyQueue Pending;
1447239462Sdim    bool CheckPending;
1448239462Sdim
1449243830Sdim    // For heuristics, keep a list of the nodes that immediately depend on the
1450243830Sdim    // most recently scheduled node.
1451243830Sdim    SmallPtrSet<const SUnit*, 8> NextSUs;
1452243830Sdim
1453239462Sdim    ScheduleHazardRecognizer *HazardRec;
1454239462Sdim
1455263508Sdim    /// Number of cycles it takes to issue the instructions scheduled in this
1456263508Sdim    /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
1457263508Sdim    /// See getStalls().
1458239462Sdim    unsigned CurrCycle;
1459239462Sdim
1460263508Sdim    /// Micro-ops issued in the current cycle
1461263508Sdim    unsigned CurrMOps;
1462263508Sdim
1463239462Sdim    /// MinReadyCycle - Cycle of the soonest available instruction.
1464239462Sdim    unsigned MinReadyCycle;
1465239462Sdim
1466243830Sdim    // The expected latency of the critical path in this scheduled zone.
1467243830Sdim    unsigned ExpectedLatency;
1468243830Sdim
1469263508Sdim    // The latency of dependence chains leading into this zone.
1470263508Sdim    // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
1471263508Sdim    // For each cycle scheduled: DLat -= 1.
1472263508Sdim    unsigned DependentLatency;
1473243830Sdim
1474263508Sdim    /// Count the scheduled (issued) micro-ops that can be retired by
1475263508Sdim    /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
1476263508Sdim    unsigned RetiredMOps;
1477263508Sdim
1478263508Sdim    // Count scheduled resources that have been executed. Resources are
1479263508Sdim    // considered executed if they become ready in the time that it takes to
1480263508Sdim    // saturate any resource including the one in question. Counts are scaled
1481263508Sdim    // for direct comparison with other resources. Counts can be compared with
1482263508Sdim    // MOps * getMicroOpFactor and Latency * getLatencyFactor.
1483263508Sdim    SmallVector<unsigned, 16> ExecutedResCounts;
1484263508Sdim
1485263508Sdim    /// Cache the max count for a single resource.
1486263508Sdim    unsigned MaxExecutedResCount;
1487263508Sdim
1488243830Sdim    // Cache the critical resources ID in this scheduled zone.
1489263508Sdim    unsigned ZoneCritResIdx;
1490243830Sdim
1491243830Sdim    // Is the scheduled region resource limited vs. latency limited.
1492243830Sdim    bool IsResourceLimited;
1493243830Sdim
1494243830Sdim#ifndef NDEBUG
1495263508Sdim    // Remember the greatest operand latency as an upper bound on the number of
1496263508Sdim    // times we should retry the pending queue because of a hazard.
1497263508Sdim    unsigned MaxObservedLatency;
1498243830Sdim#endif
1499239462Sdim
1500243830Sdim    void reset() {
1501249423Sdim      // A new HazardRec is created for each DAG and owned by SchedBoundary.
1502263508Sdim      // Destroying and reconstructing it is very expensive though. So keep
1503263508Sdim      // invalid, placeholder HazardRecs.
1504263508Sdim      if (HazardRec && HazardRec->isEnabled()) {
1505263508Sdim        delete HazardRec;
1506263508Sdim        HazardRec = 0;
1507263508Sdim      }
1508243830Sdim      Available.clear();
1509243830Sdim      Pending.clear();
1510243830Sdim      CheckPending = false;
1511243830Sdim      NextSUs.clear();
1512243830Sdim      CurrCycle = 0;
1513263508Sdim      CurrMOps = 0;
1514243830Sdim      MinReadyCycle = UINT_MAX;
1515243830Sdim      ExpectedLatency = 0;
1516263508Sdim      DependentLatency = 0;
1517263508Sdim      RetiredMOps = 0;
1518263508Sdim      MaxExecutedResCount = 0;
1519263508Sdim      ZoneCritResIdx = 0;
1520243830Sdim      IsResourceLimited = false;
1521243830Sdim#ifndef NDEBUG
1522263508Sdim      MaxObservedLatency = 0;
1523243830Sdim#endif
1524243830Sdim      // Reserve a zero-count for invalid CritResIdx.
1525263508Sdim      ExecutedResCounts.resize(1);
1526263508Sdim      assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1527243830Sdim    }
1528243830Sdim
1529239462Sdim    /// Pending queues extend the ready queues with the same ID and the
1530239462Sdim    /// PendingFlag set.
1531239462Sdim    SchedBoundary(unsigned ID, const Twine &Name):
1532243830Sdim      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1533263508Sdim      Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
1534249423Sdim      HazardRec(0) {
1535243830Sdim      reset();
1536243830Sdim    }
1537239462Sdim
1538239462Sdim    ~SchedBoundary() { delete HazardRec; }
1539239462Sdim
1540243830Sdim    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1541243830Sdim              SchedRemainder *rem);
1542243830Sdim
1543239462Sdim    bool isTop() const {
1544263508Sdim      return Available.getID() == GenericScheduler::TopQID;
1545239462Sdim    }
1546239462Sdim
1547263508Sdim#ifndef NDEBUG
1548263508Sdim    const char *getResourceName(unsigned PIdx) {
1549263508Sdim      if (!PIdx)
1550263508Sdim        return "MOps";
1551263508Sdim      return SchedModel->getProcResource(PIdx)->Name;
1552263508Sdim    }
1553263508Sdim#endif
1554263508Sdim
1555263508Sdim    /// Get the number of latency cycles "covered" by the scheduled
1556263508Sdim    /// instructions. This is the larger of the critical path within the zone
1557263508Sdim    /// and the number of cycles required to issue the instructions.
1558263508Sdim    unsigned getScheduledLatency() const {
1559263508Sdim      return std::max(ExpectedLatency, CurrCycle);
1560263508Sdim    }
1561263508Sdim
1562243830Sdim    unsigned getUnscheduledLatency(SUnit *SU) const {
1563263508Sdim      return isTop() ? SU->getHeight() : SU->getDepth();
1564243830Sdim    }
1565243830Sdim
1566263508Sdim    unsigned getResourceCount(unsigned ResIdx) const {
1567263508Sdim      return ExecutedResCounts[ResIdx];
1568263508Sdim    }
1569263508Sdim
1570263508Sdim    /// Get the scaled count of scheduled micro-ops and resources, including
1571263508Sdim    /// executed resources.
1572243830Sdim    unsigned getCriticalCount() const {
1573263508Sdim      if (!ZoneCritResIdx)
1574263508Sdim        return RetiredMOps * SchedModel->getMicroOpFactor();
1575263508Sdim      return getResourceCount(ZoneCritResIdx);
1576243830Sdim    }
1577243830Sdim
1578263508Sdim    /// Get a scaled count for the minimum execution time of the scheduled
1579263508Sdim    /// micro-ops that are ready to execute by getExecutedCount. Notice the
1580263508Sdim    /// feedback loop.
1581263508Sdim    unsigned getExecutedCount() const {
1582263508Sdim      return std::max(CurrCycle * SchedModel->getLatencyFactor(),
1583263508Sdim                      MaxExecutedResCount);
1584263508Sdim    }
1585263508Sdim
1586239462Sdim    bool checkHazard(SUnit *SU);
1587239462Sdim
1588263508Sdim    unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
1589243830Sdim
1590263508Sdim    unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1591263508Sdim
1592263508Sdim    void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
1593263508Sdim
1594239462Sdim    void releaseNode(SUnit *SU, unsigned ReadyCycle);
1595239462Sdim
1596263508Sdim    void bumpCycle(unsigned NextCycle);
1597239462Sdim
1598263508Sdim    void incExecutedResources(unsigned PIdx, unsigned Count);
1599243830Sdim
1600263508Sdim    unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
1601263508Sdim
1602239462Sdim    void bumpNode(SUnit *SU);
1603239462Sdim
1604239462Sdim    void releasePending();
1605239462Sdim
1606239462Sdim    void removeReady(SUnit *SU);
1607239462Sdim
1608239462Sdim    SUnit *pickOnlyChoice();
1609263508Sdim
1610263508Sdim#ifndef NDEBUG
1611263508Sdim    void dumpScheduledState();
1612263508Sdim#endif
1613239462Sdim  };
1614239462Sdim
1615243830Sdimprivate:
1616263508Sdim  const MachineSchedContext *Context;
1617234285Sdim  ScheduleDAGMI *DAG;
1618243830Sdim  const TargetSchedModel *SchedModel;
1619239462Sdim  const TargetRegisterInfo *TRI;
1620234285Sdim
1621239462Sdim  // State of the top and bottom scheduled instruction boundaries.
1622243830Sdim  SchedRemainder Rem;
1623239462Sdim  SchedBoundary Top;
1624239462Sdim  SchedBoundary Bot;
1625234285Sdim
1626263508Sdim  MachineSchedPolicy RegionPolicy;
1627234285Sdimpublic:
1628239462Sdim  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1629239462Sdim  enum {
1630239462Sdim    TopQID = 1,
1631239462Sdim    BotQID = 2,
1632239462Sdim    LogMaxQID = 2
1633239462Sdim  };
1634234285Sdim
1635263508Sdim  GenericScheduler(const MachineSchedContext *C):
1636263508Sdim    Context(C), DAG(0), SchedModel(0), TRI(0),
1637263508Sdim    Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1638239462Sdim
1639263508Sdim  virtual void initPolicy(MachineBasicBlock::iterator Begin,
1640263508Sdim                          MachineBasicBlock::iterator End,
1641263508Sdim                          unsigned NumRegionInstrs);
1642263508Sdim
1643263508Sdim  bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
1644263508Sdim
1645239462Sdim  virtual void initialize(ScheduleDAGMI *dag);
1646239462Sdim
1647239462Sdim  virtual SUnit *pickNode(bool &IsTopNode);
1648239462Sdim
1649239462Sdim  virtual void schedNode(SUnit *SU, bool IsTopNode);
1650239462Sdim
1651239462Sdim  virtual void releaseTopNode(SUnit *SU);
1652239462Sdim
1653239462Sdim  virtual void releaseBottomNode(SUnit *SU);
1654239462Sdim
1655243830Sdim  virtual void registerRoots();
1656243830Sdim
1657239462Sdimprotected:
1658263508Sdim  void checkAcyclicLatency();
1659239462Sdim
1660243830Sdim  void tryCandidate(SchedCandidate &Cand,
1661243830Sdim                    SchedCandidate &TryCand,
1662243830Sdim                    SchedBoundary &Zone,
1663243830Sdim                    const RegPressureTracker &RPTracker,
1664243830Sdim                    RegPressureTracker &TempTracker);
1665243830Sdim
1666243830Sdim  SUnit *pickNodeBidirectional(bool &IsTopNode);
1667243830Sdim
1668243830Sdim  void pickNodeFromQueue(SchedBoundary &Zone,
1669243830Sdim                         const RegPressureTracker &RPTracker,
1670243830Sdim                         SchedCandidate &Candidate);
1671243830Sdim
1672251662Sdim  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
1673251662Sdim
1674239462Sdim#ifndef NDEBUG
1675249423Sdim  void traceCandidate(const SchedCandidate &Cand);
1676239462Sdim#endif
1677239462Sdim};
1678239462Sdim} // namespace
1679239462Sdim
1680263508Sdimvoid GenericScheduler::SchedRemainder::
1681243830Sdiminit(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1682243830Sdim  reset();
1683243830Sdim  if (!SchedModel->hasInstrSchedModel())
1684243830Sdim    return;
1685243830Sdim  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1686243830Sdim  for (std::vector<SUnit>::iterator
1687243830Sdim         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1688243830Sdim    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1689263508Sdim    RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1690263508Sdim      * SchedModel->getMicroOpFactor();
1691243830Sdim    for (TargetSchedModel::ProcResIter
1692243830Sdim           PI = SchedModel->getWriteProcResBegin(SC),
1693243830Sdim           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1694243830Sdim      unsigned PIdx = PI->ProcResourceIdx;
1695243830Sdim      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1696243830Sdim      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1697243830Sdim    }
1698243830Sdim  }
1699243830Sdim}
1700243830Sdim
1701263508Sdimvoid GenericScheduler::SchedBoundary::
1702243830Sdiminit(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1703243830Sdim  reset();
1704243830Sdim  DAG = dag;
1705243830Sdim  SchedModel = smodel;
1706243830Sdim  Rem = rem;
1707243830Sdim  if (SchedModel->hasInstrSchedModel())
1708263508Sdim    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1709243830Sdim}
1710243830Sdim
1711263508Sdim/// Initialize the per-region scheduling policy.
1712263508Sdimvoid GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
1713263508Sdim                                     MachineBasicBlock::iterator End,
1714263508Sdim                                     unsigned NumRegionInstrs) {
1715263508Sdim  const TargetMachine &TM = Context->MF->getTarget();
1716263508Sdim
1717263508Sdim  // Avoid setting up the register pressure tracker for small regions to save
1718263508Sdim  // compile time. As a rough heuristic, only track pressure when the number of
1719263508Sdim  // schedulable instructions exceeds half the integer register file.
1720263508Sdim  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
1721263508Sdim    TM.getTargetLowering()->getRegClassFor(MVT::i32));
1722263508Sdim
1723263508Sdim  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
1724263508Sdim
1725263508Sdim  // For generic targets, we default to bottom-up, because it's simpler and more
1726263508Sdim  // compile-time optimizations have been implemented in that direction.
1727263508Sdim  RegionPolicy.OnlyBottomUp = true;
1728263508Sdim
1729263508Sdim  // Allow the subtarget to override default policy.
1730263508Sdim  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
1731263508Sdim  ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
1732263508Sdim
1733263508Sdim  // After subtarget overrides, apply command line options.
1734263508Sdim  if (!EnableRegPressure)
1735263508Sdim    RegionPolicy.ShouldTrackPressure = false;
1736263508Sdim
1737263508Sdim  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
1738263508Sdim  // e.g. -misched-bottomup=false allows scheduling in both directions.
1739263508Sdim  assert((!ForceTopDown || !ForceBottomUp) &&
1740263508Sdim         "-misched-topdown incompatible with -misched-bottomup");
1741263508Sdim  if (ForceBottomUp.getNumOccurrences() > 0) {
1742263508Sdim    RegionPolicy.OnlyBottomUp = ForceBottomUp;
1743263508Sdim    if (RegionPolicy.OnlyBottomUp)
1744263508Sdim      RegionPolicy.OnlyTopDown = false;
1745263508Sdim  }
1746263508Sdim  if (ForceTopDown.getNumOccurrences() > 0) {
1747263508Sdim    RegionPolicy.OnlyTopDown = ForceTopDown;
1748263508Sdim    if (RegionPolicy.OnlyTopDown)
1749263508Sdim      RegionPolicy.OnlyBottomUp = false;
1750263508Sdim  }
1751263508Sdim}
1752263508Sdim
1753263508Sdimvoid GenericScheduler::initialize(ScheduleDAGMI *dag) {
1754239462Sdim  DAG = dag;
1755243830Sdim  SchedModel = DAG->getSchedModel();
1756239462Sdim  TRI = DAG->TRI;
1757249423Sdim
1758243830Sdim  Rem.init(DAG, SchedModel);
1759243830Sdim  Top.init(DAG, SchedModel, &Rem);
1760243830Sdim  Bot.init(DAG, SchedModel, &Rem);
1761239462Sdim
1762243830Sdim  // Initialize resource counts.
1763243830Sdim
1764243830Sdim  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1765243830Sdim  // are disabled, then these HazardRecs will be disabled.
1766243830Sdim  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1767239462Sdim  const TargetMachine &TM = DAG->MF.getTarget();
1768263508Sdim  if (!Top.HazardRec) {
1769263508Sdim    Top.HazardRec =
1770263508Sdim      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1771263508Sdim  }
1772263508Sdim  if (!Bot.HazardRec) {
1773263508Sdim    Bot.HazardRec =
1774263508Sdim      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1775263508Sdim  }
1776239462Sdim}
1777239462Sdim
1778263508Sdimvoid GenericScheduler::releaseTopNode(SUnit *SU) {
1779239462Sdim  if (SU->isScheduled)
1780239462Sdim    return;
1781239462Sdim
1782249423Sdim  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1783239462Sdim       I != E; ++I) {
1784263508Sdim    if (I->isWeak())
1785263508Sdim      continue;
1786239462Sdim    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1787263508Sdim    unsigned Latency = I->getLatency();
1788239462Sdim#ifndef NDEBUG
1789263508Sdim    Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
1790239462Sdim#endif
1791263508Sdim    if (SU->TopReadyCycle < PredReadyCycle + Latency)
1792263508Sdim      SU->TopReadyCycle = PredReadyCycle + Latency;
1793234285Sdim  }
1794239462Sdim  Top.releaseNode(SU, SU->TopReadyCycle);
1795239462Sdim}
1796234285Sdim
1797263508Sdimvoid GenericScheduler::releaseBottomNode(SUnit *SU) {
1798239462Sdim  if (SU->isScheduled)
1799239462Sdim    return;
1800234285Sdim
1801239462Sdim  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1802239462Sdim
1803239462Sdim  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1804239462Sdim       I != E; ++I) {
1805249423Sdim    if (I->isWeak())
1806249423Sdim      continue;
1807239462Sdim    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1808263508Sdim    unsigned Latency = I->getLatency();
1809239462Sdim#ifndef NDEBUG
1810263508Sdim    Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
1811239462Sdim#endif
1812263508Sdim    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1813263508Sdim      SU->BotReadyCycle = SuccReadyCycle + Latency;
1814239462Sdim  }
1815239462Sdim  Bot.releaseNode(SU, SU->BotReadyCycle);
1816239462Sdim}
1817239462Sdim
1818263508Sdim/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
1819263508Sdim/// critical path by more cycles than it takes to drain the instruction buffer.
1820263508Sdim/// We estimate an upper bounds on in-flight instructions as:
1821263508Sdim///
1822263508Sdim/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
1823263508Sdim/// InFlightIterations = AcyclicPath / CyclesPerIteration
1824263508Sdim/// InFlightResources = InFlightIterations * LoopResources
1825263508Sdim///
1826263508Sdim/// TODO: Check execution resources in addition to IssueCount.
1827263508Sdimvoid GenericScheduler::checkAcyclicLatency() {
1828263508Sdim  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
1829263508Sdim    return;
1830263508Sdim
1831263508Sdim  // Scaled number of cycles per loop iteration.
1832263508Sdim  unsigned IterCount =
1833263508Sdim    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
1834263508Sdim             Rem.RemIssueCount);
1835263508Sdim  // Scaled acyclic critical path.
1836263508Sdim  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
1837263508Sdim  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
1838263508Sdim  unsigned InFlightCount =
1839263508Sdim    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
1840263508Sdim  unsigned BufferLimit =
1841263508Sdim    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
1842263508Sdim
1843263508Sdim  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
1844263508Sdim
1845263508Sdim  DEBUG(dbgs() << "IssueCycles="
1846263508Sdim        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
1847263508Sdim        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
1848263508Sdim        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
1849263508Sdim        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
1850263508Sdim        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
1851263508Sdim        if (Rem.IsAcyclicLatencyLimited)
1852263508Sdim          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
1853263508Sdim}
1854263508Sdim
1855263508Sdimvoid GenericScheduler::registerRoots() {
1856243830Sdim  Rem.CriticalPath = DAG->ExitSU.getDepth();
1857263508Sdim
1858243830Sdim  // Some roots may not feed into ExitSU. Check all of them in case.
1859243830Sdim  for (std::vector<SUnit*>::const_iterator
1860243830Sdim         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1861243830Sdim    if ((*I)->getDepth() > Rem.CriticalPath)
1862243830Sdim      Rem.CriticalPath = (*I)->getDepth();
1863243830Sdim  }
1864243830Sdim  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1865263508Sdim
1866263508Sdim  if (EnableCyclicPath) {
1867263508Sdim    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
1868263508Sdim    checkAcyclicLatency();
1869263508Sdim  }
1870243830Sdim}
1871243830Sdim
1872239462Sdim/// Does this SU have a hazard within the current instruction group.
1873239462Sdim///
1874239462Sdim/// The scheduler supports two modes of hazard recognition. The first is the
1875239462Sdim/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1876239462Sdim/// supports highly complicated in-order reservation tables
1877239462Sdim/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1878239462Sdim///
1879239462Sdim/// The second is a streamlined mechanism that checks for hazards based on
1880239462Sdim/// simple counters that the scheduler itself maintains. It explicitly checks
1881239462Sdim/// for instruction dispatch limitations, including the number of micro-ops that
1882239462Sdim/// can dispatch per cycle.
1883239462Sdim///
1884239462Sdim/// TODO: Also check whether the SU must start a new group.
1885263508Sdimbool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1886239462Sdim  if (HazardRec->isEnabled())
1887239462Sdim    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1888239462Sdim
1889243830Sdim  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1890263508Sdim  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1891243830Sdim    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1892243830Sdim          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1893239462Sdim    return true;
1894243830Sdim  }
1895239462Sdim  return false;
1896239462Sdim}
1897239462Sdim
1898263508Sdim// Find the unscheduled node in ReadySUs with the highest latency.
1899263508Sdimunsigned GenericScheduler::SchedBoundary::
1900263508SdimfindMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1901263508Sdim  SUnit *LateSU = 0;
1902249423Sdim  unsigned RemLatency = 0;
1903263508Sdim  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1904249423Sdim       I != E; ++I) {
1905249423Sdim    unsigned L = getUnscheduledLatency(*I);
1906263508Sdim    if (L > RemLatency) {
1907249423Sdim      RemLatency = L;
1908263508Sdim      LateSU = *I;
1909263508Sdim    }
1910243830Sdim  }
1911263508Sdim  if (LateSU) {
1912263508Sdim    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1913263508Sdim          << LateSU->NodeNum << ") " << RemLatency << "c\n");
1914249423Sdim  }
1915263508Sdim  return RemLatency;
1916263508Sdim}
1917263508Sdim
1918263508Sdim// Count resources in this zone and the remaining unscheduled
1919263508Sdim// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1920263508Sdim// resource index, or zero if the zone is issue limited.
1921263508Sdimunsigned GenericScheduler::SchedBoundary::
1922263508SdimgetOtherResourceCount(unsigned &OtherCritIdx) {
1923263508Sdim  OtherCritIdx = 0;
1924263508Sdim  if (!SchedModel->hasInstrSchedModel())
1925263508Sdim    return 0;
1926263508Sdim
1927263508Sdim  unsigned OtherCritCount = Rem->RemIssueCount
1928263508Sdim    + (RetiredMOps * SchedModel->getMicroOpFactor());
1929263508Sdim  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1930263508Sdim        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1931263508Sdim  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1932263508Sdim       PIdx != PEnd; ++PIdx) {
1933263508Sdim    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1934263508Sdim    if (OtherCount > OtherCritCount) {
1935263508Sdim      OtherCritCount = OtherCount;
1936263508Sdim      OtherCritIdx = PIdx;
1937263508Sdim    }
1938249423Sdim  }
1939263508Sdim  if (OtherCritIdx) {
1940263508Sdim    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1941263508Sdim          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1942263508Sdim          << " " << getResourceName(OtherCritIdx) << "\n");
1943263508Sdim  }
1944263508Sdim  return OtherCritCount;
1945243830Sdim}
1946243830Sdim
1947263508Sdim/// Set the CandPolicy for this zone given the current resources and latencies
1948263508Sdim/// inside and outside the zone.
1949263508Sdimvoid GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
1950263508Sdim                                                   SchedBoundary &OtherZone) {
1951263508Sdim  // Now that potential stalls have been considered, apply preemptive heuristics
1952263508Sdim  // based on the the total latency and resources inside and outside this
1953263508Sdim  // zone.
1954263508Sdim
1955263508Sdim  // Compute remaining latency. We need this both to determine whether the
1956263508Sdim  // overall schedule has become latency-limited and whether the instructions
1957263508Sdim  // outside this zone are resource or latency limited.
1958263508Sdim  //
1959263508Sdim  // The "dependent" latency is updated incrementally during scheduling as the
1960263508Sdim  // max height/depth of scheduled nodes minus the cycles since it was
1961263508Sdim  // scheduled:
1962263508Sdim  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
1963263508Sdim  //
1964263508Sdim  // The "independent" latency is the max ready queue depth:
1965263508Sdim  //   ILat = max N.depth for N in Available|Pending
1966263508Sdim  //
1967263508Sdim  // RemainingLatency is the greater of independent and dependent latency.
1968263508Sdim  unsigned RemLatency = DependentLatency;
1969263508Sdim  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
1970263508Sdim  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
1971263508Sdim
1972263508Sdim  // Compute the critical resource outside the zone.
1973263508Sdim  unsigned OtherCritIdx;
1974263508Sdim  unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
1975263508Sdim
1976263508Sdim  bool OtherResLimited = false;
1977263508Sdim  if (SchedModel->hasInstrSchedModel()) {
1978263508Sdim    unsigned LFactor = SchedModel->getLatencyFactor();
1979263508Sdim    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
1980263508Sdim  }
1981263508Sdim  if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
1982263508Sdim    Policy.ReduceLatency |= true;
1983263508Sdim    DEBUG(dbgs() << "  " << Available.getName() << " RemainingLatency "
1984263508Sdim          << RemLatency << " + " << CurrCycle << "c > CritPath "
1985263508Sdim          << Rem->CriticalPath << "\n");
1986263508Sdim  }
1987263508Sdim  // If the same resource is limiting inside and outside the zone, do nothing.
1988263508Sdim  if (ZoneCritResIdx == OtherCritIdx)
1989263508Sdim    return;
1990263508Sdim
1991263508Sdim  DEBUG(
1992263508Sdim    if (IsResourceLimited) {
1993263508Sdim      dbgs() << "  " << Available.getName() << " ResourceLimited: "
1994263508Sdim             << getResourceName(ZoneCritResIdx) << "\n";
1995263508Sdim    }
1996263508Sdim    if (OtherResLimited)
1997263508Sdim      dbgs() << "  RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
1998263508Sdim    if (!IsResourceLimited && !OtherResLimited)
1999263508Sdim      dbgs() << "  Latency limited both directions.\n");
2000263508Sdim
2001263508Sdim  if (IsResourceLimited && !Policy.ReduceResIdx)
2002263508Sdim    Policy.ReduceResIdx = ZoneCritResIdx;
2003263508Sdim
2004263508Sdim  if (OtherResLimited)
2005263508Sdim    Policy.DemandResIdx = OtherCritIdx;
2006263508Sdim}
2007263508Sdim
2008263508Sdimvoid GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
2009239462Sdim                                                     unsigned ReadyCycle) {
2010239462Sdim  if (ReadyCycle < MinReadyCycle)
2011239462Sdim    MinReadyCycle = ReadyCycle;
2012239462Sdim
2013239462Sdim  // Check for interlocks first. For the purpose of other heuristics, an
2014239462Sdim  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2015263508Sdim  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2016263508Sdim  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
2017239462Sdim    Pending.push(SU);
2018239462Sdim  else
2019239462Sdim    Available.push(SU);
2020243830Sdim
2021243830Sdim  // Record this node as an immediate dependent of the scheduled node.
2022243830Sdim  NextSUs.insert(SU);
2023239462Sdim}
2024239462Sdim
2025239462Sdim/// Move the boundary of scheduled code by one cycle.
2026263508Sdimvoid GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
2027263508Sdim  if (SchedModel->getMicroOpBufferSize() == 0) {
2028263508Sdim    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
2029263508Sdim    if (MinReadyCycle > NextCycle)
2030263508Sdim      NextCycle = MinReadyCycle;
2031243830Sdim  }
2032263508Sdim  // Update the current micro-ops, which will issue in the next cycle.
2033263508Sdim  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2034263508Sdim  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2035239462Sdim
2036263508Sdim  // Decrement DependentLatency based on the next cycle.
2037263508Sdim  if ((NextCycle - CurrCycle) > DependentLatency)
2038263508Sdim    DependentLatency = 0;
2039263508Sdim  else
2040263508Sdim    DependentLatency -= (NextCycle - CurrCycle);
2041263508Sdim
2042239462Sdim  if (!HazardRec->isEnabled()) {
2043239462Sdim    // Bypass HazardRec virtual calls.
2044239462Sdim    CurrCycle = NextCycle;
2045239462Sdim  }
2046239462Sdim  else {
2047239462Sdim    // Bypass getHazardType calls in case of long latency.
2048239462Sdim    for (; CurrCycle != NextCycle; ++CurrCycle) {
2049239462Sdim      if (isTop())
2050239462Sdim        HazardRec->AdvanceCycle();
2051239462Sdim      else
2052239462Sdim        HazardRec->RecedeCycle();
2053234285Sdim    }
2054239462Sdim  }
2055239462Sdim  CheckPending = true;
2056263508Sdim  unsigned LFactor = SchedModel->getLatencyFactor();
2057263508Sdim  IsResourceLimited =
2058263508Sdim    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2059263508Sdim    > (int)LFactor;
2060239462Sdim
2061263508Sdim  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2062239462Sdim}
2063239462Sdim
2064263508Sdimvoid GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
2065263508Sdim                                                              unsigned Count) {
2066263508Sdim  ExecutedResCounts[PIdx] += Count;
2067263508Sdim  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2068263508Sdim    MaxExecutedResCount = ExecutedResCounts[PIdx];
2069263508Sdim}
2070263508Sdim
2071243830Sdim/// Add the given processor resource to this scheduled zone.
2072263508Sdim///
2073263508Sdim/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2074263508Sdim/// during which this resource is consumed.
2075263508Sdim///
2076263508Sdim/// \return the next cycle at which the instruction may execute without
2077263508Sdim/// oversubscribing resources.
2078263508Sdimunsigned GenericScheduler::SchedBoundary::
2079263508SdimcountResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
2080243830Sdim  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2081263508Sdim  unsigned Count = Factor * Cycles;
2082263508Sdim  DEBUG(dbgs() << "  " << getResourceName(PIdx)
2083263508Sdim        << " +" << Cycles << "x" << Factor << "u\n");
2084243830Sdim
2085263508Sdim  // Update Executed resources counts.
2086263508Sdim  incExecutedResources(PIdx, Count);
2087243830Sdim  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2088243830Sdim  Rem->RemainingCounts[PIdx] -= Count;
2089243830Sdim
2090263508Sdim  // Check if this resource exceeds the current critical resource. If so, it
2091263508Sdim  // becomes the critical resource.
2092263508Sdim  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2093263508Sdim    ZoneCritResIdx = PIdx;
2094243830Sdim    DEBUG(dbgs() << "  *** Critical resource "
2095263508Sdim          << getResourceName(PIdx) << ": "
2096263508Sdim          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2097243830Sdim  }
2098263508Sdim  // TODO: We don't yet model reserved resources. It's not hard though.
2099263508Sdim  return CurrCycle;
2100243830Sdim}
2101243830Sdim
2102239462Sdim/// Move the boundary of scheduled code by one SUnit.
2103263508Sdimvoid GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
2104239462Sdim  // Update the reservation table.
2105239462Sdim  if (HazardRec->isEnabled()) {
2106239462Sdim    if (!isTop() && SU->isCall) {
2107239462Sdim      // Calls are scheduled with their preceding instructions. For bottom-up
2108239462Sdim      // scheduling, clear the pipeline state before emitting.
2109239462Sdim      HazardRec->Reset();
2110234285Sdim    }
2111239462Sdim    HazardRec->EmitInstruction(SU);
2112239462Sdim  }
2113263508Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2114263508Sdim  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2115263508Sdim  CurrMOps += IncMOps;
2116263508Sdim  // checkHazard prevents scheduling multiple instructions per cycle that exceed
2117263508Sdim  // issue width. However, we commonly reach the maximum. In this case
2118263508Sdim  // opportunistically bump the cycle to avoid uselessly checking everything in
2119263508Sdim  // the readyQ. Furthermore, a single instruction may produce more than one
2120263508Sdim  // cycle's worth of micro-ops.
2121263508Sdim  //
2122263508Sdim  // TODO: Also check if this SU must end a dispatch group.
2123263508Sdim  unsigned NextCycle = CurrCycle;
2124263508Sdim  if (CurrMOps >= SchedModel->getIssueWidth()) {
2125263508Sdim    ++NextCycle;
2126263508Sdim    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
2127263508Sdim          << " at cycle " << CurrCycle << '\n');
2128263508Sdim  }
2129263508Sdim  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2130263508Sdim  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2131263508Sdim
2132263508Sdim  switch (SchedModel->getMicroOpBufferSize()) {
2133263508Sdim  case 0:
2134263508Sdim    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2135263508Sdim    break;
2136263508Sdim  case 1:
2137263508Sdim    if (ReadyCycle > NextCycle) {
2138263508Sdim      NextCycle = ReadyCycle;
2139263508Sdim      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2140263508Sdim    }
2141263508Sdim    break;
2142263508Sdim  default:
2143263508Sdim    // We don't currently model the OOO reorder buffer, so consider all
2144263508Sdim    // scheduled MOps to be "retired".
2145263508Sdim    break;
2146263508Sdim  }
2147263508Sdim  RetiredMOps += IncMOps;
2148263508Sdim
2149243830Sdim  // Update resource counts and critical resource.
2150243830Sdim  if (SchedModel->hasInstrSchedModel()) {
2151263508Sdim    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2152263508Sdim    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2153263508Sdim    Rem->RemIssueCount -= DecRemIssue;
2154263508Sdim    if (ZoneCritResIdx) {
2155263508Sdim      // Scale scheduled micro-ops for comparing with the critical resource.
2156263508Sdim      unsigned ScaledMOps =
2157263508Sdim        RetiredMOps * SchedModel->getMicroOpFactor();
2158263508Sdim
2159263508Sdim      // If scaled micro-ops are now more than the previous critical resource by
2160263508Sdim      // a full cycle, then micro-ops issue becomes critical.
2161263508Sdim      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2162263508Sdim          >= (int)SchedModel->getLatencyFactor()) {
2163263508Sdim        ZoneCritResIdx = 0;
2164263508Sdim        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2165263508Sdim              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2166263508Sdim      }
2167263508Sdim    }
2168243830Sdim    for (TargetSchedModel::ProcResIter
2169243830Sdim           PI = SchedModel->getWriteProcResBegin(SC),
2170243830Sdim           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2171263508Sdim      unsigned RCycle =
2172263508Sdim        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
2173263508Sdim      if (RCycle > NextCycle)
2174263508Sdim        NextCycle = RCycle;
2175243830Sdim    }
2176243830Sdim  }
2177263508Sdim  // Update ExpectedLatency and DependentLatency.
2178263508Sdim  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2179263508Sdim  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2180263508Sdim  if (SU->getDepth() > TopLatency) {
2181263508Sdim    TopLatency = SU->getDepth();
2182263508Sdim    DEBUG(dbgs() << "  " << Available.getName()
2183263508Sdim          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2184243830Sdim  }
2185263508Sdim  if (SU->getHeight() > BotLatency) {
2186263508Sdim    BotLatency = SU->getHeight();
2187263508Sdim    DEBUG(dbgs() << "  " << Available.getName()
2188263508Sdim          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2189263508Sdim  }
2190263508Sdim  // If we stall for any reason, bump the cycle.
2191263508Sdim  if (NextCycle > CurrCycle) {
2192263508Sdim    bumpCycle(NextCycle);
2193263508Sdim  }
2194243830Sdim  else {
2195263508Sdim    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2196263508Sdim    // resource limited. If a stall occured, bumpCycle does this.
2197263508Sdim    unsigned LFactor = SchedModel->getLatencyFactor();
2198263508Sdim    IsResourceLimited =
2199263508Sdim      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2200263508Sdim      > (int)LFactor;
2201243830Sdim  }
2202263508Sdim  DEBUG(dumpScheduledState());
2203239462Sdim}
2204239462Sdim
2205239462Sdim/// Release pending ready nodes in to the available queue. This makes them
2206239462Sdim/// visible to heuristics.
2207263508Sdimvoid GenericScheduler::SchedBoundary::releasePending() {
2208239462Sdim  // If the available queue is empty, it is safe to reset MinReadyCycle.
2209239462Sdim  if (Available.empty())
2210239462Sdim    MinReadyCycle = UINT_MAX;
2211239462Sdim
2212239462Sdim  // Check to see if any of the pending instructions are ready to issue.  If
2213239462Sdim  // so, add them to the available queue.
2214263508Sdim  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2215239462Sdim  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2216239462Sdim    SUnit *SU = *(Pending.begin()+i);
2217239462Sdim    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2218239462Sdim
2219239462Sdim    if (ReadyCycle < MinReadyCycle)
2220239462Sdim      MinReadyCycle = ReadyCycle;
2221239462Sdim
2222263508Sdim    if (!IsBuffered && ReadyCycle > CurrCycle)
2223239462Sdim      continue;
2224239462Sdim
2225239462Sdim    if (checkHazard(SU))
2226239462Sdim      continue;
2227239462Sdim
2228239462Sdim    Available.push(SU);
2229239462Sdim    Pending.remove(Pending.begin()+i);
2230239462Sdim    --i; --e;
2231239462Sdim  }
2232243830Sdim  DEBUG(if (!Pending.empty()) Pending.dump());
2233239462Sdim  CheckPending = false;
2234239462Sdim}
2235239462Sdim
2236239462Sdim/// Remove SU from the ready set for this boundary.
2237263508Sdimvoid GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
2238239462Sdim  if (Available.isInQueue(SU))
2239239462Sdim    Available.remove(Available.find(SU));
2240239462Sdim  else {
2241239462Sdim    assert(Pending.isInQueue(SU) && "bad ready count");
2242239462Sdim    Pending.remove(Pending.find(SU));
2243239462Sdim  }
2244239462Sdim}
2245239462Sdim
2246239462Sdim/// If this queue only has one ready candidate, return it. As a side effect,
2247243830Sdim/// defer any nodes that now hit a hazard, and advance the cycle until at least
2248243830Sdim/// one node is ready. If multiple instructions are ready, return NULL.
2249263508SdimSUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
2250239462Sdim  if (CheckPending)
2251239462Sdim    releasePending();
2252239462Sdim
2253263508Sdim  if (CurrMOps > 0) {
2254243830Sdim    // Defer any ready instrs that now have a hazard.
2255243830Sdim    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2256243830Sdim      if (checkHazard(*I)) {
2257243830Sdim        Pending.push(*I);
2258243830Sdim        I = Available.remove(I);
2259243830Sdim        continue;
2260243830Sdim      }
2261243830Sdim      ++I;
2262243830Sdim    }
2263243830Sdim  }
2264239462Sdim  for (unsigned i = 0; Available.empty(); ++i) {
2265263508Sdim    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2266239462Sdim           "permanent hazard"); (void)i;
2267263508Sdim    bumpCycle(CurrCycle + 1);
2268239462Sdim    releasePending();
2269239462Sdim  }
2270239462Sdim  if (Available.size() == 1)
2271239462Sdim    return *Available.begin();
2272239462Sdim  return NULL;
2273239462Sdim}
2274239462Sdim
2275263508Sdim#ifndef NDEBUG
2276263508Sdim// This is useful information to dump after bumpNode.
2277263508Sdim// Note that the Queue contents are more useful before pickNodeFromQueue.
2278263508Sdimvoid GenericScheduler::SchedBoundary::dumpScheduledState() {
2279263508Sdim  unsigned ResFactor;
2280263508Sdim  unsigned ResCount;
2281263508Sdim  if (ZoneCritResIdx) {
2282263508Sdim    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2283263508Sdim    ResCount = getResourceCount(ZoneCritResIdx);
2284243830Sdim  }
2285263508Sdim  else {
2286263508Sdim    ResFactor = SchedModel->getMicroOpFactor();
2287263508Sdim    ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2288243830Sdim  }
2289263508Sdim  unsigned LFactor = SchedModel->getLatencyFactor();
2290263508Sdim  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2291263508Sdim         << "  Retired: " << RetiredMOps;
2292263508Sdim  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2293263508Sdim  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2294263508Sdim         << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
2295263508Sdim         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2296263508Sdim         << (IsResourceLimited ? "  - Resource" : "  - Latency")
2297263508Sdim         << " limited.\n";
2298239462Sdim}
2299263508Sdim#endif
2300239462Sdim
2301263508Sdimvoid GenericScheduler::SchedCandidate::
2302243830SdiminitResourceDelta(const ScheduleDAGMI *DAG,
2303243830Sdim                  const TargetSchedModel *SchedModel) {
2304243830Sdim  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2305243830Sdim    return;
2306243830Sdim
2307243830Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2308243830Sdim  for (TargetSchedModel::ProcResIter
2309243830Sdim         PI = SchedModel->getWriteProcResBegin(SC),
2310243830Sdim         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2311243830Sdim    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2312243830Sdim      ResDelta.CritResources += PI->Cycles;
2313243830Sdim    if (PI->ProcResourceIdx == Policy.DemandResIdx)
2314243830Sdim      ResDelta.DemandedResources += PI->Cycles;
2315243830Sdim  }
2316243830Sdim}
2317243830Sdim
2318263508Sdim
2319243830Sdim/// Return true if this heuristic determines order.
2320249423Sdimstatic bool tryLess(int TryVal, int CandVal,
2321263508Sdim                    GenericScheduler::SchedCandidate &TryCand,
2322263508Sdim                    GenericScheduler::SchedCandidate &Cand,
2323263508Sdim                    GenericScheduler::CandReason Reason) {
2324243830Sdim  if (TryVal < CandVal) {
2325243830Sdim    TryCand.Reason = Reason;
2326243830Sdim    return true;
2327243830Sdim  }
2328243830Sdim  if (TryVal > CandVal) {
2329243830Sdim    if (Cand.Reason > Reason)
2330243830Sdim      Cand.Reason = Reason;
2331243830Sdim    return true;
2332243830Sdim  }
2333263508Sdim  Cand.setRepeat(Reason);
2334243830Sdim  return false;
2335243830Sdim}
2336249423Sdim
2337249423Sdimstatic bool tryGreater(int TryVal, int CandVal,
2338263508Sdim                       GenericScheduler::SchedCandidate &TryCand,
2339263508Sdim                       GenericScheduler::SchedCandidate &Cand,
2340263508Sdim                       GenericScheduler::CandReason Reason) {
2341243830Sdim  if (TryVal > CandVal) {
2342243830Sdim    TryCand.Reason = Reason;
2343243830Sdim    return true;
2344243830Sdim  }
2345243830Sdim  if (TryVal < CandVal) {
2346243830Sdim    if (Cand.Reason > Reason)
2347243830Sdim      Cand.Reason = Reason;
2348243830Sdim    return true;
2349243830Sdim  }
2350263508Sdim  Cand.setRepeat(Reason);
2351243830Sdim  return false;
2352243830Sdim}
2353243830Sdim
2354263508Sdimstatic bool tryPressure(const PressureChange &TryP,
2355263508Sdim                        const PressureChange &CandP,
2356263508Sdim                        GenericScheduler::SchedCandidate &TryCand,
2357263508Sdim                        GenericScheduler::SchedCandidate &Cand,
2358263508Sdim                        GenericScheduler::CandReason Reason) {
2359263508Sdim  int TryRank = TryP.getPSetOrMax();
2360263508Sdim  int CandRank = CandP.getPSetOrMax();
2361263508Sdim  // If both candidates affect the same set, go with the smallest increase.
2362263508Sdim  if (TryRank == CandRank) {
2363263508Sdim    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2364263508Sdim                   Reason);
2365263508Sdim  }
2366263508Sdim  // If one candidate decreases and the other increases, go with it.
2367263508Sdim  // Invalid candidates have UnitInc==0.
2368263508Sdim  if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2369263508Sdim              Reason)) {
2370263508Sdim    return true;
2371263508Sdim  }
2372263508Sdim  // If the candidates are decreasing pressure, reverse priority.
2373263508Sdim  if (TryP.getUnitInc() < 0)
2374263508Sdim    std::swap(TryRank, CandRank);
2375263508Sdim  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2376263508Sdim}
2377263508Sdim
2378249423Sdimstatic unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2379249423Sdim  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2380249423Sdim}
2381249423Sdim
2382251662Sdim/// Minimize physical register live ranges. Regalloc wants them adjacent to
2383251662Sdim/// their physreg def/use.
2384251662Sdim///
2385251662Sdim/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2386251662Sdim/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2387251662Sdim/// with the operation that produces or consumes the physreg. We'll do this when
2388251662Sdim/// regalloc has support for parallel copies.
2389251662Sdimstatic int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2390251662Sdim  const MachineInstr *MI = SU->getInstr();
2391251662Sdim  if (!MI->isCopy())
2392251662Sdim    return 0;
2393251662Sdim
2394251662Sdim  unsigned ScheduledOper = isTop ? 1 : 0;
2395251662Sdim  unsigned UnscheduledOper = isTop ? 0 : 1;
2396251662Sdim  // If we have already scheduled the physreg produce/consumer, immediately
2397251662Sdim  // schedule the copy.
2398251662Sdim  if (TargetRegisterInfo::isPhysicalRegister(
2399251662Sdim        MI->getOperand(ScheduledOper).getReg()))
2400251662Sdim    return 1;
2401251662Sdim  // If the physreg is at the boundary, defer it. Otherwise schedule it
2402251662Sdim  // immediately to free the dependent. We can hoist the copy later.
2403251662Sdim  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2404251662Sdim  if (TargetRegisterInfo::isPhysicalRegister(
2405251662Sdim        MI->getOperand(UnscheduledOper).getReg()))
2406251662Sdim    return AtBoundary ? -1 : 1;
2407251662Sdim  return 0;
2408251662Sdim}
2409251662Sdim
2410263508Sdimstatic bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
2411263508Sdim                       GenericScheduler::SchedCandidate &Cand,
2412263508Sdim                       GenericScheduler::SchedBoundary &Zone) {
2413263508Sdim  if (Zone.isTop()) {
2414263508Sdim    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2415263508Sdim      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2416263508Sdim                  TryCand, Cand, GenericScheduler::TopDepthReduce))
2417263508Sdim        return true;
2418263508Sdim    }
2419263508Sdim    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2420263508Sdim                   TryCand, Cand, GenericScheduler::TopPathReduce))
2421263508Sdim      return true;
2422263508Sdim  }
2423263508Sdim  else {
2424263508Sdim    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2425263508Sdim      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2426263508Sdim                  TryCand, Cand, GenericScheduler::BotHeightReduce))
2427263508Sdim        return true;
2428263508Sdim    }
2429263508Sdim    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2430263508Sdim                   TryCand, Cand, GenericScheduler::BotPathReduce))
2431263508Sdim      return true;
2432263508Sdim  }
2433263508Sdim  return false;
2434263508Sdim}
2435263508Sdim
2436243830Sdim/// Apply a set of heursitics to a new candidate. Heuristics are currently
2437243830Sdim/// hierarchical. This may be more efficient than a graduated cost model because
2438243830Sdim/// we don't need to evaluate all aspects of the model for each node in the
2439243830Sdim/// queue. But it's really done to make the heuristics easier to debug and
2440243830Sdim/// statistically analyze.
2441243830Sdim///
2442243830Sdim/// \param Cand provides the policy and current best candidate.
2443243830Sdim/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2444243830Sdim/// \param Zone describes the scheduled zone that we are extending.
2445243830Sdim/// \param RPTracker describes reg pressure within the scheduled zone.
2446243830Sdim/// \param TempTracker is a scratch pressure tracker to reuse in queries.
2447263508Sdimvoid GenericScheduler::tryCandidate(SchedCandidate &Cand,
2448243830Sdim                                       SchedCandidate &TryCand,
2449243830Sdim                                       SchedBoundary &Zone,
2450243830Sdim                                       const RegPressureTracker &RPTracker,
2451243830Sdim                                       RegPressureTracker &TempTracker) {
2452243830Sdim
2453263508Sdim  if (DAG->isTrackingPressure()) {
2454263508Sdim    // Always initialize TryCand's RPDelta.
2455263508Sdim    if (Zone.isTop()) {
2456263508Sdim      TempTracker.getMaxDownwardPressureDelta(
2457263508Sdim        TryCand.SU->getInstr(),
2458263508Sdim        TryCand.RPDelta,
2459263508Sdim        DAG->getRegionCriticalPSets(),
2460263508Sdim        DAG->getRegPressure().MaxSetPressure);
2461263508Sdim    }
2462263508Sdim    else {
2463263508Sdim      if (VerifyScheduling) {
2464263508Sdim        TempTracker.getMaxUpwardPressureDelta(
2465263508Sdim          TryCand.SU->getInstr(),
2466263508Sdim          &DAG->getPressureDiff(TryCand.SU),
2467263508Sdim          TryCand.RPDelta,
2468263508Sdim          DAG->getRegionCriticalPSets(),
2469263508Sdim          DAG->getRegPressure().MaxSetPressure);
2470263508Sdim      }
2471263508Sdim      else {
2472263508Sdim        RPTracker.getUpwardPressureDelta(
2473263508Sdim          TryCand.SU->getInstr(),
2474263508Sdim          DAG->getPressureDiff(TryCand.SU),
2475263508Sdim          TryCand.RPDelta,
2476263508Sdim          DAG->getRegionCriticalPSets(),
2477263508Sdim          DAG->getRegPressure().MaxSetPressure);
2478263508Sdim      }
2479263508Sdim    }
2480263508Sdim  }
2481263508Sdim  DEBUG(if (TryCand.RPDelta.Excess.isValid())
2482263508Sdim          dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
2483263508Sdim                 << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2484263508Sdim                 << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2485243830Sdim
2486243830Sdim  // Initialize the candidate if needed.
2487243830Sdim  if (!Cand.isValid()) {
2488243830Sdim    TryCand.Reason = NodeOrder;
2489243830Sdim    return;
2490243830Sdim  }
2491251662Sdim
2492251662Sdim  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2493251662Sdim                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
2494251662Sdim                 TryCand, Cand, PhysRegCopy))
2495251662Sdim    return;
2496251662Sdim
2497263508Sdim  // Avoid exceeding the target's limit. If signed PSetID is negative, it is
2498263508Sdim  // invalid; convert it to INT_MAX to give it lowest priority.
2499263508Sdim  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2500263508Sdim                                               Cand.RPDelta.Excess,
2501263508Sdim                                               TryCand, Cand, RegExcess))
2502243830Sdim    return;
2503243830Sdim
2504243830Sdim  // Avoid increasing the max critical pressure in the scheduled region.
2505263508Sdim  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2506263508Sdim                                               Cand.RPDelta.CriticalMax,
2507263508Sdim                                               TryCand, Cand, RegCritical))
2508243830Sdim    return;
2509243830Sdim
2510263508Sdim  // For loops that are acyclic path limited, aggressively schedule for latency.
2511263508Sdim  // This can result in very long dependence chains scheduled in sequence, so
2512263508Sdim  // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2513263508Sdim  if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
2514263508Sdim      && tryLatency(TryCand, Cand, Zone))
2515263508Sdim    return;
2516263508Sdim
2517249423Sdim  // Keep clustered nodes together to encourage downstream peephole
2518249423Sdim  // optimizations which may reduce resource requirements.
2519249423Sdim  //
2520249423Sdim  // This is a best effort to set things up for a post-RA pass. Optimizations
2521249423Sdim  // like generating loads of multiple registers should ideally be done within
2522249423Sdim  // the scheduler pass by combining the loads during DAG postprocessing.
2523249423Sdim  const SUnit *NextClusterSU =
2524249423Sdim    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2525249423Sdim  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2526249423Sdim                 TryCand, Cand, Cluster))
2527249423Sdim    return;
2528251662Sdim
2529251662Sdim  // Weak edges are for clustering and other constraints.
2530249423Sdim  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2531249423Sdim              getWeakLeft(Cand.SU, Zone.isTop()),
2532251662Sdim              TryCand, Cand, Weak)) {
2533249423Sdim    return;
2534249423Sdim  }
2535263508Sdim  // Avoid increasing the max pressure of the entire region.
2536263508Sdim  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2537263508Sdim                                               Cand.RPDelta.CurrentMax,
2538263508Sdim                                               TryCand, Cand, RegMax))
2539263508Sdim    return;
2540263508Sdim
2541243830Sdim  // Avoid critical resource consumption and balance the schedule.
2542243830Sdim  TryCand.initResourceDelta(DAG, SchedModel);
2543243830Sdim  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2544243830Sdim              TryCand, Cand, ResourceReduce))
2545243830Sdim    return;
2546243830Sdim  if (tryGreater(TryCand.ResDelta.DemandedResources,
2547243830Sdim                 Cand.ResDelta.DemandedResources,
2548243830Sdim                 TryCand, Cand, ResourceDemand))
2549243830Sdim    return;
2550243830Sdim
2551243830Sdim  // Avoid serializing long latency dependence chains.
2552263508Sdim  // For acyclic path limited loops, latency was already checked above.
2553263508Sdim  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2554263508Sdim      && tryLatency(TryCand, Cand, Zone)) {
2555263508Sdim    return;
2556243830Sdim  }
2557243830Sdim
2558243830Sdim  // Prefer immediate defs/users of the last scheduled instruction. This is a
2559263508Sdim  // local pressure avoidance strategy that also makes the machine code
2560263508Sdim  // readable.
2561249423Sdim  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
2562249423Sdim                 TryCand, Cand, NextDefUse))
2563243830Sdim    return;
2564249423Sdim
2565243830Sdim  // Fall through to original instruction order.
2566243830Sdim  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2567243830Sdim      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2568243830Sdim    TryCand.Reason = NodeOrder;
2569243830Sdim  }
2570243830Sdim}
2571243830Sdim
2572243830Sdim#ifndef NDEBUG
2573263508Sdimconst char *GenericScheduler::getReasonStr(
2574263508Sdim  GenericScheduler::CandReason Reason) {
2575243830Sdim  switch (Reason) {
2576243830Sdim  case NoCand:         return "NOCAND    ";
2577251662Sdim  case PhysRegCopy:    return "PREG-COPY";
2578263508Sdim  case RegExcess:      return "REG-EXCESS";
2579263508Sdim  case RegCritical:    return "REG-CRIT  ";
2580249423Sdim  case Cluster:        return "CLUSTER   ";
2581251662Sdim  case Weak:           return "WEAK      ";
2582263508Sdim  case RegMax:         return "REG-MAX   ";
2583243830Sdim  case ResourceReduce: return "RES-REDUCE";
2584243830Sdim  case ResourceDemand: return "RES-DEMAND";
2585243830Sdim  case TopDepthReduce: return "TOP-DEPTH ";
2586243830Sdim  case TopPathReduce:  return "TOP-PATH  ";
2587243830Sdim  case BotHeightReduce:return "BOT-HEIGHT";
2588243830Sdim  case BotPathReduce:  return "BOT-PATH  ";
2589243830Sdim  case NextDefUse:     return "DEF-USE   ";
2590243830Sdim  case NodeOrder:      return "ORDER     ";
2591243830Sdim  };
2592243830Sdim  llvm_unreachable("Unknown reason!");
2593243830Sdim}
2594243830Sdim
/// Print the reason-specific detail (pressure set, processor resource, or
/// latency) behind a candidate choice. Debug builds only.
void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
  PressureChange P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  // Pull out whichever datum the winning reason was based on; reasons not
  // listed here carry no extra detail.
  switch (Cand.Reason) {
  default:
    break;
  case RegExcess:
    P = Cand.RPDelta.Excess;
    break;
  case RegCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case RegMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  // Each optional field is padded with spaces when absent so successive
  // trace lines stay column-aligned.
  if (P.isValid())
    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
           << ":" << P.getUnitInc() << " ";
  else
    dbgs() << "      ";
  if (ResIdx)
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "         ";
  if (Latency)
    dbgs() << " " << Latency << " cycles ";
  else
    dbgs() << "          ";
  dbgs() << '\n';
}
2646243830Sdim#endif
2647243830Sdim
2648263508Sdim/// Pick the best candidate from the queue.
2649239462Sdim///
2650239462Sdim/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2651239462Sdim/// DAG building. To adjust for the current scheduling location we need to
2652239462Sdim/// maintain the number of vreg uses remaining to be top-scheduled.
2653263508Sdimvoid GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2654243830Sdim                                            const RegPressureTracker &RPTracker,
2655243830Sdim                                            SchedCandidate &Cand) {
2656243830Sdim  ReadyQueue &Q = Zone.Available;
2657243830Sdim
2658239462Sdim  DEBUG(Q.dump());
2659239462Sdim
2660239462Sdim  // getMaxPressureDelta temporarily modifies the tracker.
2661239462Sdim  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2662239462Sdim
2663239462Sdim  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2664239462Sdim
2665243830Sdim    SchedCandidate TryCand(Cand.Policy);
2666243830Sdim    TryCand.SU = *I;
2667243830Sdim    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2668243830Sdim    if (TryCand.Reason != NoCand) {
2669243830Sdim      // Initialize resource delta if needed in case future heuristics query it.
2670243830Sdim      if (TryCand.ResDelta == SchedResourceDelta())
2671243830Sdim        TryCand.initResourceDelta(DAG, SchedModel);
2672243830Sdim      Cand.setBest(TryCand);
2673249423Sdim      DEBUG(traceCandidate(Cand));
2674234285Sdim    }
2675239462Sdim  }
2676239462Sdim}
2677239462Sdim
/// Emit a one-line trace naming the scheduling zone (top/bottom) and the
/// heuristic reason that selected \p Cand. Debug builds only.
static void tracePick(const GenericScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
        << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
}
2683243830Sdim
/// Pick the best candidate node from either the top or bottom queue.
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    DEBUG(dbgs() << "Pick Bot NOCAND\n");
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    DEBUG(dbgs() << "Pick Top NOCAND\n");
    return SU;
  }
  // Derive each zone's candidate policy from the state of the opposite zone
  // before running the heuristics.
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  Bot.setPolicy(BotCand.Policy, Top);
  Top.setPolicy(TopCand.Policy, Bot);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
      || (BotCand.Reason == RegCritical
          && !BotCand.isRepeat(RegCritical)))
  {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // Choose the queue with the most important (lowest enum) reason.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}
2738234285Sdim
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    // Empty region: both zones' ready lists must already be drained.
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  // Loop until we pick a node that has not already been scheduled (queues may
  // still contain stale entries for nodes scheduled from the other zone).
  do {
    if (RegionPolicy.OnlyTopDown) {
      // Forced top-down: take the sole ready node, or run the heuristics.
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand, true);
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (RegionPolicy.OnlyBottomUp) {
      // Forced bottom-up: mirror image of the top-down case.
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand, false);
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // A node may be ready in both zones; remove it from each list.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}
2785239462Sdim
/// Move already-scheduled COPY instructions that have a single physreg data
/// dependence on \p SU so they sit immediately adjacent to \p SU (above it
/// when scheduling top-down, below it when bottom-up).
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {

  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  if (!isTop)
    ++InsertPos; // Bottom-up: insert just after SU's instruction.
  // Look through preds when top-down, succs when bottom-up.
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above the scheduled instruction.
  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
       I != E; ++I) {
    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
      continue;
    SUnit *DepSU = I->getSUnit();
    // Skip copies whose result feeds more than one node; moving those is not
    // clearly profitable.
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
      continue;
    MachineInstr *Copy = DepSU->getInstr();
    if (!Copy->isCopy())
      continue;
    DEBUG(dbgs() << "  Rescheduling physreg copy ";
          I->getSUnit()->dump(DAG));
    DAG->moveInstruction(Copy, InsertPos);
  }
}
2810251662Sdim
2811239462Sdim/// Update the scheduler's state after scheduling a node. This is the same node
2812239462Sdim/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2813239462Sdim/// it's state based on the current cycle before MachineSchedStrategy does.
2814251662Sdim///
2815251662Sdim/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2816251662Sdim/// them here. See comments in biasPhysRegCopy.
2817263508Sdimvoid GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2818239462Sdim  if (IsTopNode) {
2819263508Sdim    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
2820239462Sdim    Top.bumpNode(SU);
2821251662Sdim    if (SU->hasPhysRegUses)
2822251662Sdim      reschedulePhysRegCopies(SU, true);
2823239462Sdim  }
2824239462Sdim  else {
2825263508Sdim    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
2826239462Sdim    Bot.bumpNode(SU);
2827251662Sdim    if (SU->hasPhysRegDefs)
2828251662Sdim      reschedulePhysRegCopies(SU, false);
2829239462Sdim  }
2830239462Sdim}
2831239462Sdim
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
  // Register DAG post-processors.
  //
  // FIXME: extend the mutation API to allow earlier mutations to instantiate
  // data and pass it to later mutations. Have a single mutation that gathers
  // the interesting nodes in one pass.
  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
  // Load clustering and macro fusion are opt-in via flags / target hooks.
  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
// Make the generic scheduler selectable on the command line (-misched=converge).
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
                     createGenericSched);
2851234285Sdim
2852234285Sdim//===----------------------------------------------------------------------===//
2853243830Sdim// ILP Scheduler. Currently for experimental analysis of heuristics.
2854243830Sdim//===----------------------------------------------------------------------===//
2855243830Sdim
namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult;   // DFS/ILP statistics computed by the DAG.
  const BitVector *ScheduledTrees;   // Subtrees that already started scheduling.
  bool MaximizeILP;                  // true: prefer max ILP; false: min ILP.

  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    // Within a tie, fall back to the per-node ILP value.
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGMI *DAG;
  ILPOrder Cmp;

  // Max-heap of ready nodes, ordered by Cmp.
  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  virtual void registerRoots() {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    // This strategy schedules bottom-up only.
    IsTopNode = false;
    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
          << "Scheduling " << *SU->getInstr());
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {
    // Re-heapify: priorities change once a subtree starts scheduling.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace
2950243830Sdim
/// Factory for the experimental bottom-up maximize-ILP scheduler.
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
/// Factory for the experimental bottom-up minimize-ILP scheduler.
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
// Command-line registration: -misched=ilpmax / -misched=ilpmin.
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2961243830Sdim
2962243830Sdim//===----------------------------------------------------------------------===//
2963234285Sdim// Machine Instruction Shuffler for Correctness Testing
2964234285Sdim//===----------------------------------------------------------------------===//
2965234285Sdim
2966234285Sdim#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;  // Flip direction after every pick.
  bool IsTopDown;      // Current scheduling direction.

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      // Pop until we find a node not yet scheduled from the other queue.
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace
3038234285Sdim
/// Factory for the shuffler: alternates direction per pick unless exactly one
/// of -misched-topdown/-misched-bottomup forces a single direction.
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  // Both flags set at once is a user error.
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
3049234285Sdim#endif // !NDEBUG
3050249423Sdim
3051249423Sdim//===----------------------------------------------------------------------===//
3052249423Sdim// GraphWriter support for ScheduleDAGMI.
3053249423Sdim//===----------------------------------------------------------------------===//
3054249423Sdim
3055249423Sdim#ifndef NDEBUG
namespace llvm {

// Reuse the generic ScheduleDAG traversal for ScheduleDAGMI graphs.
template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

// Customize DOT rendering of a ScheduleDAGMI for viewGraph().
template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  // Hide highly connected nodes (>10 preds or succs) to keep the rendered
  // graph readable.
  static bool isNodeHidden(const SUnit *Node) {
    return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
  }

  static bool hasNodeAddressLabel(const SUnit *Node,
                                  const ScheduleDAG *Graph) {
    return false;
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  // Node label: SU number, plus instruction count when DFS results exist.
  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    const SchedDFSResult *DFS =
      static_cast<const ScheduleDAGMI*>(G)->getDFSResult();
    SS << "SU:" << SU->NodeNum;
    if (DFS)
      SS << " I:" << DFS->getNumInstrs(SU);
    return SS.str();
  }
  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  // Fill nodes with a color keyed on their DFS subtree, when available.
  static std::string getNodeAttributes(const SUnit *N,
                                       const ScheduleDAG *Graph) {
    std::string Str("shape=Mrecord");
    const SchedDFSResult *DFS =
      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};
} // namespace llvm
3123249423Sdim#endif // NDEBUG
3124249423Sdim
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  // GraphWriter does the rendering; only compiled into debug builds.
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif  // NDEBUG
}
3136249423Sdim
/// Out-of-line implementation with no arguments is handy for gdb.
/// Forwards to the two-argument overload with a default name and title.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
3141