1234285Sdim//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim// 10234285Sdim// MachineScheduler schedules machine instructions after phi elimination. It 11234285Sdim// preserves LiveIntervals so it can be invoked before register allocation. 12234285Sdim// 13234285Sdim//===----------------------------------------------------------------------===// 14234285Sdim 15234285Sdim#define DEBUG_TYPE "misched" 16234285Sdim 17249423Sdim#include "llvm/CodeGen/MachineScheduler.h" 18249423Sdim#include "llvm/ADT/OwningPtr.h" 19249423Sdim#include "llvm/ADT/PriorityQueue.h" 20249423Sdim#include "llvm/Analysis/AliasAnalysis.h" 21234285Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h" 22249423Sdim#include "llvm/CodeGen/MachineDominators.h" 23249423Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 24263508Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25234285Sdim#include "llvm/CodeGen/Passes.h" 26239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h" 27249423Sdim#include "llvm/CodeGen/ScheduleDFS.h" 28239462Sdim#include "llvm/CodeGen/ScheduleHazardRecognizer.h" 29234285Sdim#include "llvm/Support/CommandLine.h" 30234285Sdim#include "llvm/Support/Debug.h" 31234285Sdim#include "llvm/Support/ErrorHandling.h" 32249423Sdim#include "llvm/Support/GraphWriter.h" 33234285Sdim#include "llvm/Support/raw_ostream.h" 34263508Sdim#include "llvm/Target/TargetInstrInfo.h" 35234285Sdim#include <queue> 36234285Sdim 37234285Sdimusing namespace llvm; 38234285Sdim 39243830Sdimnamespace llvm { 40243830Sdimcl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 41243830Sdim cl::desc("Force top-down list scheduling")); 42243830Sdimcl::opt<bool> 
ForceBottomUp("misched-bottomup", cl::Hidden, 43243830Sdim cl::desc("Force bottom-up list scheduling")); 44243830Sdim} 45234285Sdim 46234285Sdim#ifndef NDEBUG 47234285Sdimstatic cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 48234285Sdim cl::desc("Pop up a window to show MISched dags after they are processed")); 49234285Sdim 50234285Sdimstatic cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, 51234285Sdim cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); 52234285Sdim#else 53234285Sdimstatic bool ViewMISchedDAGs = false; 54234285Sdim#endif // NDEBUG 55234285Sdim 56263508Sdimstatic cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, 57263508Sdim cl::desc("Enable register pressure scheduling."), cl::init(true)); 58251662Sdim 59263508Sdimstatic cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, 60263508Sdim cl::desc("Enable cyclic critical path analysis."), cl::init(true)); 61263508Sdim 62249423Sdimstatic cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, 63249423Sdim cl::desc("Enable load clustering."), cl::init(true)); 64243830Sdim 65249423Sdim// Experimental heuristics 66249423Sdimstatic cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, 67249423Sdim cl::desc("Enable scheduling for macro fusion."), cl::init(true)); 68249423Sdim 69249423Sdimstatic cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, 70249423Sdim cl::desc("Verify machine instrs before and after machine scheduling")); 71249423Sdim 72249423Sdim// DAG subtrees must have at least this many nodes. 73249423Sdimstatic const unsigned MinSubtreeSize = 8; 74249423Sdim 75263508Sdim// Pin the vtables to this file. 
76263508Sdimvoid MachineSchedStrategy::anchor() {} 77263508Sdimvoid ScheduleDAGMutation::anchor() {} 78263508Sdim 79234285Sdim//===----------------------------------------------------------------------===// 80234285Sdim// Machine Instruction Scheduling Pass and Registry 81234285Sdim//===----------------------------------------------------------------------===// 82234285Sdim 83239462SdimMachineSchedContext::MachineSchedContext(): 84239462Sdim MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { 85239462Sdim RegClassInfo = new RegisterClassInfo(); 86239462Sdim} 87239462Sdim 88239462SdimMachineSchedContext::~MachineSchedContext() { 89239462Sdim delete RegClassInfo; 90239462Sdim} 91239462Sdim 92234285Sdimnamespace { 93234285Sdim/// MachineScheduler runs after coalescing and before register allocation. 94234285Sdimclass MachineScheduler : public MachineSchedContext, 95234285Sdim public MachineFunctionPass { 96234285Sdimpublic: 97234285Sdim MachineScheduler(); 98234285Sdim 99234285Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const; 100234285Sdim 101234285Sdim virtual void releaseMemory() {} 102234285Sdim 103234285Sdim virtual bool runOnMachineFunction(MachineFunction&); 104234285Sdim 105234285Sdim virtual void print(raw_ostream &O, const Module* = 0) const; 106234285Sdim 107234285Sdim static char ID; // Class identification, replacement for typeinfo 108263508Sdim 109263508Sdimprotected: 110263508Sdim ScheduleDAGInstrs *createMachineScheduler(); 111234285Sdim}; 112234285Sdim} // namespace 113234285Sdim 114234285Sdimchar MachineScheduler::ID = 0; 115234285Sdim 116234285Sdimchar &llvm::MachineSchedulerID = MachineScheduler::ID; 117234285Sdim 118234285SdimINITIALIZE_PASS_BEGIN(MachineScheduler, "misched", 119234285Sdim "Machine Instruction Scheduler", false, false) 120234285SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 121234285SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes) 122234285SdimINITIALIZE_PASS_DEPENDENCY(LiveIntervals) 
123234285SdimINITIALIZE_PASS_END(MachineScheduler, "misched", 124234285Sdim "Machine Instruction Scheduler", false, false) 125234285Sdim 126234285SdimMachineScheduler::MachineScheduler() 127234285Sdim: MachineFunctionPass(ID) { 128234285Sdim initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 129234285Sdim} 130234285Sdim 131234285Sdimvoid MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 132234285Sdim AU.setPreservesCFG(); 133234285Sdim AU.addRequiredID(MachineDominatorsID); 134234285Sdim AU.addRequired<MachineLoopInfo>(); 135234285Sdim AU.addRequired<AliasAnalysis>(); 136234285Sdim AU.addRequired<TargetPassConfig>(); 137234285Sdim AU.addRequired<SlotIndexes>(); 138234285Sdim AU.addPreserved<SlotIndexes>(); 139234285Sdim AU.addRequired<LiveIntervals>(); 140234285Sdim AU.addPreserved<LiveIntervals>(); 141234285Sdim MachineFunctionPass::getAnalysisUsage(AU); 142234285Sdim} 143234285Sdim 144234285SdimMachinePassRegistry MachineSchedRegistry::Registry; 145234285Sdim 146234285Sdim/// A dummy default scheduler factory indicates whether the scheduler 147234285Sdim/// is overridden on the command line. 148234285Sdimstatic ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 149234285Sdim return 0; 150234285Sdim} 151234285Sdim 152234285Sdim/// MachineSchedOpt allows command line selection of the scheduler. 153234285Sdimstatic cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 154234285Sdim RegisterPassParser<MachineSchedRegistry> > 155234285SdimMachineSchedOpt("misched", 156234285Sdim cl::init(&useDefaultMachineSched), cl::Hidden, 157234285Sdim cl::desc("Machine instruction scheduler to use")); 158234285Sdim 159234285Sdimstatic MachineSchedRegistry 160234285SdimDefaultSchedRegistry("default", "Use the target's default scheduler choice.", 161234285Sdim useDefaultMachineSched); 162234285Sdim 163234285Sdim/// Forward declare the standard machine scheduler. 
This will be used as the 164234285Sdim/// default scheduler if the target does not set a default. 165263508Sdimstatic ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); 166234285Sdim 167239462Sdim 168239462Sdim/// Decrement this iterator until reaching the top or a non-debug instr. 169263508Sdimstatic MachineBasicBlock::const_iterator 170263508SdimpriorNonDebug(MachineBasicBlock::const_iterator I, 171263508Sdim MachineBasicBlock::const_iterator Beg) { 172239462Sdim assert(I != Beg && "reached the top of the region, cannot decrement"); 173239462Sdim while (--I != Beg) { 174239462Sdim if (!I->isDebugValue()) 175239462Sdim break; 176239462Sdim } 177239462Sdim return I; 178239462Sdim} 179239462Sdim 180263508Sdim/// Non-const version. 181263508Sdimstatic MachineBasicBlock::iterator 182263508SdimpriorNonDebug(MachineBasicBlock::iterator I, 183263508Sdim MachineBasicBlock::const_iterator Beg) { 184263508Sdim return const_cast<MachineInstr*>( 185263508Sdim &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); 186263508Sdim} 187263508Sdim 188239462Sdim/// If this iterator is a debug value, increment until reaching the End or a 189239462Sdim/// non-debug instruction. 190263508Sdimstatic MachineBasicBlock::const_iterator 191263508SdimnextIfDebug(MachineBasicBlock::const_iterator I, 192263508Sdim MachineBasicBlock::const_iterator End) { 193239462Sdim for(; I != End; ++I) { 194239462Sdim if (!I->isDebugValue()) 195239462Sdim break; 196239462Sdim } 197239462Sdim return I; 198239462Sdim} 199239462Sdim 200263508Sdim/// Non-const version. 201263508Sdimstatic MachineBasicBlock::iterator 202263508SdimnextIfDebug(MachineBasicBlock::iterator I, 203263508Sdim MachineBasicBlock::const_iterator End) { 204263508Sdim // Cast the return value to nonconst MachineInstr, then cast to an 205263508Sdim // instr_iterator, which does not check for null, finally return a 206263508Sdim // bundle_iterator. 
207263508Sdim return MachineBasicBlock::instr_iterator( 208263508Sdim const_cast<MachineInstr*>( 209263508Sdim &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); 210263508Sdim} 211263508Sdim 212263508Sdim/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. 213263508SdimScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { 214263508Sdim // Select the scheduler, or set the default. 215263508Sdim MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; 216263508Sdim if (Ctor != useDefaultMachineSched) 217263508Sdim return Ctor(this); 218263508Sdim 219263508Sdim // Get the default scheduler set by the target for this function. 220263508Sdim ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); 221263508Sdim if (Scheduler) 222263508Sdim return Scheduler; 223263508Sdim 224263508Sdim // Default to GenericScheduler. 225263508Sdim return createGenericSched(this); 226263508Sdim} 227263508Sdim 228234285Sdim/// Top-level MachineScheduler pass driver. 229234285Sdim/// 230234285Sdim/// Visit blocks in function order. Divide each block into scheduling regions 231234285Sdim/// and visit them bottom-up. Visiting regions bottom-up is not required, but is 232234285Sdim/// consistent with the DAG builder, which traverses the interior of the 233234285Sdim/// scheduling regions bottom-up. 234234285Sdim/// 235234285Sdim/// This design avoids exposing scheduling boundaries to the DAG builder, 236234285Sdim/// simplifying the DAG builder's support for "special" target instructions. 237234285Sdim/// At the same time the design allows target schedulers to operate across 238234285Sdim/// scheduling boundaries, for example to bundle the boudary instructions 239234285Sdim/// without reordering them. This creates complexity, because the target 240234285Sdim/// scheduler must update the RegionBegin and RegionEnd positions cached by 241234285Sdim/// ScheduleDAGInstrs whenever adding or removing instructions. 
A much simpler 242234285Sdim/// design would be to split blocks at scheduling boundaries, but LLVM has a 243234285Sdim/// general bias against block splitting purely for implementation simplicity. 244234285Sdimbool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { 245239462Sdim DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); 246239462Sdim 247234285Sdim // Initialize the context of the pass. 248234285Sdim MF = &mf; 249234285Sdim MLI = &getAnalysis<MachineLoopInfo>(); 250234285Sdim MDT = &getAnalysis<MachineDominatorTree>(); 251234285Sdim PassConfig = &getAnalysis<TargetPassConfig>(); 252234285Sdim AA = &getAnalysis<AliasAnalysis>(); 253234285Sdim 254234285Sdim LIS = &getAnalysis<LiveIntervals>(); 255234285Sdim const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); 256234285Sdim 257249423Sdim if (VerifyScheduling) { 258263508Sdim DEBUG(LIS->dump()); 259249423Sdim MF->verify(this, "Before machine scheduling."); 260249423Sdim } 261239462Sdim RegClassInfo->runOnMachineFunction(*MF); 262239462Sdim 263263508Sdim // Instantiate the selected scheduler for this target, function, and 264263508Sdim // optimization level. 265263508Sdim OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); 266234285Sdim 267234285Sdim // Visit all machine basic blocks. 268239462Sdim // 269239462Sdim // TODO: Visit blocks in global postorder or postorder within the bottom-up 270239462Sdim // loop tree. Then we can optionally compute global RegPressure. 271234285Sdim for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); 272234285Sdim MBB != MBBEnd; ++MBB) { 273234285Sdim 274234285Sdim Scheduler->startBlock(MBB); 275234285Sdim 276234285Sdim // Break the block into scheduling regions [I, RegionEnd), and schedule each 277239462Sdim // region as soon as it is discovered. RegionEnd points the scheduling 278234285Sdim // boundary at the bottom of the region. The DAG does not include RegionEnd, 279234285Sdim // but the region does (i.e. 
the next RegionEnd is above the previous 280234285Sdim // RegionBegin). If the current block has no terminator then RegionEnd == 281234285Sdim // MBB->end() for the bottom region. 282234285Sdim // 283234285Sdim // The Scheduler may insert instructions during either schedule() or 284234285Sdim // exitRegion(), even for empty regions. So the local iterators 'I' and 285234285Sdim // 'RegionEnd' are invalid across these calls. 286243830Sdim unsigned RemainingInstrs = MBB->size(); 287234285Sdim for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 288234285Sdim RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { 289239462Sdim 290234285Sdim // Avoid decrementing RegionEnd for blocks with no terminator. 291234285Sdim if (RegionEnd != MBB->end() 292234285Sdim || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { 293234285Sdim --RegionEnd; 294234285Sdim // Count the boundary instruction. 295243830Sdim --RemainingInstrs; 296234285Sdim } 297234285Sdim 298234285Sdim // The next region starts above the previous region. Look backward in the 299234285Sdim // instruction stream until we find the nearest boundary. 300263508Sdim unsigned NumRegionInstrs = 0; 301234285Sdim MachineBasicBlock::iterator I = RegionEnd; 302263508Sdim for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { 303234285Sdim if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) 304234285Sdim break; 305234285Sdim } 306234285Sdim // Notify the scheduler of the region, even if we may skip scheduling 307234285Sdim // it. Perhaps it still needs to be bundled. 308263508Sdim Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); 309234285Sdim 310234285Sdim // Skip empty scheduling regions (0 or 1 schedulable instructions). 311234285Sdim if (I == RegionEnd || I == llvm::prior(RegionEnd)) { 312234285Sdim // Close the current region. Bundle the terminator if needed. 313234285Sdim // This invalidates 'RegionEnd' and 'I'. 
314234285Sdim Scheduler->exitRegion(); 315234285Sdim continue; 316234285Sdim } 317239462Sdim DEBUG(dbgs() << "********** MI Scheduling **********\n"); 318243830Sdim DEBUG(dbgs() << MF->getName() 319249423Sdim << ":BB#" << MBB->getNumber() << " " << MBB->getName() 320249423Sdim << "\n From: " << *I << " To: "; 321234285Sdim if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 322234285Sdim else dbgs() << "End"; 323263508Sdim dbgs() << " RegionInstrs: " << NumRegionInstrs 324263508Sdim << " Remaining: " << RemainingInstrs << "\n"); 325234285Sdim 326234285Sdim // Schedule a region: possibly reorder instructions. 327234285Sdim // This invalidates 'RegionEnd' and 'I'. 328234285Sdim Scheduler->schedule(); 329234285Sdim 330234285Sdim // Close the current region. 331234285Sdim Scheduler->exitRegion(); 332234285Sdim 333234285Sdim // Scheduling has invalidated the current iterator 'I'. Ask the 334234285Sdim // scheduler for the top of it's scheduled region. 335234285Sdim RegionEnd = Scheduler->begin(); 336234285Sdim } 337243830Sdim assert(RemainingInstrs == 0 && "Instruction count mismatch!"); 338234285Sdim Scheduler->finishBlock(); 339234285Sdim } 340234285Sdim Scheduler->finalizeSchedule(); 341263508Sdim DEBUG(LIS->dump()); 342249423Sdim if (VerifyScheduling) 343249423Sdim MF->verify(this, "After machine scheduling."); 344234285Sdim return true; 345234285Sdim} 346234285Sdim 347234285Sdimvoid MachineScheduler::print(raw_ostream &O, const Module* m) const { 348234285Sdim // unimplemented 349234285Sdim} 350234285Sdim 351243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 352243830Sdimvoid ReadyQueue::dump() { 353263508Sdim dbgs() << Name << ": "; 354243830Sdim for (unsigned i = 0, e = Queue.size(); i < e; ++i) 355243830Sdim dbgs() << Queue[i]->NodeNum << " "; 356243830Sdim dbgs() << "\n"; 357243830Sdim} 358243830Sdim#endif 359234285Sdim 360234285Sdim//===----------------------------------------------------------------------===// 361234285Sdim// ScheduleDAGMI - Base 
class for MachineInstr scheduling with LiveIntervals 362234285Sdim// preservation. 363234285Sdim//===----------------------------------------------------------------------===// 364234285Sdim 365249423SdimScheduleDAGMI::~ScheduleDAGMI() { 366249423Sdim delete DFSResult; 367249423Sdim DeleteContainerPointers(Mutations); 368249423Sdim delete SchedImpl; 369249423Sdim} 370249423Sdim 371251662Sdimbool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { 372251662Sdim return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); 373251662Sdim} 374251662Sdim 375249423Sdimbool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { 376249423Sdim if (SuccSU != &ExitSU) { 377249423Sdim // Do not use WillCreateCycle, it assumes SD scheduling. 378249423Sdim // If Pred is reachable from Succ, then the edge creates a cycle. 379249423Sdim if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) 380249423Sdim return false; 381249423Sdim Topo.AddPred(SuccSU, PredDep.getSUnit()); 382249423Sdim } 383249423Sdim SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); 384249423Sdim // Return true regardless of whether a new edge needed to be inserted. 385249423Sdim return true; 386249423Sdim} 387249423Sdim 388234285Sdim/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When 389234285Sdim/// NumPredsLeft reaches zero, release the successor node. 390239462Sdim/// 391239462Sdim/// FIXME: Adjust SuccSU height based on MinLatency. 392234285Sdimvoid ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { 393234285Sdim SUnit *SuccSU = SuccEdge->getSUnit(); 394234285Sdim 395249423Sdim if (SuccEdge->isWeak()) { 396249423Sdim --SuccSU->WeakPredsLeft; 397249423Sdim if (SuccEdge->isCluster()) 398249423Sdim NextClusterSucc = SuccSU; 399249423Sdim return; 400249423Sdim } 401234285Sdim#ifndef NDEBUG 402234285Sdim if (SuccSU->NumPredsLeft == 0) { 403234285Sdim dbgs() << "*** Scheduling failed! 
***\n"; 404234285Sdim SuccSU->dump(this); 405234285Sdim dbgs() << " has been released too many times!\n"; 406234285Sdim llvm_unreachable(0); 407234285Sdim } 408234285Sdim#endif 409234285Sdim --SuccSU->NumPredsLeft; 410234285Sdim if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) 411234285Sdim SchedImpl->releaseTopNode(SuccSU); 412234285Sdim} 413234285Sdim 414234285Sdim/// releaseSuccessors - Call releaseSucc on each of SU's successors. 415234285Sdimvoid ScheduleDAGMI::releaseSuccessors(SUnit *SU) { 416234285Sdim for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 417234285Sdim I != E; ++I) { 418234285Sdim releaseSucc(SU, &*I); 419234285Sdim } 420234285Sdim} 421234285Sdim 422234285Sdim/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When 423234285Sdim/// NumSuccsLeft reaches zero, release the predecessor node. 424239462Sdim/// 425239462Sdim/// FIXME: Adjust PredSU height based on MinLatency. 426234285Sdimvoid ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { 427234285Sdim SUnit *PredSU = PredEdge->getSUnit(); 428234285Sdim 429249423Sdim if (PredEdge->isWeak()) { 430249423Sdim --PredSU->WeakSuccsLeft; 431249423Sdim if (PredEdge->isCluster()) 432249423Sdim NextClusterPred = PredSU; 433249423Sdim return; 434249423Sdim } 435234285Sdim#ifndef NDEBUG 436234285Sdim if (PredSU->NumSuccsLeft == 0) { 437234285Sdim dbgs() << "*** Scheduling failed! ***\n"; 438234285Sdim PredSU->dump(this); 439234285Sdim dbgs() << " has been released too many times!\n"; 440234285Sdim llvm_unreachable(0); 441234285Sdim } 442234285Sdim#endif 443234285Sdim --PredSU->NumSuccsLeft; 444234285Sdim if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 445234285Sdim SchedImpl->releaseBottomNode(PredSU); 446234285Sdim} 447234285Sdim 448234285Sdim/// releasePredecessors - Call releasePred on each of SU's predecessors. 
449234285Sdimvoid ScheduleDAGMI::releasePredecessors(SUnit *SU) { 450234285Sdim for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 451234285Sdim I != E; ++I) { 452234285Sdim releasePred(SU, &*I); 453234285Sdim } 454234285Sdim} 455234285Sdim 456251662Sdim/// This is normally called from the main scheduler loop but may also be invoked 457251662Sdim/// by the scheduling strategy to perform additional code motion. 458234285Sdimvoid ScheduleDAGMI::moveInstruction(MachineInstr *MI, 459234285Sdim MachineBasicBlock::iterator InsertPos) { 460239462Sdim // Advance RegionBegin if the first instruction moves down. 461234285Sdim if (&*RegionBegin == MI) 462239462Sdim ++RegionBegin; 463239462Sdim 464239462Sdim // Update the instruction stream. 465234285Sdim BB->splice(InsertPos, BB, MI); 466239462Sdim 467239462Sdim // Update LiveIntervals 468243830Sdim LIS->handleMove(MI, /*UpdateFlags=*/true); 469239462Sdim 470239462Sdim // Recede RegionBegin if an instruction moves above the first. 471234285Sdim if (RegionBegin == InsertPos) 472234285Sdim RegionBegin = MI; 473234285Sdim} 474234285Sdim 475234285Sdimbool ScheduleDAGMI::checkSchedLimit() { 476234285Sdim#ifndef NDEBUG 477234285Sdim if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 478234285Sdim CurrentTop = CurrentBottom; 479234285Sdim return false; 480234285Sdim } 481234285Sdim ++NumInstrsScheduled; 482234285Sdim#endif 483234285Sdim return true; 484234285Sdim} 485234285Sdim 486239462Sdim/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 487239462Sdim/// crossing a scheduling boundary. [begin, end) includes all instructions in 488239462Sdim/// the region, including the boundary itself and single-instruction regions 489239462Sdim/// that don't get scheduled. 
490239462Sdimvoid ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 491239462Sdim MachineBasicBlock::iterator begin, 492239462Sdim MachineBasicBlock::iterator end, 493263508Sdim unsigned regioninstrs) 494239462Sdim{ 495263508Sdim ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); 496239462Sdim 497239462Sdim // For convenience remember the end of the liveness region. 498239462Sdim LiveRegionEnd = 499239462Sdim (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 500263508Sdim 501263508Sdim SUPressureDiffs.clear(); 502263508Sdim 503263508Sdim SchedImpl->initPolicy(begin, end, regioninstrs); 504263508Sdim 505263508Sdim ShouldTrackPressure = SchedImpl->shouldTrackPressure(); 506239462Sdim} 507239462Sdim 508239462Sdim// Setup the register pressure trackers for the top scheduled top and bottom 509239462Sdim// scheduled regions. 510239462Sdimvoid ScheduleDAGMI::initRegPressure() { 511239462Sdim TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 512239462Sdim BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 513239462Sdim 514239462Sdim // Close the RPTracker to finalize live ins. 515239462Sdim RPTracker.closeRegion(); 516239462Sdim 517263508Sdim DEBUG(RPTracker.dump()); 518239462Sdim 519239462Sdim // Initialize the live ins and live outs. 520239462Sdim TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 521239462Sdim BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 522239462Sdim 523239462Sdim // Close one end of the tracker so we can call 524239462Sdim // getMaxUpward/DownwardPressureDelta before advancing across any 525239462Sdim // instructions. This converts currently live regs into live ins/outs. 
526239462Sdim TopRPTracker.closeTop(); 527239462Sdim BotRPTracker.closeBottom(); 528239462Sdim 529263508Sdim BotRPTracker.initLiveThru(RPTracker); 530263508Sdim if (!BotRPTracker.getLiveThru().empty()) { 531263508Sdim TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); 532263508Sdim DEBUG(dbgs() << "Live Thru: "; 533263508Sdim dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); 534263508Sdim }; 535263508Sdim 536263508Sdim // For each live out vreg reduce the pressure change associated with other 537263508Sdim // uses of the same vreg below the live-out reaching def. 538263508Sdim updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); 539263508Sdim 540239462Sdim // Account for liveness generated by the region boundary. 541263508Sdim if (LiveRegionEnd != RegionEnd) { 542263508Sdim SmallVector<unsigned, 8> LiveUses; 543263508Sdim BotRPTracker.recede(&LiveUses); 544263508Sdim updatePressureDiffs(LiveUses); 545263508Sdim } 546239462Sdim 547239462Sdim assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); 548239462Sdim 549239462Sdim // Cache the list of excess pressure sets in this region. This will also track 550239462Sdim // the max pressure in the scheduled code for these sets. 
551239462Sdim RegionCriticalPSets.clear(); 552249423Sdim const std::vector<unsigned> &RegionPressure = 553249423Sdim RPTracker.getPressure().MaxSetPressure; 554239462Sdim for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { 555263508Sdim unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); 556263508Sdim if (RegionPressure[i] > Limit) { 557263508Sdim DEBUG(dbgs() << TRI->getRegPressureSetName(i) 558263508Sdim << " Limit " << Limit 559263508Sdim << " Actual " << RegionPressure[i] << "\n"); 560263508Sdim RegionCriticalPSets.push_back(PressureChange(i)); 561263508Sdim } 562239462Sdim } 563239462Sdim DEBUG(dbgs() << "Excess PSets: "; 564239462Sdim for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) 565239462Sdim dbgs() << TRI->getRegPressureSetName( 566263508Sdim RegionCriticalPSets[i].getPSet()) << " "; 567239462Sdim dbgs() << "\n"); 568239462Sdim} 569239462Sdim 570239462Sdimvoid ScheduleDAGMI:: 571263508SdimupdateScheduledPressure(const SUnit *SU, 572263508Sdim const std::vector<unsigned> &NewMaxPressure) { 573263508Sdim const PressureDiff &PDiff = getPressureDiff(SU); 574263508Sdim unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); 575263508Sdim for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); 576263508Sdim I != E; ++I) { 577263508Sdim if (!I->isValid()) 578263508Sdim break; 579263508Sdim unsigned ID = I->getPSet(); 580263508Sdim while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) 581263508Sdim ++CritIdx; 582263508Sdim if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { 583263508Sdim if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() 584263508Sdim && NewMaxPressure[ID] <= INT16_MAX) 585263508Sdim RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); 586263508Sdim } 587263508Sdim unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); 588263508Sdim if (NewMaxPressure[ID] >= Limit - 2) { 589263508Sdim DEBUG(dbgs() << " " << 
TRI->getRegPressureSetName(ID) << ": " 590263508Sdim << NewMaxPressure[ID] << " > " << Limit << "(+ " 591263508Sdim << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); 592263508Sdim } 593239462Sdim } 594263508Sdim} 595263508Sdim 596263508Sdim/// Update the PressureDiff array for liveness after scheduling this 597263508Sdim/// instruction. 598263508Sdimvoid ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { 599263508Sdim for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { 600263508Sdim /// FIXME: Currently assuming single-use physregs. 601263508Sdim unsigned Reg = LiveUses[LUIdx]; 602263508Sdim DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); 603263508Sdim if (!TRI->isVirtualRegister(Reg)) 604263508Sdim continue; 605263508Sdim 606263508Sdim // This may be called before CurrentBottom has been initialized. However, 607263508Sdim // BotRPTracker must have a valid position. We want the value live into the 608263508Sdim // instruction or live out of the block, so ask for the previous 609263508Sdim // instruction's live-out. 610263508Sdim const LiveInterval &LI = LIS->getInterval(Reg); 611263508Sdim VNInfo *VNI; 612263508Sdim MachineBasicBlock::const_iterator I = 613263508Sdim nextIfDebug(BotRPTracker.getPos(), BB->end()); 614263508Sdim if (I == BB->end()) 615263508Sdim VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); 616263508Sdim else { 617263508Sdim LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); 618263508Sdim VNI = LRQ.valueIn(); 619263508Sdim } 620263508Sdim // RegisterPressureTracker guarantees that readsReg is true for LiveUses. 
621263508Sdim assert(VNI && "No live value at use."); 622263508Sdim for (VReg2UseMap::iterator 623263508Sdim UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { 624263508Sdim SUnit *SU = UI->SU; 625263508Sdim DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " 626263508Sdim << *SU->getInstr()); 627263508Sdim // If this use comes before the reaching def, it cannot be a last use, so 628263508Sdim // descrease its pressure change. 629263508Sdim if (!SU->isScheduled && SU != &ExitSU) { 630263508Sdim LiveQueryResult LRQ 631263508Sdim = LI.Query(LIS->getInstructionIndex(SU->getInstr())); 632263508Sdim if (LRQ.valueIn() == VNI) 633263508Sdim getPressureDiff(SU).addPressureChange(Reg, true, &MRI); 634251662Sdim } 635263508Sdim } 636263508Sdim } 637239462Sdim} 638239462Sdim 639243830Sdim/// schedule - Called back from MachineScheduler::runOnMachineFunction 640243830Sdim/// after setting up the current scheduling region. [RegionBegin, RegionEnd) 641243830Sdim/// only includes instructions that have DAG nodes, not scheduling boundaries. 642243830Sdim/// 643243830Sdim/// This is a skeletal driver, with all the functionality pushed into helpers, 644243830Sdim/// so that it can be easilly extended by experimental schedulers. Generally, 645243830Sdim/// implementing MachineSchedStrategy should be sufficient to implement a new 646243830Sdim/// scheduling algorithm. However, if a scheduler further subclasses 647243830Sdim/// ScheduleDAGMI then it will want to override this virtual method in order to 648243830Sdim/// update any specialized state. 649243830Sdimvoid ScheduleDAGMI::schedule() { 650243830Sdim buildDAGWithRegPressure(); 651243830Sdim 652249423Sdim Topo.InitDAGTopologicalSorting(); 653249423Sdim 654243830Sdim postprocessDAG(); 655243830Sdim 656249423Sdim SmallVector<SUnit*, 8> TopRoots, BotRoots; 657249423Sdim findRootsAndBiasEdges(TopRoots, BotRoots); 658249423Sdim 659249423Sdim // Initialize the strategy before modifying the DAG. 
660249423Sdim // This may initialize a DFSResult to be used for queue priority. 661249423Sdim SchedImpl->initialize(this); 662249423Sdim 663243830Sdim DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 664243830Sdim SUnits[su].dumpAll(this)); 665243830Sdim if (ViewMISchedDAGs) viewGraph(); 666243830Sdim 667249423Sdim // Initialize ready queues now that the DAG and priority data are finalized. 668249423Sdim initQueues(TopRoots, BotRoots); 669243830Sdim 670243830Sdim bool IsTopNode = false; 671243830Sdim while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { 672243830Sdim assert(!SU->isScheduled && "Node already scheduled"); 673243830Sdim if (!checkSchedLimit()) 674243830Sdim break; 675243830Sdim 676243830Sdim scheduleMI(SU, IsTopNode); 677243830Sdim 678243830Sdim updateQueues(SU, IsTopNode); 679243830Sdim } 680243830Sdim assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 681243830Sdim 682243830Sdim placeDebugValues(); 683243830Sdim 684243830Sdim DEBUG({ 685249423Sdim unsigned BBNum = begin()->getParent()->getNumber(); 686243830Sdim dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 687243830Sdim dumpSchedule(); 688243830Sdim dbgs() << '\n'; 689243830Sdim }); 690243830Sdim} 691243830Sdim 692243830Sdim/// Build the DAG and setup three register pressure trackers. 693243830Sdimvoid ScheduleDAGMI::buildDAGWithRegPressure() { 694263508Sdim if (!ShouldTrackPressure) { 695263508Sdim RPTracker.reset(); 696263508Sdim RegionCriticalPSets.clear(); 697263508Sdim buildSchedGraph(AA); 698263508Sdim return; 699263508Sdim } 700263508Sdim 701243830Sdim // Initialize the register pressure tracker used by buildSchedGraph. 702263508Sdim RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, 703263508Sdim /*TrackUntiedDefs=*/true); 704243830Sdim 705243830Sdim // Account for liveness generate by the region boundary. 
706243830Sdim if (LiveRegionEnd != RegionEnd) 707243830Sdim RPTracker.recede(); 708243830Sdim 709243830Sdim // Build the DAG, and compute current register pressure. 710263508Sdim buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); 711243830Sdim 712243830Sdim // Initialize top/bottom trackers after computing region pressure. 713243830Sdim initRegPressure(); 714243830Sdim} 715243830Sdim 716243830Sdim/// Apply each ScheduleDAGMutation step in order. 717243830Sdimvoid ScheduleDAGMI::postprocessDAG() { 718243830Sdim for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 719243830Sdim Mutations[i]->apply(this); 720243830Sdim } 721243830Sdim} 722243830Sdim 723249423Sdimvoid ScheduleDAGMI::computeDFSResult() { 724249423Sdim if (!DFSResult) 725249423Sdim DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); 726249423Sdim DFSResult->clear(); 727249423Sdim ScheduledTrees.clear(); 728249423Sdim DFSResult->resize(SUnits.size()); 729249423Sdim DFSResult->compute(SUnits); 730249423Sdim ScheduledTrees.resize(DFSResult->getNumSubtrees()); 731249423Sdim} 732239462Sdim 733249423Sdimvoid ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 734249423Sdim SmallVectorImpl<SUnit*> &BotRoots) { 735239462Sdim for (std::vector<SUnit>::iterator 736239462Sdim I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 737249423Sdim SUnit *SU = &(*I); 738249423Sdim assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); 739249423Sdim 740249423Sdim // Order predecessors so DFSResult follows the critical path. 741249423Sdim SU->biasCriticalPath(); 742249423Sdim 743239462Sdim // A SUnit is ready to top schedule if it has no predecessors. 744249423Sdim if (!I->NumPredsLeft) 745249423Sdim TopRoots.push_back(SU); 746239462Sdim // A SUnit is ready to bottom schedule if it has no successors. 
747249423Sdim if (!I->NumSuccsLeft) 748249423Sdim BotRoots.push_back(SU); 749239462Sdim } 750249423Sdim ExitSU.biasCriticalPath(); 751249423Sdim} 752249423Sdim 753263508Sdim/// Compute the max cyclic critical path through the DAG. The scheduling DAG 754263508Sdim/// only provides the critical path for single block loops. To handle loops that 755263508Sdim/// span blocks, we could use the vreg path latencies provided by 756263508Sdim/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently 757263508Sdim/// available for use in the scheduler. 758263508Sdim/// 759263508Sdim/// The cyclic path estimation identifies a def-use pair that crosses the back 760263508Sdim/// edge and considers the depth and height of the nodes. For example, consider 761263508Sdim/// the following instruction sequence where each instruction has unit latency 762263508Sdim/// and defines an epomymous virtual register: 763263508Sdim/// 764263508Sdim/// a->b(a,c)->c(b)->d(c)->exit 765263508Sdim/// 766263508Sdim/// The cyclic critical path is a two cycles: b->c->b 767263508Sdim/// The acyclic critical path is four cycles: a->b->c->d->exit 768263508Sdim/// LiveOutHeight = height(c) = len(c->d->exit) = 2 769263508Sdim/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 770263508Sdim/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 771263508Sdim/// LiveInDepth = depth(b) = len(a->b) = 1 772263508Sdim/// 773263508Sdim/// LiveOutDepth - LiveInDepth = 3 - 1 = 2 774263508Sdim/// LiveInHeight - LiveOutHeight = 4 - 2 = 2 775263508Sdim/// CyclicCriticalPath = min(2, 2) = 2 776263508Sdimunsigned ScheduleDAGMI::computeCyclicCriticalPath() { 777263508Sdim // This only applies to single block loop. 778263508Sdim if (!BB->isSuccessor(BB)) 779263508Sdim return 0; 780263508Sdim 781263508Sdim unsigned MaxCyclicLatency = 0; 782263508Sdim // Visit each live out vreg def to find def/use pairs that cross iterations. 
783263508Sdim ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; 784263508Sdim for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); 785263508Sdim RI != RE; ++RI) { 786263508Sdim unsigned Reg = *RI; 787263508Sdim if (!TRI->isVirtualRegister(Reg)) 788263508Sdim continue; 789263508Sdim const LiveInterval &LI = LIS->getInterval(Reg); 790263508Sdim const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); 791263508Sdim if (!DefVNI) 792263508Sdim continue; 793263508Sdim 794263508Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); 795263508Sdim const SUnit *DefSU = getSUnit(DefMI); 796263508Sdim if (!DefSU) 797263508Sdim continue; 798263508Sdim 799263508Sdim unsigned LiveOutHeight = DefSU->getHeight(); 800263508Sdim unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; 801263508Sdim // Visit all local users of the vreg def. 802263508Sdim for (VReg2UseMap::iterator 803263508Sdim UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { 804263508Sdim if (UI->SU == &ExitSU) 805263508Sdim continue; 806263508Sdim 807263508Sdim // Only consider uses of the phi. 808263508Sdim LiveQueryResult LRQ = 809263508Sdim LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); 810263508Sdim if (!LRQ.valueIn()->isPHIDef()) 811263508Sdim continue; 812263508Sdim 813263508Sdim // Assume that a path spanning two iterations is a cycle, which could 814263508Sdim // overestimate in strange cases. This allows cyclic latency to be 815263508Sdim // estimated as the minimum slack of the vreg's depth or height. 
816263508Sdim unsigned CyclicLatency = 0; 817263508Sdim if (LiveOutDepth > UI->SU->getDepth()) 818263508Sdim CyclicLatency = LiveOutDepth - UI->SU->getDepth(); 819263508Sdim 820263508Sdim unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; 821263508Sdim if (LiveInHeight > LiveOutHeight) { 822263508Sdim if (LiveInHeight - LiveOutHeight < CyclicLatency) 823263508Sdim CyclicLatency = LiveInHeight - LiveOutHeight; 824263508Sdim } 825263508Sdim else 826263508Sdim CyclicLatency = 0; 827263508Sdim 828263508Sdim DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" 829263508Sdim << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); 830263508Sdim if (CyclicLatency > MaxCyclicLatency) 831263508Sdim MaxCyclicLatency = CyclicLatency; 832263508Sdim } 833263508Sdim } 834263508Sdim DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); 835263508Sdim return MaxCyclicLatency; 836263508Sdim} 837263508Sdim 838249423Sdim/// Identify DAG roots and setup scheduler queues. 839249423Sdimvoid ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, 840249423Sdim ArrayRef<SUnit*> BotRoots) { 841249423Sdim NextClusterSucc = NULL; 842249423Sdim NextClusterPred = NULL; 843249423Sdim 844249423Sdim // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 845249423Sdim // 846249423Sdim // Nodes with unreleased weak edges can still be roots. 847249423Sdim // Release top roots in forward order. 848249423Sdim for (SmallVectorImpl<SUnit*>::const_iterator 849249423Sdim I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { 850249423Sdim SchedImpl->releaseTopNode(*I); 851249423Sdim } 852239462Sdim // Release bottom roots in reverse order so the higher priority nodes appear 853239462Sdim // first. This is more natural and slightly more efficient. 
854239462Sdim for (SmallVectorImpl<SUnit*>::const_reverse_iterator 855249423Sdim I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { 856239462Sdim SchedImpl->releaseBottomNode(*I); 857249423Sdim } 858239462Sdim 859234285Sdim releaseSuccessors(&EntrySU); 860234285Sdim releasePredecessors(&ExitSU); 861234285Sdim 862243830Sdim SchedImpl->registerRoots(); 863243830Sdim 864249423Sdim // Advance past initial DebugValues. 865239462Sdim CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 866263508Sdim CurrentBottom = RegionEnd; 867249423Sdim 868263508Sdim if (ShouldTrackPressure) { 869263508Sdim assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 870263508Sdim TopRPTracker.setPos(CurrentTop); 871263508Sdim } 872243830Sdim} 873234285Sdim 874243830Sdim/// Move an instruction and update register pressure. 875243830Sdimvoid ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { 876243830Sdim // Move the instruction to its new location in the instruction stream. 877243830Sdim MachineInstr *MI = SU->getInstr(); 878234285Sdim 879243830Sdim if (IsTopNode) { 880243830Sdim assert(SU->isTopReady() && "node still has unscheduled dependencies"); 881243830Sdim if (&*CurrentTop == MI) 882243830Sdim CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 883243830Sdim else { 884243830Sdim moveInstruction(MI, CurrentTop); 885243830Sdim TopRPTracker.setPos(MI); 886243830Sdim } 887239462Sdim 888263508Sdim if (ShouldTrackPressure) { 889263508Sdim // Update top scheduled pressure. 
890263508Sdim TopRPTracker.advance(); 891263508Sdim assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 892263508Sdim updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); 893263508Sdim } 894243830Sdim } 895243830Sdim else { 896243830Sdim assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 897243830Sdim MachineBasicBlock::iterator priorII = 898243830Sdim priorNonDebug(CurrentBottom, CurrentTop); 899243830Sdim if (&*priorII == MI) 900243830Sdim CurrentBottom = priorII; 901234285Sdim else { 902243830Sdim if (&*CurrentTop == MI) { 903243830Sdim CurrentTop = nextIfDebug(++CurrentTop, priorII); 904243830Sdim TopRPTracker.setPos(CurrentTop); 905234285Sdim } 906243830Sdim moveInstruction(MI, CurrentBottom); 907243830Sdim CurrentBottom = MI; 908234285Sdim } 909263508Sdim if (ShouldTrackPressure) { 910263508Sdim // Update bottom scheduled pressure. 911263508Sdim SmallVector<unsigned, 8> LiveUses; 912263508Sdim BotRPTracker.recede(&LiveUses); 913263508Sdim assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 914263508Sdim updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); 915263508Sdim updatePressureDiffs(LiveUses); 916263508Sdim } 917234285Sdim } 918243830Sdim} 919239462Sdim 920243830Sdim/// Update scheduler queues after scheduling an instruction. 921243830Sdimvoid ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 922243830Sdim // Release dependent instructions for scheduling. 
923243830Sdim if (IsTopNode) 924243830Sdim releaseSuccessors(SU); 925243830Sdim else 926243830Sdim releasePredecessors(SU); 927243830Sdim 928243830Sdim SU->isScheduled = true; 929243830Sdim 930249423Sdim if (DFSResult) { 931249423Sdim unsigned SubtreeID = DFSResult->getSubtreeID(SU); 932249423Sdim if (!ScheduledTrees.test(SubtreeID)) { 933249423Sdim ScheduledTrees.set(SubtreeID); 934249423Sdim DFSResult->scheduleTree(SubtreeID); 935249423Sdim SchedImpl->scheduleTree(SubtreeID); 936249423Sdim } 937249423Sdim } 938249423Sdim 939243830Sdim // Notify the scheduling strategy after updating the DAG. 940243830Sdim SchedImpl->schedNode(SU, IsTopNode); 941234285Sdim} 942234285Sdim 943239462Sdim/// Reinsert any remaining debug_values, just like the PostRA scheduler. 944239462Sdimvoid ScheduleDAGMI::placeDebugValues() { 945239462Sdim // If first instruction was a DBG_VALUE then put it back. 946239462Sdim if (FirstDbgValue) { 947239462Sdim BB->splice(RegionBegin, BB, FirstDbgValue); 948239462Sdim RegionBegin = FirstDbgValue; 949239462Sdim } 950239462Sdim 951239462Sdim for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 952239462Sdim DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 953239462Sdim std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); 954239462Sdim MachineInstr *DbgValue = P.first; 955239462Sdim MachineBasicBlock::iterator OrigPrevMI = P.second; 956249423Sdim if (&*RegionBegin == DbgValue) 957249423Sdim ++RegionBegin; 958239462Sdim BB->splice(++OrigPrevMI, BB, DbgValue); 959239462Sdim if (OrigPrevMI == llvm::prior(RegionEnd)) 960239462Sdim RegionEnd = DbgValue; 961239462Sdim } 962239462Sdim DbgValues.clear(); 963239462Sdim FirstDbgValue = NULL; 964239462Sdim} 965239462Sdim 966243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 967243830Sdimvoid ScheduleDAGMI::dumpSchedule() const { 968243830Sdim for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 969243830Sdim if (SUnit *SU = getSUnit(&(*MI))) 
970243830Sdim SU->dump(this); 971243830Sdim else 972243830Sdim dbgs() << "Missing SUnit\n"; 973243830Sdim } 974243830Sdim} 975243830Sdim#endif 976243830Sdim 977234285Sdim//===----------------------------------------------------------------------===// 978249423Sdim// LoadClusterMutation - DAG post-processing to cluster loads. 979249423Sdim//===----------------------------------------------------------------------===// 980249423Sdim 981249423Sdimnamespace { 982249423Sdim/// \brief Post-process the DAG to create cluster edges between neighboring 983249423Sdim/// loads. 984249423Sdimclass LoadClusterMutation : public ScheduleDAGMutation { 985249423Sdim struct LoadInfo { 986249423Sdim SUnit *SU; 987249423Sdim unsigned BaseReg; 988249423Sdim unsigned Offset; 989249423Sdim LoadInfo(SUnit *su, unsigned reg, unsigned ofs) 990249423Sdim : SU(su), BaseReg(reg), Offset(ofs) {} 991249423Sdim }; 992249423Sdim static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, 993249423Sdim const LoadClusterMutation::LoadInfo &RHS); 994249423Sdim 995249423Sdim const TargetInstrInfo *TII; 996249423Sdim const TargetRegisterInfo *TRI; 997249423Sdimpublic: 998249423Sdim LoadClusterMutation(const TargetInstrInfo *tii, 999249423Sdim const TargetRegisterInfo *tri) 1000249423Sdim : TII(tii), TRI(tri) {} 1001249423Sdim 1002249423Sdim virtual void apply(ScheduleDAGMI *DAG); 1003249423Sdimprotected: 1004249423Sdim void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); 1005249423Sdim}; 1006249423Sdim} // anonymous 1007249423Sdim 1008249423Sdimbool LoadClusterMutation::LoadInfoLess( 1009249423Sdim const LoadClusterMutation::LoadInfo &LHS, 1010249423Sdim const LoadClusterMutation::LoadInfo &RHS) { 1011249423Sdim if (LHS.BaseReg != RHS.BaseReg) 1012249423Sdim return LHS.BaseReg < RHS.BaseReg; 1013249423Sdim return LHS.Offset < RHS.Offset; 1014249423Sdim} 1015249423Sdim 1016249423Sdimvoid LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, 1017249423Sdim 
ScheduleDAGMI *DAG) { 1018249423Sdim SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; 1019249423Sdim for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { 1020249423Sdim SUnit *SU = Loads[Idx]; 1021249423Sdim unsigned BaseReg; 1022249423Sdim unsigned Offset; 1023249423Sdim if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) 1024249423Sdim LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); 1025249423Sdim } 1026249423Sdim if (LoadRecords.size() < 2) 1027249423Sdim return; 1028249423Sdim std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); 1029249423Sdim unsigned ClusterLength = 1; 1030249423Sdim for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { 1031249423Sdim if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { 1032249423Sdim ClusterLength = 1; 1033249423Sdim continue; 1034249423Sdim } 1035249423Sdim 1036249423Sdim SUnit *SUa = LoadRecords[Idx].SU; 1037249423Sdim SUnit *SUb = LoadRecords[Idx+1].SU; 1038249423Sdim if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) 1039249423Sdim && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { 1040249423Sdim 1041249423Sdim DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" 1042249423Sdim << SUb->NodeNum << ")\n"); 1043249423Sdim // Copy successor edges from SUa to SUb. Interleaving computation 1044249423Sdim // dependent on SUa can prevent load combining due to register reuse. 1045249423Sdim // Predecessor edges do not need to be copied from SUb to SUa since nearby 1046249423Sdim // loads should have effectively the same inputs. 
1047249423Sdim for (SUnit::const_succ_iterator 1048249423Sdim SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { 1049249423Sdim if (SI->getSUnit() == SUb) 1050249423Sdim continue; 1051249423Sdim DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); 1052249423Sdim DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); 1053249423Sdim } 1054249423Sdim ++ClusterLength; 1055249423Sdim } 1056249423Sdim else 1057249423Sdim ClusterLength = 1; 1058249423Sdim } 1059249423Sdim} 1060249423Sdim 1061249423Sdim/// \brief Callback from DAG postProcessing to create cluster edges for loads. 1062249423Sdimvoid LoadClusterMutation::apply(ScheduleDAGMI *DAG) { 1063249423Sdim // Map DAG NodeNum to store chain ID. 1064249423Sdim DenseMap<unsigned, unsigned> StoreChainIDs; 1065249423Sdim // Map each store chain to a set of dependent loads. 1066249423Sdim SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; 1067249423Sdim for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 1068249423Sdim SUnit *SU = &DAG->SUnits[Idx]; 1069249423Sdim if (!SU->getInstr()->mayLoad()) 1070249423Sdim continue; 1071249423Sdim unsigned ChainPredID = DAG->SUnits.size(); 1072249423Sdim for (SUnit::const_pred_iterator 1073249423Sdim PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 1074249423Sdim if (PI->isCtrl()) { 1075249423Sdim ChainPredID = PI->getSUnit()->NodeNum; 1076249423Sdim break; 1077249423Sdim } 1078249423Sdim } 1079249423Sdim // Check if this chain-like pred has been seen 1080249423Sdim // before. ChainPredID==MaxNodeID for loads at the top of the schedule. 
1081249423Sdim unsigned NumChains = StoreChainDependents.size(); 1082249423Sdim std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = 1083249423Sdim StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); 1084249423Sdim if (Result.second) 1085249423Sdim StoreChainDependents.resize(NumChains + 1); 1086249423Sdim StoreChainDependents[Result.first->second].push_back(SU); 1087249423Sdim } 1088249423Sdim // Iterate over the store chains. 1089249423Sdim for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) 1090249423Sdim clusterNeighboringLoads(StoreChainDependents[Idx], DAG); 1091249423Sdim} 1092249423Sdim 1093249423Sdim//===----------------------------------------------------------------------===// 1094249423Sdim// MacroFusion - DAG post-processing to encourage fusion of macro ops. 1095249423Sdim//===----------------------------------------------------------------------===// 1096249423Sdim 1097249423Sdimnamespace { 1098249423Sdim/// \brief Post-process the DAG to create cluster edges between instructions 1099249423Sdim/// that may be fused by the processor into a single operation. 1100249423Sdimclass MacroFusion : public ScheduleDAGMutation { 1101249423Sdim const TargetInstrInfo *TII; 1102249423Sdimpublic: 1103249423Sdim MacroFusion(const TargetInstrInfo *tii): TII(tii) {} 1104249423Sdim 1105249423Sdim virtual void apply(ScheduleDAGMI *DAG); 1106249423Sdim}; 1107249423Sdim} // anonymous 1108249423Sdim 1109249423Sdim/// \brief Callback from DAG postProcessing to create cluster edges to encourage 1110249423Sdim/// fused operations. 1111249423Sdimvoid MacroFusion::apply(ScheduleDAGMI *DAG) { 1112249423Sdim // For now, assume targets can only fuse with the branch. 
1113249423Sdim MachineInstr *Branch = DAG->ExitSU.getInstr(); 1114249423Sdim if (!Branch) 1115249423Sdim return; 1116249423Sdim 1117249423Sdim for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { 1118249423Sdim SUnit *SU = &DAG->SUnits[--Idx]; 1119249423Sdim if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) 1120249423Sdim continue; 1121249423Sdim 1122249423Sdim // Create a single weak edge from SU to ExitSU. The only effect is to cause 1123249423Sdim // bottom-up scheduling to heavily prioritize the clustered SU. There is no 1124249423Sdim // need to copy predecessor edges from ExitSU to SU, since top-down 1125249423Sdim // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling 1126249423Sdim // of SU, we could create an artificial edge from the deepest root, but it 1127249423Sdim // hasn't been needed yet. 1128249423Sdim bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); 1129249423Sdim (void)Success; 1130249423Sdim assert(Success && "No DAG nodes should be reachable from ExitSU"); 1131249423Sdim 1132249423Sdim DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); 1133249423Sdim break; 1134249423Sdim } 1135249423Sdim} 1136249423Sdim 1137249423Sdim//===----------------------------------------------------------------------===// 1138251662Sdim// CopyConstrain - DAG post-processing to encourage copy elimination. 1139251662Sdim//===----------------------------------------------------------------------===// 1140251662Sdim 1141251662Sdimnamespace { 1142251662Sdim/// \brief Post-process the DAG to create weak edges from all uses of a copy to 1143251662Sdim/// the one use that defines the copy's source vreg, most likely an induction 1144251662Sdim/// variable increment. 1145251662Sdimclass CopyConstrain : public ScheduleDAGMutation { 1146251662Sdim // Transient state. 
1147251662Sdim SlotIndex RegionBeginIdx; 1148251662Sdim // RegionEndIdx is the slot index of the last non-debug instruction in the 1149251662Sdim // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. 1150251662Sdim SlotIndex RegionEndIdx; 1151251662Sdimpublic: 1152251662Sdim CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} 1153251662Sdim 1154251662Sdim virtual void apply(ScheduleDAGMI *DAG); 1155251662Sdim 1156251662Sdimprotected: 1157251662Sdim void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); 1158251662Sdim}; 1159251662Sdim} // anonymous 1160251662Sdim 1161251662Sdim/// constrainLocalCopy handles two possibilities: 1162251662Sdim/// 1) Local src: 1163251662Sdim/// I0: = dst 1164251662Sdim/// I1: src = ... 1165251662Sdim/// I2: = dst 1166251662Sdim/// I3: dst = src (copy) 1167251662Sdim/// (create pred->succ edges I0->I1, I2->I1) 1168251662Sdim/// 1169251662Sdim/// 2) Local copy: 1170251662Sdim/// I0: dst = src (copy) 1171251662Sdim/// I1: = dst 1172251662Sdim/// I2: src = ... 1173251662Sdim/// I3: = dst 1174251662Sdim/// (create pred->succ edges I1->I2, I3->I2) 1175251662Sdim/// 1176251662Sdim/// Although the MachineScheduler is currently constrained to single blocks, 1177251662Sdim/// this algorithm should handle extended blocks. An EBB is a set of 1178251662Sdim/// contiguously numbered blocks such that the previous block in the EBB is 1179251662Sdim/// always the single predecessor. 1180251662Sdimvoid CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { 1181251662Sdim LiveIntervals *LIS = DAG->getLIS(); 1182251662Sdim MachineInstr *Copy = CopySU->getInstr(); 1183251662Sdim 1184251662Sdim // Check for pure vreg copies. 
1185251662Sdim unsigned SrcReg = Copy->getOperand(1).getReg(); 1186251662Sdim if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 1187251662Sdim return; 1188251662Sdim 1189251662Sdim unsigned DstReg = Copy->getOperand(0).getReg(); 1190251662Sdim if (!TargetRegisterInfo::isVirtualRegister(DstReg)) 1191251662Sdim return; 1192251662Sdim 1193251662Sdim // Check if either the dest or source is local. If it's live across a back 1194251662Sdim // edge, it's not local. Note that if both vregs are live across the back 1195251662Sdim // edge, we cannot successfully contrain the copy without cyclic scheduling. 1196251662Sdim unsigned LocalReg = DstReg; 1197251662Sdim unsigned GlobalReg = SrcReg; 1198251662Sdim LiveInterval *LocalLI = &LIS->getInterval(LocalReg); 1199251662Sdim if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { 1200251662Sdim LocalReg = SrcReg; 1201251662Sdim GlobalReg = DstReg; 1202251662Sdim LocalLI = &LIS->getInterval(LocalReg); 1203251662Sdim if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) 1204251662Sdim return; 1205251662Sdim } 1206251662Sdim LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); 1207251662Sdim 1208251662Sdim // Find the global segment after the start of the local LI. 1209251662Sdim LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); 1210251662Sdim // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a 1211251662Sdim // local live range. We could create edges from other global uses to the local 1212251662Sdim // start, but the coalescer should have already eliminated these cases, so 1213251662Sdim // don't bother dealing with it. 1214251662Sdim if (GlobalSegment == GlobalLI->end()) 1215251662Sdim return; 1216251662Sdim 1217251662Sdim // If GlobalSegment is killed at the LocalLI->start, the call to find() 1218251662Sdim // returned the next global segment. But if GlobalSegment overlaps with 1219251662Sdim // LocalLI->start, then advance to the next segement. 
If a hole in GlobalLI 1220251662Sdim // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. 1221251662Sdim if (GlobalSegment->contains(LocalLI->beginIndex())) 1222251662Sdim ++GlobalSegment; 1223251662Sdim 1224251662Sdim if (GlobalSegment == GlobalLI->end()) 1225251662Sdim return; 1226251662Sdim 1227251662Sdim // Check if GlobalLI contains a hole in the vicinity of LocalLI. 1228251662Sdim if (GlobalSegment != GlobalLI->begin()) { 1229251662Sdim // Two address defs have no hole. 1230251662Sdim if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, 1231251662Sdim GlobalSegment->start)) { 1232251662Sdim return; 1233251662Sdim } 1234263508Sdim // If the prior global segment may be defined by the same two-address 1235263508Sdim // instruction that also defines LocalLI, then can't make a hole here. 1236263508Sdim if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, 1237263508Sdim LocalLI->beginIndex())) { 1238263508Sdim return; 1239263508Sdim } 1240251662Sdim // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise 1241251662Sdim // it would be a disconnected component in the live range. 1242251662Sdim assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && 1243251662Sdim "Disconnected LRG within the scheduling region."); 1244251662Sdim } 1245251662Sdim MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); 1246251662Sdim if (!GlobalDef) 1247251662Sdim return; 1248251662Sdim 1249251662Sdim SUnit *GlobalSU = DAG->getSUnit(GlobalDef); 1250251662Sdim if (!GlobalSU) 1251251662Sdim return; 1252251662Sdim 1253251662Sdim // GlobalDef is the bottom of the GlobalLI hole. Open the hole by 1254251662Sdim // constraining the uses of the last local def to precede GlobalDef. 
1255251662Sdim SmallVector<SUnit*,8> LocalUses; 1256251662Sdim const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); 1257251662Sdim MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); 1258251662Sdim SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); 1259251662Sdim for (SUnit::const_succ_iterator 1260251662Sdim I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); 1261251662Sdim I != E; ++I) { 1262251662Sdim if (I->getKind() != SDep::Data || I->getReg() != LocalReg) 1263251662Sdim continue; 1264251662Sdim if (I->getSUnit() == GlobalSU) 1265251662Sdim continue; 1266251662Sdim if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) 1267251662Sdim return; 1268251662Sdim LocalUses.push_back(I->getSUnit()); 1269251662Sdim } 1270251662Sdim // Open the top of the GlobalLI hole by constraining any earlier global uses 1271251662Sdim // to precede the start of LocalLI. 1272251662Sdim SmallVector<SUnit*,8> GlobalUses; 1273251662Sdim MachineInstr *FirstLocalDef = 1274251662Sdim LIS->getInstructionFromIndex(LocalLI->beginIndex()); 1275251662Sdim SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); 1276251662Sdim for (SUnit::const_pred_iterator 1277251662Sdim I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { 1278251662Sdim if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) 1279251662Sdim continue; 1280251662Sdim if (I->getSUnit() == FirstLocalSU) 1281251662Sdim continue; 1282251662Sdim if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) 1283251662Sdim return; 1284251662Sdim GlobalUses.push_back(I->getSUnit()); 1285251662Sdim } 1286251662Sdim DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); 1287251662Sdim // Add the weak edges. 
1288251662Sdim for (SmallVectorImpl<SUnit*>::const_iterator 1289251662Sdim I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { 1290251662Sdim DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" 1291251662Sdim << GlobalSU->NodeNum << ")\n"); 1292251662Sdim DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); 1293251662Sdim } 1294251662Sdim for (SmallVectorImpl<SUnit*>::const_iterator 1295251662Sdim I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { 1296251662Sdim DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" 1297251662Sdim << FirstLocalSU->NodeNum << ")\n"); 1298251662Sdim DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); 1299251662Sdim } 1300251662Sdim} 1301251662Sdim 1302251662Sdim/// \brief Callback from DAG postProcessing to create weak edges to encourage 1303251662Sdim/// copy elimination. 1304251662Sdimvoid CopyConstrain::apply(ScheduleDAGMI *DAG) { 1305251662Sdim MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); 1306251662Sdim if (FirstPos == DAG->end()) 1307251662Sdim return; 1308251662Sdim RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); 1309251662Sdim RegionEndIdx = DAG->getLIS()->getInstructionIndex( 1310251662Sdim &*priorNonDebug(DAG->end(), DAG->begin())); 1311251662Sdim 1312251662Sdim for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 1313251662Sdim SUnit *SU = &DAG->SUnits[Idx]; 1314251662Sdim if (!SU->getInstr()->isCopy()) 1315251662Sdim continue; 1316251662Sdim 1317251662Sdim constrainLocalCopy(SU, DAG); 1318251662Sdim } 1319251662Sdim} 1320251662Sdim 1321251662Sdim//===----------------------------------------------------------------------===// 1322263508Sdim// GenericScheduler - Implementation of the generic MachineSchedStrategy. 
//===----------------------------------------------------------------------===//

namespace {
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(GenericScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Set of reasons that apply to multiple candidates. Each CandReason is a
    // bit index into this mask (see isRepeat/setRepeat below).
    uint32_t RepeatReasonSet;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
      : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
    void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;
    // Cyclic (loop-carried) critical path; set by registerRoots() from
    // DAG->computeCyclicCriticalPath() when -misched-cyclicpath is enabled.
    unsigned CyclicCritPath;

    // Scaled count of micro-ops left to schedule.
    unsigned RemIssueCount;

    // True when the acyclic path exceeds the cyclic path by more than the
    // micro-op buffer can hide; computed by checkAcyclicLatency().
    bool IsAcyclicLatencyLimited;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;

    void reset() {
      CriticalPath = 0;
      CyclicCritPath = 0;
      RemIssueCount = 0;
      IsAcyclicLatencyLimited = false;
      RemainingCounts.clear();
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    /// Number of cycles it takes to issue the instructions scheduled in this
    /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
    /// See getStalls().
    unsigned CurrCycle;

    /// Micro-ops issued in the current cycle
    unsigned CurrMOps;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // The latency of dependence chains leading into this zone.
    // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
    // For each cycle scheduled: DLat -= 1.
    unsigned DependentLatency;

    /// Count the scheduled (issued) micro-ops that can be retired by
    /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
    unsigned RetiredMOps;

    // Count scheduled resources that have been executed. Resources are
    // considered executed if they become ready in the time that it takes to
    // saturate any resource including the one in question. Counts are scaled
    // for direct comparison with other resources. Counts can be compared with
    // MOps * getMicroOpFactor and Latency * getLatencyFactor.
    SmallVector<unsigned, 16> ExecutedResCounts;

    /// Cache the max count for a single resource.
    unsigned MaxExecutedResCount;

    // Cache the critical resources ID in this scheduled zone.
    unsigned ZoneCritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

#ifndef NDEBUG
    // Remember the greatest operand latency as an upper bound on the number of
    // times we should retry the pending queue because of a hazard.
    unsigned MaxObservedLatency;
#endif

    void reset() {
      // A new HazardRec is created for each DAG and owned by SchedBoundary.
      // Destroying and reconstructing it is very expensive though. So keep
      // invalid, placeholder HazardRecs.
      if (HazardRec && HazardRec->isEnabled()) {
        delete HazardRec;
        HazardRec = 0;
      }
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      CurrCycle = 0;
      CurrMOps = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      DependentLatency = 0;
      RetiredMOps = 0;
      MaxExecutedResCount = 0;
      ZoneCritResIdx = 0;
      IsResourceLimited = false;
#ifndef NDEBUG
      MaxObservedLatency = 0;
#endif
      // Reserve a zero-count for invalid CritResIdx.
      ExecutedResCounts.resize(1);
      assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
      HazardRec(0) {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == GenericScheduler::TopQID;
    }

#ifndef NDEBUG
    // Debug-only: resource index 0 is reserved for micro-op issue.
    const char *getResourceName(unsigned PIdx) {
      if (!PIdx)
        return "MOps";
      return SchedModel->getProcResource(PIdx)->Name;
    }
#endif

    /// Get the number of latency cycles "covered" by the scheduled
    /// instructions. This is the larger of the critical path within the zone
    /// and the number of cycles required to issue the instructions.
    unsigned getScheduledLatency() const {
      return std::max(ExpectedLatency, CurrCycle);
    }

    /// Remaining critical-path latency from SU: its height when this is the
    /// top (top-down) zone, its depth when this is the bottom (bottom-up) zone.
    unsigned getUnscheduledLatency(SUnit *SU) const {
      return isTop() ? SU->getHeight() : SU->getDepth();
    }

    unsigned getResourceCount(unsigned ResIdx) const {
      return ExecutedResCounts[ResIdx];
    }

    /// Get the scaled count of scheduled micro-ops and resources, including
    /// executed resources.
    unsigned getCriticalCount() const {
      if (!ZoneCritResIdx)
        return RetiredMOps * SchedModel->getMicroOpFactor();
      return getResourceCount(ZoneCritResIdx);
    }

    /// Get a scaled count for the minimum execution time of the scheduled
    /// micro-ops that are ready to execute by getExecutedCount. Notice the
    /// feedback loop.
    unsigned getExecutedCount() const {
      return std::max(CurrCycle * SchedModel->getLatencyFactor(),
                      MaxExecutedResCount);
    }

    bool checkHazard(SUnit *SU);

    unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);

    unsigned getOtherResourceCount(unsigned &OtherCritIdx);

    void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle(unsigned NextCycle);

    void incExecutedResources(unsigned PIdx, unsigned Count);

    unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();

#ifndef NDEBUG
    void dumpScheduledState();
#endif
  };

private:
  const MachineSchedContext *Context;
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

  // Per-region policy set up by initPolicy(); the subtarget and command line
  // flags may override the defaults chosen there.
  MachineSchedPolicy RegionPolicy;
public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  GenericScheduler(const MachineSchedContext *C):
    Context(C), DAG(0), SchedModel(0), TRI(0),
    Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initPolicy(MachineBasicBlock::iterator Begin,
                          MachineBasicBlock::iterator End,
                          unsigned NumRegionInstrs);

  bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void checkAcyclicLatency();

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

  void reschedulePhysRegCopies(SUnit *SU, bool isTop);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif
};
} // namespace

// Seed the remaining-work summary: total scaled micro-ops and per-resource
// cycle counts for every unscheduled instruction in the DAG.
void GenericScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
      * SchedModel->getMicroOpFactor();
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

void GenericScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
}

/// Initialize the per-region scheduling policy.
1712263508Sdimvoid GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 1713263508Sdim MachineBasicBlock::iterator End, 1714263508Sdim unsigned NumRegionInstrs) { 1715263508Sdim const TargetMachine &TM = Context->MF->getTarget(); 1716263508Sdim 1717263508Sdim // Avoid setting up the register pressure tracker for small regions to save 1718263508Sdim // compile time. As a rough heuristic, only track pressure when the number of 1719263508Sdim // schedulable instructions exceeds half the integer register file. 1720263508Sdim unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 1721263508Sdim TM.getTargetLowering()->getRegClassFor(MVT::i32)); 1722263508Sdim 1723263508Sdim RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 1724263508Sdim 1725263508Sdim // For generic targets, we default to bottom-up, because it's simpler and more 1726263508Sdim // compile-time optimizations have been implemented in that direction. 1727263508Sdim RegionPolicy.OnlyBottomUp = true; 1728263508Sdim 1729263508Sdim // Allow the subtarget to override default policy. 1730263508Sdim const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); 1731263508Sdim ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); 1732263508Sdim 1733263508Sdim // After subtarget overrides, apply command line options. 1734263508Sdim if (!EnableRegPressure) 1735263508Sdim RegionPolicy.ShouldTrackPressure = false; 1736263508Sdim 1737263508Sdim // Check -misched-topdown/bottomup can force or unforce scheduling direction. 1738263508Sdim // e.g. -misched-bottomup=false allows scheduling in both directions. 
1739263508Sdim assert((!ForceTopDown || !ForceBottomUp) && 1740263508Sdim "-misched-topdown incompatible with -misched-bottomup"); 1741263508Sdim if (ForceBottomUp.getNumOccurrences() > 0) { 1742263508Sdim RegionPolicy.OnlyBottomUp = ForceBottomUp; 1743263508Sdim if (RegionPolicy.OnlyBottomUp) 1744263508Sdim RegionPolicy.OnlyTopDown = false; 1745263508Sdim } 1746263508Sdim if (ForceTopDown.getNumOccurrences() > 0) { 1747263508Sdim RegionPolicy.OnlyTopDown = ForceTopDown; 1748263508Sdim if (RegionPolicy.OnlyTopDown) 1749263508Sdim RegionPolicy.OnlyBottomUp = false; 1750263508Sdim } 1751263508Sdim} 1752263508Sdim 1753263508Sdimvoid GenericScheduler::initialize(ScheduleDAGMI *dag) { 1754239462Sdim DAG = dag; 1755243830Sdim SchedModel = DAG->getSchedModel(); 1756239462Sdim TRI = DAG->TRI; 1757249423Sdim 1758243830Sdim Rem.init(DAG, SchedModel); 1759243830Sdim Top.init(DAG, SchedModel, &Rem); 1760243830Sdim Bot.init(DAG, SchedModel, &Rem); 1761239462Sdim 1762243830Sdim // Initialize resource counts. 1763243830Sdim 1764243830Sdim // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 1765243830Sdim // are disabled, then these HazardRecs will be disabled. 
1766243830Sdim const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 1767239462Sdim const TargetMachine &TM = DAG->MF.getTarget(); 1768263508Sdim if (!Top.HazardRec) { 1769263508Sdim Top.HazardRec = 1770263508Sdim TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 1771263508Sdim } 1772263508Sdim if (!Bot.HazardRec) { 1773263508Sdim Bot.HazardRec = 1774263508Sdim TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 1775263508Sdim } 1776239462Sdim} 1777239462Sdim 1778263508Sdimvoid GenericScheduler::releaseTopNode(SUnit *SU) { 1779239462Sdim if (SU->isScheduled) 1780239462Sdim return; 1781239462Sdim 1782249423Sdim for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1783239462Sdim I != E; ++I) { 1784263508Sdim if (I->isWeak()) 1785263508Sdim continue; 1786239462Sdim unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; 1787263508Sdim unsigned Latency = I->getLatency(); 1788239462Sdim#ifndef NDEBUG 1789263508Sdim Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); 1790239462Sdim#endif 1791263508Sdim if (SU->TopReadyCycle < PredReadyCycle + Latency) 1792263508Sdim SU->TopReadyCycle = PredReadyCycle + Latency; 1793234285Sdim } 1794239462Sdim Top.releaseNode(SU, SU->TopReadyCycle); 1795239462Sdim} 1796234285Sdim 1797263508Sdimvoid GenericScheduler::releaseBottomNode(SUnit *SU) { 1798239462Sdim if (SU->isScheduled) 1799239462Sdim return; 1800234285Sdim 1801239462Sdim assert(SU->getInstr() && "Scheduled SUnit must have instr"); 1802239462Sdim 1803239462Sdim for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1804239462Sdim I != E; ++I) { 1805249423Sdim if (I->isWeak()) 1806249423Sdim continue; 1807239462Sdim unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 1808263508Sdim unsigned Latency = I->getLatency(); 1809239462Sdim#ifndef NDEBUG 1810263508Sdim Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); 1811239462Sdim#endif 1812263508Sdim if (SU->BotReadyCycle < 
SuccReadyCycle + Latency) 1813263508Sdim SU->BotReadyCycle = SuccReadyCycle + Latency; 1814239462Sdim } 1815239462Sdim Bot.releaseNode(SU, SU->BotReadyCycle); 1816239462Sdim} 1817239462Sdim 1818263508Sdim/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic 1819263508Sdim/// critical path by more cycles than it takes to drain the instruction buffer. 1820263508Sdim/// We estimate an upper bounds on in-flight instructions as: 1821263508Sdim/// 1822263508Sdim/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) 1823263508Sdim/// InFlightIterations = AcyclicPath / CyclesPerIteration 1824263508Sdim/// InFlightResources = InFlightIterations * LoopResources 1825263508Sdim/// 1826263508Sdim/// TODO: Check execution resources in addition to IssueCount. 1827263508Sdimvoid GenericScheduler::checkAcyclicLatency() { 1828263508Sdim if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) 1829263508Sdim return; 1830263508Sdim 1831263508Sdim // Scaled number of cycles per loop iteration. 1832263508Sdim unsigned IterCount = 1833263508Sdim std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), 1834263508Sdim Rem.RemIssueCount); 1835263508Sdim // Scaled acyclic critical path. 
1836263508Sdim unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); 1837263508Sdim // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop 1838263508Sdim unsigned InFlightCount = 1839263508Sdim (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; 1840263508Sdim unsigned BufferLimit = 1841263508Sdim SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); 1842263508Sdim 1843263508Sdim Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; 1844263508Sdim 1845263508Sdim DEBUG(dbgs() << "IssueCycles=" 1846263508Sdim << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " 1847263508Sdim << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() 1848263508Sdim << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount 1849263508Sdim << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() 1850263508Sdim << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; 1851263508Sdim if (Rem.IsAcyclicLatencyLimited) 1852263508Sdim dbgs() << " ACYCLIC LATENCY LIMIT\n"); 1853263508Sdim} 1854263508Sdim 1855263508Sdimvoid GenericScheduler::registerRoots() { 1856243830Sdim Rem.CriticalPath = DAG->ExitSU.getDepth(); 1857263508Sdim 1858243830Sdim // Some roots may not feed into ExitSU. Check all of them in case. 1859243830Sdim for (std::vector<SUnit*>::const_iterator 1860243830Sdim I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { 1861243830Sdim if ((*I)->getDepth() > Rem.CriticalPath) 1862243830Sdim Rem.CriticalPath = (*I)->getDepth(); 1863243830Sdim } 1864243830Sdim DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); 1865263508Sdim 1866263508Sdim if (EnableCyclicPath) { 1867263508Sdim Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 1868263508Sdim checkAcyclicLatency(); 1869263508Sdim } 1870243830Sdim} 1871243830Sdim 1872239462Sdim/// Does this SU have a hazard within the current instruction group. 
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  // Counter-based check: would issuing SU exceed this cycle's issue width?
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

// Find the unscheduled node in ReadySUs with the highest latency.
unsigned GenericScheduler::SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  // Track the worst node only for debug output; the latency is what matters.
  SUnit *LateSU = 0;
  unsigned RemLatency = 0;
  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
       I != E; ++I) {
    unsigned L = getUnscheduledLatency(*I);
    if (L > RemLatency) {
      RemLatency = L;
      LateSU = *I;
    }
  }
  if (LateSU) {
    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
          << LateSU->NodeNum << ") " << RemLatency << "c\n");
  }
  return RemLatency;
}

// Count resources in this zone and the remaining unscheduled
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
unsigned GenericScheduler::SchedBoundary::
getOtherResourceCount(unsigned &OtherCritIdx) {
  OtherCritIdx = 0;
  if (!SchedModel->hasInstrSchedModel())
    return 0;

  // Start from the scaled micro-op issue count (zone + remainder).
  unsigned OtherCritCount = Rem->RemIssueCount
    + (RetiredMOps * SchedModel->getMicroOpFactor());
  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  // Index 0 is reserved for the invalid resource (micro-ops), so start at 1.
  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
       PIdx != PEnd; ++PIdx) {
    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    if (OtherCount > OtherCritCount) {
      OtherCritCount = OtherCount;
      OtherCritIdx = PIdx;
    }
  }
  if (OtherCritIdx) {
    DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
          << " " << getResourceName(OtherCritIdx) << "\n");
  }
  return OtherCritCount;
}

/// Set the CandPolicy for this zone given the current resources and latencies
/// inside and outside the zone.
void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
                                                SchedBoundary &OtherZone) {
  // Now that potential stalls have been considered, apply preemptive heuristics
  // based on the total latency and resources inside and outside this
  // zone.

  // Compute remaining latency. We need this both to determine whether the
  // overall schedule has become latency-limited and whether the instructions
  // outside this zone are resource or latency limited.
  //
  // The "dependent" latency is updated incrementally during scheduling as the
  // max height/depth of scheduled nodes minus the cycles since it was
  // scheduled:
  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
  //
  // The "independent" latency is the max ready queue depth:
  //   ILat = max N.depth for N in Available|Pending
  //
  // RemainingLatency is the greater of independent and dependent latency.
  unsigned RemLatency = DependentLatency;
  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));

  // Compute the critical resource outside the zone.
  unsigned OtherCritIdx;
  unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);

  bool OtherResLimited = false;
  if (SchedModel->hasInstrSchedModel()) {
    unsigned LFactor = SchedModel->getLatencyFactor();
    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
  }
  if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
    Policy.ReduceLatency |= true;
    DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency "
          << RemLatency << " + " << CurrCycle << "c > CritPath "
          << Rem->CriticalPath << "\n");
  }
  // If the same resource is limiting inside and outside the zone, do nothing.
  if (ZoneCritResIdx == OtherCritIdx)
    return;

  DEBUG(
    if (IsResourceLimited) {
      dbgs() << " " << Available.getName() << " ResourceLimited: "
             << getResourceName(ZoneCritResIdx) << "\n";
    }
    if (OtherResLimited)
      dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
    if (!IsResourceLimited && !OtherResLimited)
      dbgs() << " Latency limited both directions.\n");

  if (IsResourceLimited && !Policy.ReduceResIdx)
    Policy.ReduceResIdx = ZoneCritResIdx;

  if (OtherResLimited)
    Policy.DemandResIdx = OtherCritIdx;
}

/// Make SU visible to the scheduler: queue it as available if it can issue at
/// the current cycle, otherwise as pending.
void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                  unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  // Only unbuffered (in-order) targets stall on a not-yet-reached ReadyCycle.
  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);
}

/// Move the boundary of scheduled code by one cycle.
void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
  // In-order (unbuffered) targets may not advance past the soonest ready
  // instruction.
  if (SchedModel->getMicroOpBufferSize() == 0) {
    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
    if (MinReadyCycle > NextCycle)
      NextCycle = MinReadyCycle;
  }
  // Update the current micro-ops, which will issue in the next cycle.
  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

  // Decrement DependentLatency based on the next cycle.
  if ((NextCycle - CurrCycle) > DependentLatency)
    DependentLatency = 0;
  else
    DependentLatency -= (NextCycle - CurrCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  // Re-derive whether this zone is now bounded by resources or by latency.
  unsigned LFactor = SchedModel->getLatencyFactor();
  IsResourceLimited =
    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
    > (int)LFactor;

  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
}

/// Bump the executed count for PIdx, maintaining the cached maximum.
void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
                                                           unsigned Count) {
  ExecutedResCounts[PIdx] += Count;
  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    MaxExecutedResCount = ExecutedResCounts[PIdx];
}

/// Add the given processor resource to this scheduled zone.
///
/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
/// during which this resource is consumed.
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
///
/// NOTE(review): ReadyCycle is currently unused and CurrCycle is always
/// returned; reserved resources are not yet modeled (see TODO below).
unsigned GenericScheduler::SchedBoundary::
countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  unsigned Count = Factor * Cycles;
  DEBUG(dbgs() << " " << getResourceName(PIdx)
        << " +" << Cycles << "x" << Factor << "u\n");

  // Update Executed resources counts.
  incExecutedResources(PIdx, Count);
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Check if this resource exceeds the current critical resource. If so, it
  // becomes the critical resource.
  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    ZoneCritResIdx = PIdx;
    DEBUG(dbgs() << " *** Critical resource "
          << getResourceName(PIdx) << ": "
          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
  }
  // TODO: We don't yet model reserved resources. It's not hard though.
  return CurrCycle;
}

/// Move the boundary of scheduled code by one SUnit.
void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  CurrMOps += IncMOps;
  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
2121263508Sdim // 2122263508Sdim // TODO: Also check if this SU must end a dispatch group. 2123263508Sdim unsigned NextCycle = CurrCycle; 2124263508Sdim if (CurrMOps >= SchedModel->getIssueWidth()) { 2125263508Sdim ++NextCycle; 2126263508Sdim DEBUG(dbgs() << " *** Max MOps " << CurrMOps 2127263508Sdim << " at cycle " << CurrCycle << '\n'); 2128263508Sdim } 2129263508Sdim unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 2130263508Sdim DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); 2131263508Sdim 2132263508Sdim switch (SchedModel->getMicroOpBufferSize()) { 2133263508Sdim case 0: 2134263508Sdim assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); 2135263508Sdim break; 2136263508Sdim case 1: 2137263508Sdim if (ReadyCycle > NextCycle) { 2138263508Sdim NextCycle = ReadyCycle; 2139263508Sdim DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); 2140263508Sdim } 2141263508Sdim break; 2142263508Sdim default: 2143263508Sdim // We don't currently model the OOO reorder buffer, so consider all 2144263508Sdim // scheduled MOps to be "retired". 2145263508Sdim break; 2146263508Sdim } 2147263508Sdim RetiredMOps += IncMOps; 2148263508Sdim 2149243830Sdim // Update resource counts and critical resource. 2150243830Sdim if (SchedModel->hasInstrSchedModel()) { 2151263508Sdim unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); 2152263508Sdim assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); 2153263508Sdim Rem->RemIssueCount -= DecRemIssue; 2154263508Sdim if (ZoneCritResIdx) { 2155263508Sdim // Scale scheduled micro-ops for comparing with the critical resource. 2156263508Sdim unsigned ScaledMOps = 2157263508Sdim RetiredMOps * SchedModel->getMicroOpFactor(); 2158263508Sdim 2159263508Sdim // If scaled micro-ops are now more than the previous critical resource by 2160263508Sdim // a full cycle, then micro-ops issue becomes critical. 
2161263508Sdim if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) 2162263508Sdim >= (int)SchedModel->getLatencyFactor()) { 2163263508Sdim ZoneCritResIdx = 0; 2164263508Sdim DEBUG(dbgs() << " *** Critical resource NumMicroOps: " 2165263508Sdim << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); 2166263508Sdim } 2167263508Sdim } 2168243830Sdim for (TargetSchedModel::ProcResIter 2169243830Sdim PI = SchedModel->getWriteProcResBegin(SC), 2170243830Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2171263508Sdim unsigned RCycle = 2172263508Sdim countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); 2173263508Sdim if (RCycle > NextCycle) 2174263508Sdim NextCycle = RCycle; 2175243830Sdim } 2176243830Sdim } 2177263508Sdim // Update ExpectedLatency and DependentLatency. 2178263508Sdim unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; 2179263508Sdim unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; 2180263508Sdim if (SU->getDepth() > TopLatency) { 2181263508Sdim TopLatency = SU->getDepth(); 2182263508Sdim DEBUG(dbgs() << " " << Available.getName() 2183263508Sdim << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); 2184243830Sdim } 2185263508Sdim if (SU->getHeight() > BotLatency) { 2186263508Sdim BotLatency = SU->getHeight(); 2187263508Sdim DEBUG(dbgs() << " " << Available.getName() 2188263508Sdim << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); 2189263508Sdim } 2190263508Sdim // If we stall for any reason, bump the cycle. 2191263508Sdim if (NextCycle > CurrCycle) { 2192263508Sdim bumpCycle(NextCycle); 2193263508Sdim } 2194243830Sdim else { 2195263508Sdim // After updating ZoneCritResIdx and ExpectedLatency, check if we're 2196263508Sdim // resource limited. If a stall occured, bumpCycle does this. 
2197263508Sdim unsigned LFactor = SchedModel->getLatencyFactor(); 2198263508Sdim IsResourceLimited = 2199263508Sdim (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 2200263508Sdim > (int)LFactor; 2201243830Sdim } 2202263508Sdim DEBUG(dumpScheduledState()); 2203239462Sdim} 2204239462Sdim 2205239462Sdim/// Release pending ready nodes in to the available queue. This makes them 2206239462Sdim/// visible to heuristics. 2207263508Sdimvoid GenericScheduler::SchedBoundary::releasePending() { 2208239462Sdim // If the available queue is empty, it is safe to reset MinReadyCycle. 2209239462Sdim if (Available.empty()) 2210239462Sdim MinReadyCycle = UINT_MAX; 2211239462Sdim 2212239462Sdim // Check to see if any of the pending instructions are ready to issue. If 2213239462Sdim // so, add them to the available queue. 2214263508Sdim bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 2215239462Sdim for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 2216239462Sdim SUnit *SU = *(Pending.begin()+i); 2217239462Sdim unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 2218239462Sdim 2219239462Sdim if (ReadyCycle < MinReadyCycle) 2220239462Sdim MinReadyCycle = ReadyCycle; 2221239462Sdim 2222263508Sdim if (!IsBuffered && ReadyCycle > CurrCycle) 2223239462Sdim continue; 2224239462Sdim 2225239462Sdim if (checkHazard(SU)) 2226239462Sdim continue; 2227239462Sdim 2228239462Sdim Available.push(SU); 2229239462Sdim Pending.remove(Pending.begin()+i); 2230239462Sdim --i; --e; 2231239462Sdim } 2232243830Sdim DEBUG(if (!Pending.empty()) Pending.dump()); 2233239462Sdim CheckPending = false; 2234239462Sdim} 2235239462Sdim 2236239462Sdim/// Remove SU from the ready set for this boundary. 
void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
  // The node is in exactly one of the two queues; erase it from whichever.
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (CurrMOps > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  // Advance the cycle until something becomes available; a bounded number of
  // bumps must suffice or the hazard would never clear.
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle(CurrCycle + 1);
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

#ifndef NDEBUG
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
void GenericScheduler::SchedBoundary::dumpScheduledState() {
  unsigned ResFactor;
  unsigned ResCount;
  if (ZoneCritResIdx) {
    // A specific processor resource is critical.
    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    ResCount = getResourceCount(ZoneCritResIdx);
  }
  else {
    // Index 0: micro-op issue itself is the critical "resource".
    ResFactor = SchedModel->getMicroOpFactor();
    ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
  }
  unsigned LFactor = SchedModel->getLatencyFactor();
  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
         << "  Retired: " << RetiredMOps;
  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
         << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
         << (IsResourceLimited ? "  - Resource" : "  - Latency")
         << " limited.\n";
}
#endif

/// Compute this candidate's effect on the critical/demanded resources named
/// by its policy, by summing the write cycles of its scheduling class.
void GenericScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}


/// Return true if this heuristic determines order.
/// Prefers the smaller value; on a win for either side the winner's Reason is
/// recorded, and on a tie the repeat count for \p Reason is bumped.
static bool tryLess(int TryVal, int CandVal,
                    GenericScheduler::SchedCandidate &TryCand,
                    GenericScheduler::SchedCandidate &Cand,
                    GenericScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}

/// Mirror of tryLess that prefers the larger value.
static bool tryGreater(int TryVal, int CandVal,
                       GenericScheduler::SchedCandidate &TryCand,
                       GenericScheduler::SchedCandidate &Cand,
                       GenericScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal)
  {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}

/// Order two candidates by their register-pressure change, preferring the one
/// that avoids increasing (or best decreases) pressure.
static bool tryPressure(const PressureChange &TryP,
                        const PressureChange &CandP,
                        GenericScheduler::SchedCandidate &TryCand,
                        GenericScheduler::SchedCandidate &Cand,
                        GenericScheduler::CandReason Reason) {
  int TryRank = TryP.getPSetOrMax();
  int CandRank = CandP.getPSetOrMax();
  // If both candidates affect the same set, go with the smallest increase.
  if (TryRank == CandRank) {
    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
                   Reason);
  }
  // If one candidate decreases and the other increases, go with it.
  // Invalid candidates have UnitInc==0.
  if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
              Reason)) {
    return true;
  }
  // If the candidates are decreasing pressure, reverse priority.
  if (TryP.getUnitInc() < 0)
    std::swap(TryRank, CandRank);
  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}

/// Number of unscheduled weak edges on the side from which SU is scheduled.
static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Minimize physical register live ranges. Regalloc wants them adjacent to
/// their physreg def/use.
///
/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
///
/// \return 1 to schedule the copy immediately, -1 to defer it, 0 if neutral.
static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
  const MachineInstr *MI = SU->getInstr();
  if (!MI->isCopy())
    return 0;

  // Operand 0 is the def, operand 1 the use; which side is "already
  // scheduled" depends on the scheduling direction.
  unsigned ScheduledOper = isTop ? 1 : 0;
  unsigned UnscheduledOper = isTop ? 0 : 1;
  // If we have already scheduled the physreg produce/consumer, immediately
  // schedule the copy.
  if (TargetRegisterInfo::isPhysicalRegister(
        MI->getOperand(ScheduledOper).getReg()))
    return 1;
  // If the physreg is at the boundary, defer it. Otherwise schedule it
  // immediately to free the dependent. We can hoist the copy later.
  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
  if (TargetRegisterInfo::isPhysicalRegister(
        MI->getOperand(UnscheduledOper).getReg()))
    return AtBoundary ? -1 : 1;
  return 0;
}

/// Order two candidates by latency criteria when the zone is latency bound:
/// reduce depth/height along the scheduled edge, otherwise extend the
/// opposite path. Returns true if this heuristic decided the order.
static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
                       GenericScheduler::SchedCandidate &Cand,
                       GenericScheduler::SchedBoundary &Zone) {
  if (Zone.isTop()) {
    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                  TryCand, Cand, GenericScheduler::TopDepthReduce))
        return true;
    }
    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                   TryCand, Cand, GenericScheduler::TopPathReduce))
      return true;
  }
  else {
    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                  TryCand, Cand, GenericScheduler::BotHeightReduce))
        return true;
    }
    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                   TryCand, Cand, GenericScheduler::BotPathReduce))
      return true;
  }
  return false;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                    SchedCandidate &TryCand,
                                    SchedBoundary &Zone,
                                    const RegPressureTracker &RPTracker,
                                    RegPressureTracker &TempTracker) {

  if (DAG->isTrackingPressure()) {
    // Always initialize TryCand's RPDelta.
    if (Zone.isTop()) {
      TempTracker.getMaxDownwardPressureDelta(
        TryCand.SU->getInstr(),
        TryCand.RPDelta,
        DAG->getRegionCriticalPSets(),
        DAG->getRegPressure().MaxSetPressure);
    }
    else {
      if (VerifyScheduling) {
        // Slow path that cross-checks the cached PressureDiff.
        TempTracker.getMaxUpwardPressureDelta(
          TryCand.SU->getInstr(),
          &DAG->getPressureDiff(TryCand.SU),
          TryCand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      }
      else {
        RPTracker.getUpwardPressureDelta(
          TryCand.SU->getInstr(),
          DAG->getPressureDiff(TryCand.SU),
          TryCand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      }
    }
  }
  DEBUG(if (TryCand.RPDelta.Excess.isValid())
          dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
          << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
          << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Heuristics below are ordered by decreasing priority; the first one that
  // distinguishes the candidates wins.
  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
                 TryCand, Cand, PhysRegCopy))
    return;

  // Avoid exceeding the target's limit. If signed PSetID is negative, it is
  // invalid; convert it to INT_MAX to give it lowest priority.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                               Cand.RPDelta.Excess,
                                               TryCand, Cand, RegExcess))
    return;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
                                               Cand.RPDelta.CriticalMax,
                                               TryCand, Cand, RegCritical))
    return;

  // For loops that are acyclic path limited, aggressively schedule for latency.
  // This can result in very long dependence chains scheduled in sequence, so
  // once every cycle (when CurrMOps == 0), switch to normal heuristics.
  if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
      && tryLatency(TryCand, Cand, Zone))
    return;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;

  // Weak edges are for clustering and other constraints.
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Weak)) {
    return;
  }
  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
                                               Cand.RPDelta.CurrentMax,
                                               TryCand, Cand, RegMax))
    return;

  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  // For acyclic path limited loops, latency was already checked above.
  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
      && tryLatency(TryCand, Cand, Zone)) {
    return;
  }

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // local pressure avoidance strategy that also makes the machine code
  // readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

#ifndef NDEBUG
/// Map each candidate reason to a fixed-width tag for debug traces.
const char *GenericScheduler::getReasonStr(
  GenericScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case PhysRegCopy:    return "PREG-COPY";
  case RegExcess:      return "REG-EXCESS";
  case RegCritical:    return "REG-CRIT  ";
  case Cluster:        return "CLUSTER   ";
  case Weak:           return "WEAK      ";
  case RegMax:         return "REG-MAX   ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  };
  llvm_unreachable("Unknown reason!");
}

/// Print one line describing \p Cand and the quantity (pressure change,
/// resource, or latency) that justified its winning reason.
void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
  PressureChange P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case RegExcess:
    P = Cand.RPDelta.Excess;
    break;
  case RegCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case RegMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  if (P.isValid())
    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
           << ":" << P.getUnitInc() << " ";
  else
    dbgs() << "      ";
  if (ResIdx)
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "         ";
  if (Latency)
    dbgs() << " " << Latency << " cycles ";
  else
    dbgs() << "          ";
  dbgs() << '\n';
}
#endif

/// Pick the best candidate from the queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // Run every available node through the heuristic tournament; Cand ends up
  // holding the winner.
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand));
    }
  }
}

/// Emit a one-line debug trace for the chosen candidate and direction.
static void tracePick(const GenericScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
        << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    DEBUG(dbgs() << "Pick Bot NOCAND\n");
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    DEBUG(dbgs() << "Pick Top NOCAND\n");
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  // Each zone's policy is informed by the state of the opposite zone.
  Bot.setPolicy(BotCand.Policy, Top);
  Top.setPolicy(TopCand.Policy, Bot);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
      || (BotCand.Reason == RegCritical
          && !BotCand.isRepeat(RegCritical)))
  {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // Choose the queue with the most important (lowest enum) reason.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    // Empty region: nothing left to schedule.
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  // Loop until a node that has not already been scheduled is found.
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand, true);
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand, false);
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}

/// Move already-scheduled COPY instructions that have a single physreg
/// dependence on \p SU so they sit immediately next to it.
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {

  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  if (!isTop)
    ++InsertPos;
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above the scheduled instruction.
  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
       I != E; ++I) {
    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
      continue;
    SUnit *DepSU = I->getSUnit();
    // Skip copies with more than one dependent; moving them could break
    // another dependence.
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
      continue;
    MachineInstr *Copy = DepSU->getInstr();
    if (!Copy->isCopy())
      continue;
    DEBUG(dbgs() << "  Rescheduling physreg copy ";
          I->getSUnit()->dump(DAG));
    DAG->moveInstruction(Copy, InsertPos);
  }
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
/// them here. See comments in biasPhysRegCopy.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
    Top.bumpNode(SU);
    if (SU->hasPhysRegUses)
      reschedulePhysRegCopies(SU, true);
  }
  else {
    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
    Bot.bumpNode(SU);
    if (SU->hasPhysRegDefs)
      reschedulePhysRegCopies(SU, false);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
  // Register DAG post-processors.
  //
  // FIXME: extend the mutation API to allow earlier mutations to instantiate
  // data and pass it to later mutations. Have a single mutation that gathers
  // the interesting nodes in one pass.
2841263508Sdim DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); 2842263508Sdim if (EnableLoadCluster && DAG->TII->enableClusterLoads()) 2843249423Sdim DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); 2844249423Sdim if (EnableMacroFusion) 2845249423Sdim DAG->addMutation(new MacroFusion(DAG->TII)); 2846249423Sdim return DAG; 2847234285Sdim} 2848234285Sdimstatic MachineSchedRegistry 2849263508SdimGenericSchedRegistry("converge", "Standard converging scheduler.", 2850263508Sdim createGenericSched); 2851234285Sdim 2852234285Sdim//===----------------------------------------------------------------------===// 2853243830Sdim// ILP Scheduler. Currently for experimental analysis of heuristics. 2854243830Sdim//===----------------------------------------------------------------------===// 2855243830Sdim 2856243830Sdimnamespace { 2857243830Sdim/// \brief Order nodes by the ILP metric. 2858243830Sdimstruct ILPOrder { 2859249423Sdim const SchedDFSResult *DFSResult; 2860249423Sdim const BitVector *ScheduledTrees; 2861243830Sdim bool MaximizeILP; 2862243830Sdim 2863249423Sdim ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} 2864243830Sdim 2865243830Sdim /// \brief Apply a less-than relation on node priority. 2866249423Sdim /// 2867249423Sdim /// (Return true if A comes after B in the Q.) 2868243830Sdim bool operator()(const SUnit *A, const SUnit *B) const { 2869249423Sdim unsigned SchedTreeA = DFSResult->getSubtreeID(A); 2870249423Sdim unsigned SchedTreeB = DFSResult->getSubtreeID(B); 2871249423Sdim if (SchedTreeA != SchedTreeB) { 2872249423Sdim // Unscheduled trees have lower priority. 2873249423Sdim if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) 2874249423Sdim return ScheduledTrees->test(SchedTreeB); 2875249423Sdim 2876249423Sdim // Trees with shallower connections have have lower priority. 
2877249423Sdim if (DFSResult->getSubtreeLevel(SchedTreeA) 2878249423Sdim != DFSResult->getSubtreeLevel(SchedTreeB)) { 2879249423Sdim return DFSResult->getSubtreeLevel(SchedTreeA) 2880249423Sdim < DFSResult->getSubtreeLevel(SchedTreeB); 2881249423Sdim } 2882249423Sdim } 2883243830Sdim if (MaximizeILP) 2884249423Sdim return DFSResult->getILP(A) < DFSResult->getILP(B); 2885243830Sdim else 2886249423Sdim return DFSResult->getILP(A) > DFSResult->getILP(B); 2887243830Sdim } 2888243830Sdim}; 2889243830Sdim 2890243830Sdim/// \brief Schedule based on the ILP metric. 2891243830Sdimclass ILPScheduler : public MachineSchedStrategy { 2892249423Sdim ScheduleDAGMI *DAG; 2893243830Sdim ILPOrder Cmp; 2894243830Sdim 2895243830Sdim std::vector<SUnit*> ReadyQ; 2896243830Sdimpublic: 2897249423Sdim ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} 2898243830Sdim 2899249423Sdim virtual void initialize(ScheduleDAGMI *dag) { 2900249423Sdim DAG = dag; 2901249423Sdim DAG->computeDFSResult(); 2902249423Sdim Cmp.DFSResult = DAG->getDFSResult(); 2903249423Sdim Cmp.ScheduledTrees = &DAG->getScheduledTrees(); 2904243830Sdim ReadyQ.clear(); 2905243830Sdim } 2906243830Sdim 2907243830Sdim virtual void registerRoots() { 2908249423Sdim // Restore the heap in ReadyQ with the updated DFS results. 2909249423Sdim std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 2910243830Sdim } 2911243830Sdim 2912243830Sdim /// Implement MachineSchedStrategy interface. 2913243830Sdim /// ----------------------------------------- 2914243830Sdim 2915249423Sdim /// Callback to select the highest priority node from the ready Q. 
2916243830Sdim virtual SUnit *pickNode(bool &IsTopNode) { 2917243830Sdim if (ReadyQ.empty()) return NULL; 2918249423Sdim std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 2919243830Sdim SUnit *SU = ReadyQ.back(); 2920243830Sdim ReadyQ.pop_back(); 2921243830Sdim IsTopNode = false; 2922251662Sdim DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " 2923249423Sdim << " ILP: " << DAG->getDFSResult()->getILP(SU) 2924249423Sdim << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" 2925249423Sdim << DAG->getDFSResult()->getSubtreeLevel( 2926251662Sdim DAG->getDFSResult()->getSubtreeID(SU)) << '\n' 2927251662Sdim << "Scheduling " << *SU->getInstr()); 2928243830Sdim return SU; 2929243830Sdim } 2930243830Sdim 2931249423Sdim /// \brief Scheduler callback to notify that a new subtree is scheduled. 2932249423Sdim virtual void scheduleTree(unsigned SubtreeID) { 2933249423Sdim std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 2934249423Sdim } 2935243830Sdim 2936249423Sdim /// Callback after a node is scheduled. Mark a newly scheduled tree, notify 2937249423Sdim /// DFSResults, and resort the priority Q. 
2938249423Sdim virtual void schedNode(SUnit *SU, bool IsTopNode) { 2939249423Sdim assert(!IsTopNode && "SchedDFSResult needs bottom-up"); 2940249423Sdim } 2941249423Sdim 2942243830Sdim virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } 2943243830Sdim 2944243830Sdim virtual void releaseBottomNode(SUnit *SU) { 2945243830Sdim ReadyQ.push_back(SU); 2946243830Sdim std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 2947243830Sdim } 2948243830Sdim}; 2949243830Sdim} // namespace 2950243830Sdim 2951243830Sdimstatic ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 2952243830Sdim return new ScheduleDAGMI(C, new ILPScheduler(true)); 2953243830Sdim} 2954243830Sdimstatic ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 2955243830Sdim return new ScheduleDAGMI(C, new ILPScheduler(false)); 2956243830Sdim} 2957243830Sdimstatic MachineSchedRegistry ILPMaxRegistry( 2958243830Sdim "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); 2959243830Sdimstatic MachineSchedRegistry ILPMinRegistry( 2960243830Sdim "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 2961243830Sdim 2962243830Sdim//===----------------------------------------------------------------------===// 2963234285Sdim// Machine Instruction Shuffler for Correctness Testing 2964234285Sdim//===----------------------------------------------------------------------===// 2965234285Sdim 2966234285Sdim#ifndef NDEBUG 2967234285Sdimnamespace { 2968234285Sdim/// Apply a less-than relation on the node order, which corresponds to the 2969234285Sdim/// instruction order prior to scheduling. IsReverse implements greater-than. 
2970234285Sdimtemplate<bool IsReverse> 2971234285Sdimstruct SUnitOrder { 2972234285Sdim bool operator()(SUnit *A, SUnit *B) const { 2973234285Sdim if (IsReverse) 2974234285Sdim return A->NodeNum > B->NodeNum; 2975234285Sdim else 2976234285Sdim return A->NodeNum < B->NodeNum; 2977234285Sdim } 2978234285Sdim}; 2979234285Sdim 2980234285Sdim/// Reorder instructions as much as possible. 2981234285Sdimclass InstructionShuffler : public MachineSchedStrategy { 2982234285Sdim bool IsAlternating; 2983234285Sdim bool IsTopDown; 2984234285Sdim 2985234285Sdim // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 2986234285Sdim // gives nodes with a higher number higher priority causing the latest 2987234285Sdim // instructions to be scheduled first. 2988234285Sdim PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > 2989234285Sdim TopQ; 2990234285Sdim // When scheduling bottom-up, use greater-than as the queue priority. 2991234285Sdim PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > 2992234285Sdim BottomQ; 2993234285Sdimpublic: 2994234285Sdim InstructionShuffler(bool alternate, bool topdown) 2995234285Sdim : IsAlternating(alternate), IsTopDown(topdown) {} 2996234285Sdim 2997234285Sdim virtual void initialize(ScheduleDAGMI *) { 2998234285Sdim TopQ.clear(); 2999234285Sdim BottomQ.clear(); 3000234285Sdim } 3001234285Sdim 3002234285Sdim /// Implement MachineSchedStrategy interface. 
3003234285Sdim /// ----------------------------------------- 3004234285Sdim 3005234285Sdim virtual SUnit *pickNode(bool &IsTopNode) { 3006234285Sdim SUnit *SU; 3007234285Sdim if (IsTopDown) { 3008234285Sdim do { 3009234285Sdim if (TopQ.empty()) return NULL; 3010234285Sdim SU = TopQ.top(); 3011234285Sdim TopQ.pop(); 3012234285Sdim } while (SU->isScheduled); 3013234285Sdim IsTopNode = true; 3014234285Sdim } 3015234285Sdim else { 3016234285Sdim do { 3017234285Sdim if (BottomQ.empty()) return NULL; 3018234285Sdim SU = BottomQ.top(); 3019234285Sdim BottomQ.pop(); 3020234285Sdim } while (SU->isScheduled); 3021234285Sdim IsTopNode = false; 3022234285Sdim } 3023234285Sdim if (IsAlternating) 3024234285Sdim IsTopDown = !IsTopDown; 3025234285Sdim return SU; 3026234285Sdim } 3027234285Sdim 3028239462Sdim virtual void schedNode(SUnit *SU, bool IsTopNode) {} 3029239462Sdim 3030234285Sdim virtual void releaseTopNode(SUnit *SU) { 3031234285Sdim TopQ.push(SU); 3032234285Sdim } 3033234285Sdim virtual void releaseBottomNode(SUnit *SU) { 3034234285Sdim BottomQ.push(SU); 3035234285Sdim } 3036234285Sdim}; 3037234285Sdim} // namespace 3038234285Sdim 3039234285Sdimstatic ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 3040234285Sdim bool Alternate = !ForceTopDown && !ForceBottomUp; 3041234285Sdim bool TopDown = !ForceBottomUp; 3042234285Sdim assert((TopDown || !ForceTopDown) && 3043234285Sdim "-misched-topdown incompatible with -misched-bottomup"); 3044234285Sdim return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); 3045234285Sdim} 3046234285Sdimstatic MachineSchedRegistry ShufflerRegistry( 3047234285Sdim "shuffle", "Shuffle machine instructions alternating directions", 3048234285Sdim createInstructionShuffler); 3049234285Sdim#endif // !NDEBUG 3050249423Sdim 3051249423Sdim//===----------------------------------------------------------------------===// 3052249423Sdim// GraphWriter support for ScheduleDAGMI. 
3053249423Sdim//===----------------------------------------------------------------------===// 3054249423Sdim 3055249423Sdim#ifndef NDEBUG 3056249423Sdimnamespace llvm { 3057249423Sdim 3058249423Sdimtemplate<> struct GraphTraits< 3059249423Sdim ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; 3060249423Sdim 3061249423Sdimtemplate<> 3062249423Sdimstruct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { 3063249423Sdim 3064249423Sdim DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} 3065249423Sdim 3066249423Sdim static std::string getGraphName(const ScheduleDAG *G) { 3067249423Sdim return G->MF.getName(); 3068249423Sdim } 3069249423Sdim 3070249423Sdim static bool renderGraphFromBottomUp() { 3071249423Sdim return true; 3072249423Sdim } 3073249423Sdim 3074249423Sdim static bool isNodeHidden(const SUnit *Node) { 3075263508Sdim return (Node->Preds.size() > 10 || Node->Succs.size() > 10); 3076249423Sdim } 3077249423Sdim 3078249423Sdim static bool hasNodeAddressLabel(const SUnit *Node, 3079249423Sdim const ScheduleDAG *Graph) { 3080249423Sdim return false; 3081249423Sdim } 3082249423Sdim 3083249423Sdim /// If you want to override the dot attributes printed for a particular 3084249423Sdim /// edge, override this method. 
3085249423Sdim static std::string getEdgeAttributes(const SUnit *Node, 3086249423Sdim SUnitIterator EI, 3087249423Sdim const ScheduleDAG *Graph) { 3088249423Sdim if (EI.isArtificialDep()) 3089249423Sdim return "color=cyan,style=dashed"; 3090249423Sdim if (EI.isCtrlDep()) 3091249423Sdim return "color=blue,style=dashed"; 3092249423Sdim return ""; 3093249423Sdim } 3094249423Sdim 3095249423Sdim static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { 3096249423Sdim std::string Str; 3097249423Sdim raw_string_ostream SS(Str); 3098263508Sdim const SchedDFSResult *DFS = 3099263508Sdim static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); 3100263508Sdim SS << "SU:" << SU->NodeNum; 3101263508Sdim if (DFS) 3102263508Sdim SS << " I:" << DFS->getNumInstrs(SU); 3103249423Sdim return SS.str(); 3104249423Sdim } 3105249423Sdim static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { 3106249423Sdim return G->getGraphNodeLabel(SU); 3107249423Sdim } 3108249423Sdim 3109249423Sdim static std::string getNodeAttributes(const SUnit *N, 3110249423Sdim const ScheduleDAG *Graph) { 3111249423Sdim std::string Str("shape=Mrecord"); 3112249423Sdim const SchedDFSResult *DFS = 3113249423Sdim static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); 3114249423Sdim if (DFS) { 3115249423Sdim Str += ",style=filled,fillcolor=\"#"; 3116249423Sdim Str += DOT::getColorString(DFS->getSubtreeID(N)); 3117249423Sdim Str += '"'; 3118249423Sdim } 3119249423Sdim return Str; 3120249423Sdim } 3121249423Sdim}; 3122249423Sdim} // namespace llvm 3123249423Sdim#endif // NDEBUG 3124249423Sdim 3125249423Sdim/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG 3126249423Sdim/// rendered using 'dot'. 
3127249423Sdim/// 3128249423Sdimvoid ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { 3129249423Sdim#ifndef NDEBUG 3130249423Sdim ViewGraph(this, Name, false, Title); 3131249423Sdim#else 3132249423Sdim errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " 3133249423Sdim << "systems with Graphviz or gv!\n"; 3134249423Sdim#endif // NDEBUG 3135249423Sdim} 3136249423Sdim 3137249423Sdim/// Out-of-line implementation with no arguments is handy for gdb. 3138249423Sdimvoid ScheduleDAGMI::viewGraph() { 3139249423Sdim viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); 3140249423Sdim} 3141