1234285Sdim//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2234285Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6234285Sdim// 7234285Sdim//===----------------------------------------------------------------------===// 8234285Sdim// 9234285Sdim// MachineScheduler schedules machine instructions after phi elimination. It 10234285Sdim// preserves LiveIntervals so it can be invoked before register allocation. 11234285Sdim// 12234285Sdim//===----------------------------------------------------------------------===// 13234285Sdim 14249423Sdim#include "llvm/CodeGen/MachineScheduler.h" 15321369Sdim#include "llvm/ADT/ArrayRef.h" 16321369Sdim#include "llvm/ADT/BitVector.h" 17321369Sdim#include "llvm/ADT/DenseMap.h" 18249423Sdim#include "llvm/ADT/PriorityQueue.h" 19321369Sdim#include "llvm/ADT/STLExtras.h" 20321369Sdim#include "llvm/ADT/SmallVector.h" 21321369Sdim#include "llvm/ADT/iterator_range.h" 22249423Sdim#include "llvm/Analysis/AliasAnalysis.h" 23321369Sdim#include "llvm/CodeGen/LiveInterval.h" 24327952Sdim#include "llvm/CodeGen/LiveIntervals.h" 25321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 26249423Sdim#include "llvm/CodeGen/MachineDominators.h" 27321369Sdim#include "llvm/CodeGen/MachineFunction.h" 28321369Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 29321369Sdim#include "llvm/CodeGen/MachineInstr.h" 30249423Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 31321369Sdim#include "llvm/CodeGen/MachineOperand.h" 32321369Sdim#include "llvm/CodeGen/MachinePassRegistry.h" 33261991Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 34234285Sdim#include "llvm/CodeGen/Passes.h" 35239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h" 36321369Sdim#include "llvm/CodeGen/RegisterPressure.h" 37321369Sdim#include "llvm/CodeGen/ScheduleDAG.h" 
38321369Sdim#include "llvm/CodeGen/ScheduleDAGInstrs.h" 39321369Sdim#include "llvm/CodeGen/ScheduleDAGMutation.h" 40249423Sdim#include "llvm/CodeGen/ScheduleDFS.h" 41239462Sdim#include "llvm/CodeGen/ScheduleHazardRecognizer.h" 42321369Sdim#include "llvm/CodeGen/SlotIndexes.h" 43344779Sdim#include "llvm/CodeGen/TargetFrameLowering.h" 44327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 45327952Sdim#include "llvm/CodeGen/TargetLowering.h" 46309124Sdim#include "llvm/CodeGen/TargetPassConfig.h" 47327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 48321369Sdim#include "llvm/CodeGen/TargetSchedule.h" 49327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 50341825Sdim#include "llvm/Config/llvm-config.h" 51360784Sdim#include "llvm/InitializePasses.h" 52321369Sdim#include "llvm/MC/LaneBitmask.h" 53321369Sdim#include "llvm/Pass.h" 54234285Sdim#include "llvm/Support/CommandLine.h" 55321369Sdim#include "llvm/Support/Compiler.h" 56234285Sdim#include "llvm/Support/Debug.h" 57234285Sdim#include "llvm/Support/ErrorHandling.h" 58249423Sdim#include "llvm/Support/GraphWriter.h" 59341825Sdim#include "llvm/Support/MachineValueType.h" 60234285Sdim#include "llvm/Support/raw_ostream.h" 61321369Sdim#include <algorithm> 62321369Sdim#include <cassert> 63321369Sdim#include <cstdint> 64321369Sdim#include <iterator> 65321369Sdim#include <limits> 66321369Sdim#include <memory> 67321369Sdim#include <string> 68321369Sdim#include <tuple> 69321369Sdim#include <utility> 70321369Sdim#include <vector> 71234285Sdim 72234285Sdimusing namespace llvm; 73234285Sdim 74321369Sdim#define DEBUG_TYPE "machine-scheduler" 75276479Sdim 76243830Sdimnamespace llvm { 77321369Sdim 78243830Sdimcl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 79243830Sdim cl::desc("Force top-down list scheduling")); 80243830Sdimcl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, 81243830Sdim cl::desc("Force bottom-up list scheduling")); 82280031Sdimcl::opt<bool> 83280031SdimDumpCriticalPathLength("misched-dcpl", 
cl::Hidden, 84280031Sdim cl::desc("Print critical path length to stdout")); 85234285Sdim 86360784Sdimcl::opt<bool> VerifyScheduling( 87360784Sdim "verify-misched", cl::Hidden, 88360784Sdim cl::desc("Verify machine instrs before and after machine scheduling")); 89360784Sdim 90321369Sdim} // end namespace llvm 91321369Sdim 92234285Sdim#ifndef NDEBUG 93234285Sdimstatic cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 94234285Sdim cl::desc("Pop up a window to show MISched dags after they are processed")); 95234285Sdim 96296417Sdim/// In some situations a few uninteresting nodes depend on nearly all other 97296417Sdim/// nodes in the graph, provide a cutoff to hide them. 98296417Sdimstatic cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, 99296417Sdim cl::desc("Hide nodes with more predecessor/successor than cutoff")); 100296417Sdim 101234285Sdimstatic cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, 102234285Sdim cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); 103276479Sdim 104276479Sdimstatic cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, 105276479Sdim cl::desc("Only schedule this function")); 106276479Sdimstatic cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, 107327952Sdim cl::desc("Only schedule this MBB#")); 108344779Sdimstatic cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, 109344779Sdim cl::desc("Print schedule DAGs")); 110234285Sdim#else 111344779Sdimstatic const bool ViewMISchedDAGs = false; 112344779Sdimstatic const bool PrintDAGs = false; 113234285Sdim#endif // NDEBUG 114234285Sdim 115309124Sdim/// Avoid quadratic complexity in unusually large basic blocks by limiting the 116309124Sdim/// size of the ready lists. 
117309124Sdimstatic cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden, 118309124Sdim cl::desc("Limit ready list to N instructions"), cl::init(256)); 119309124Sdim 120261991Sdimstatic cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, 121261991Sdim cl::desc("Enable register pressure scheduling."), cl::init(true)); 122251662Sdim 123261991Sdimstatic cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, 124261991Sdim cl::desc("Enable cyclic critical path analysis."), cl::init(true)); 125261991Sdim 126309124Sdimstatic cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden, 127309124Sdim cl::desc("Enable memop clustering."), 128309124Sdim cl::init(true)); 129243830Sdim 130249423Sdim// DAG subtrees must have at least this many nodes. 131249423Sdimstatic const unsigned MinSubtreeSize = 8; 132249423Sdim 133261991Sdim// Pin the vtables to this file. 134261991Sdimvoid MachineSchedStrategy::anchor() {} 135321369Sdim 136261991Sdimvoid ScheduleDAGMutation::anchor() {} 137261991Sdim 138234285Sdim//===----------------------------------------------------------------------===// 139234285Sdim// Machine Instruction Scheduling Pass and Registry 140234285Sdim//===----------------------------------------------------------------------===// 141234285Sdim 142321369SdimMachineSchedContext::MachineSchedContext() { 143239462Sdim RegClassInfo = new RegisterClassInfo(); 144239462Sdim} 145239462Sdim 146239462SdimMachineSchedContext::~MachineSchedContext() { 147239462Sdim delete RegClassInfo; 148239462Sdim} 149239462Sdim 150234285Sdimnamespace { 151321369Sdim 152276479Sdim/// Base class for a machine scheduler class that can run at any point. 
153276479Sdimclass MachineSchedulerBase : public MachineSchedContext, 154276479Sdim public MachineFunctionPass { 155276479Sdimpublic: 156276479Sdim MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} 157276479Sdim 158276479Sdim void print(raw_ostream &O, const Module* = nullptr) const override; 159276479Sdim 160276479Sdimprotected: 161296417Sdim void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); 162276479Sdim}; 163276479Sdim 164234285Sdim/// MachineScheduler runs after coalescing and before register allocation. 165276479Sdimclass MachineScheduler : public MachineSchedulerBase { 166234285Sdimpublic: 167234285Sdim MachineScheduler(); 168234285Sdim 169276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 170234285Sdim 171276479Sdim bool runOnMachineFunction(MachineFunction&) override; 172234285Sdim 173276479Sdim static char ID; // Class identification, replacement for typeinfo 174234285Sdim 175276479Sdimprotected: 176276479Sdim ScheduleDAGInstrs *createMachineScheduler(); 177276479Sdim}; 178234285Sdim 179276479Sdim/// PostMachineScheduler runs after shortly before code emission. 
180276479Sdimclass PostMachineScheduler : public MachineSchedulerBase { 181276479Sdimpublic: 182276479Sdim PostMachineScheduler(); 183276479Sdim 184276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 185276479Sdim 186276479Sdim bool runOnMachineFunction(MachineFunction&) override; 187276479Sdim 188234285Sdim static char ID; // Class identification, replacement for typeinfo 189261991Sdim 190261991Sdimprotected: 191276479Sdim ScheduleDAGInstrs *createPostMachineScheduler(); 192234285Sdim}; 193234285Sdim 194321369Sdim} // end anonymous namespace 195321369Sdim 196234285Sdimchar MachineScheduler::ID = 0; 197234285Sdim 198234285Sdimchar &llvm::MachineSchedulerID = MachineScheduler::ID; 199234285Sdim 200321369SdimINITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, 201234285Sdim "Machine Instruction Scheduler", false, false) 202296417SdimINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 203360784SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 204321369SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 205234285SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes) 206234285SdimINITIALIZE_PASS_DEPENDENCY(LiveIntervals) 207321369SdimINITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, 208234285Sdim "Machine Instruction Scheduler", false, false) 209234285Sdim 210327952SdimMachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { 211234285Sdim initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 212234285Sdim} 213234285Sdim 214234285Sdimvoid MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 215234285Sdim AU.setPreservesCFG(); 216360784Sdim AU.addRequired<MachineDominatorTree>(); 217234285Sdim AU.addRequired<MachineLoopInfo>(); 218296417Sdim AU.addRequired<AAResultsWrapperPass>(); 219234285Sdim AU.addRequired<TargetPassConfig>(); 220234285Sdim AU.addRequired<SlotIndexes>(); 221234285Sdim AU.addPreserved<SlotIndexes>(); 222234285Sdim AU.addRequired<LiveIntervals>(); 223234285Sdim AU.addPreserved<LiveIntervals>(); 224234285Sdim 
MachineFunctionPass::getAnalysisUsage(AU); 225234285Sdim} 226234285Sdim 227276479Sdimchar PostMachineScheduler::ID = 0; 228276479Sdim 229276479Sdimchar &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; 230276479Sdim 231276479SdimINITIALIZE_PASS(PostMachineScheduler, "postmisched", 232276479Sdim "PostRA Machine Instruction Scheduler", false, false) 233276479Sdim 234327952SdimPostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { 235276479Sdim initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); 236276479Sdim} 237276479Sdim 238276479Sdimvoid PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 239276479Sdim AU.setPreservesCFG(); 240360784Sdim AU.addRequired<MachineDominatorTree>(); 241276479Sdim AU.addRequired<MachineLoopInfo>(); 242360784Sdim AU.addRequired<AAResultsWrapperPass>(); 243276479Sdim AU.addRequired<TargetPassConfig>(); 244276479Sdim MachineFunctionPass::getAnalysisUsage(AU); 245276479Sdim} 246276479Sdim 247344779SdimMachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor> 248344779Sdim MachineSchedRegistry::Registry; 249234285Sdim 250234285Sdim/// A dummy default scheduler factory indicates whether the scheduler 251234285Sdim/// is overridden on the command line. 252234285Sdimstatic ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 253276479Sdim return nullptr; 254234285Sdim} 255234285Sdim 256234285Sdim/// MachineSchedOpt allows command line selection of the scheduler. 
257234285Sdimstatic cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 258321369Sdim RegisterPassParser<MachineSchedRegistry>> 259234285SdimMachineSchedOpt("misched", 260234285Sdim cl::init(&useDefaultMachineSched), cl::Hidden, 261234285Sdim cl::desc("Machine instruction scheduler to use")); 262234285Sdim 263234285Sdimstatic MachineSchedRegistry 264234285SdimDefaultSchedRegistry("default", "Use the target's default scheduler choice.", 265234285Sdim useDefaultMachineSched); 266234285Sdim 267288943Sdimstatic cl::opt<bool> EnableMachineSched( 268288943Sdim "enable-misched", 269288943Sdim cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), 270288943Sdim cl::Hidden); 271288943Sdim 272309124Sdimstatic cl::opt<bool> EnablePostRAMachineSched( 273309124Sdim "enable-post-misched", 274309124Sdim cl::desc("Enable the post-ra machine instruction scheduling pass."), 275309124Sdim cl::init(true), cl::Hidden); 276309124Sdim 277239462Sdim/// Decrement this iterator until reaching the top or a non-debug instr. 278261991Sdimstatic MachineBasicBlock::const_iterator 279261991SdimpriorNonDebug(MachineBasicBlock::const_iterator I, 280261991Sdim MachineBasicBlock::const_iterator Beg) { 281239462Sdim assert(I != Beg && "reached the top of the region, cannot decrement"); 282239462Sdim while (--I != Beg) { 283341825Sdim if (!I->isDebugInstr()) 284239462Sdim break; 285239462Sdim } 286239462Sdim return I; 287239462Sdim} 288239462Sdim 289261991Sdim/// Non-const version. 290261991Sdimstatic MachineBasicBlock::iterator 291261991SdimpriorNonDebug(MachineBasicBlock::iterator I, 292261991Sdim MachineBasicBlock::const_iterator Beg) { 293314564Sdim return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) 294314564Sdim .getNonConstIterator(); 295261991Sdim} 296261991Sdim 297239462Sdim/// If this iterator is a debug value, increment until reaching the End or a 298239462Sdim/// non-debug instruction. 
299261991Sdimstatic MachineBasicBlock::const_iterator 300261991SdimnextIfDebug(MachineBasicBlock::const_iterator I, 301261991Sdim MachineBasicBlock::const_iterator End) { 302239462Sdim for(; I != End; ++I) { 303341825Sdim if (!I->isDebugInstr()) 304239462Sdim break; 305239462Sdim } 306239462Sdim return I; 307239462Sdim} 308239462Sdim 309261991Sdim/// Non-const version. 310261991Sdimstatic MachineBasicBlock::iterator 311261991SdimnextIfDebug(MachineBasicBlock::iterator I, 312261991Sdim MachineBasicBlock::const_iterator End) { 313314564Sdim return nextIfDebug(MachineBasicBlock::const_iterator(I), End) 314314564Sdim .getNonConstIterator(); 315261991Sdim} 316261991Sdim 317261991Sdim/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. 318261991SdimScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { 319261991Sdim // Select the scheduler, or set the default. 320261991Sdim MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; 321261991Sdim if (Ctor != useDefaultMachineSched) 322261991Sdim return Ctor(this); 323261991Sdim 324261991Sdim // Get the default scheduler set by the target for this function. 325261991Sdim ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); 326261991Sdim if (Scheduler) 327261991Sdim return Scheduler; 328261991Sdim 329261991Sdim // Default to GenericScheduler. 330276479Sdim return createGenericSchedLive(this); 331261991Sdim} 332261991Sdim 333276479Sdim/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by 334276479Sdim/// the caller. We don't have a command line option to override the postRA 335276479Sdim/// scheduler. The Target must configure it. 336276479SdimScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { 337276479Sdim // Get the postRA scheduler set by the target for this function. 
338276479Sdim ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); 339276479Sdim if (Scheduler) 340276479Sdim return Scheduler; 341276479Sdim 342276479Sdim // Default to GenericScheduler. 343276479Sdim return createGenericSchedPostRA(this); 344276479Sdim} 345276479Sdim 346234285Sdim/// Top-level MachineScheduler pass driver. 347234285Sdim/// 348234285Sdim/// Visit blocks in function order. Divide each block into scheduling regions 349234285Sdim/// and visit them bottom-up. Visiting regions bottom-up is not required, but is 350234285Sdim/// consistent with the DAG builder, which traverses the interior of the 351234285Sdim/// scheduling regions bottom-up. 352234285Sdim/// 353234285Sdim/// This design avoids exposing scheduling boundaries to the DAG builder, 354234285Sdim/// simplifying the DAG builder's support for "special" target instructions. 355234285Sdim/// At the same time the design allows target schedulers to operate across 356341825Sdim/// scheduling boundaries, for example to bundle the boundary instructions 357234285Sdim/// without reordering them. This creates complexity, because the target 358234285Sdim/// scheduler must update the RegionBegin and RegionEnd positions cached by 359234285Sdim/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler 360234285Sdim/// design would be to split blocks at scheduling boundaries, but LLVM has a 361234285Sdim/// general bias against block splitting purely for implementation simplicity. 
362234285Sdimbool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { 363327952Sdim if (skipFunction(mf.getFunction())) 364309124Sdim return false; 365309124Sdim 366288943Sdim if (EnableMachineSched.getNumOccurrences()) { 367288943Sdim if (!EnableMachineSched) 368288943Sdim return false; 369288943Sdim } else if (!mf.getSubtarget().enableMachineScheduler()) 370288943Sdim return false; 371288943Sdim 372341825Sdim LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); 373239462Sdim 374234285Sdim // Initialize the context of the pass. 375234285Sdim MF = &mf; 376234285Sdim MLI = &getAnalysis<MachineLoopInfo>(); 377234285Sdim MDT = &getAnalysis<MachineDominatorTree>(); 378234285Sdim PassConfig = &getAnalysis<TargetPassConfig>(); 379296417Sdim AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 380234285Sdim 381234285Sdim LIS = &getAnalysis<LiveIntervals>(); 382234285Sdim 383249423Sdim if (VerifyScheduling) { 384341825Sdim LLVM_DEBUG(LIS->dump()); 385249423Sdim MF->verify(this, "Before machine scheduling."); 386249423Sdim } 387239462Sdim RegClassInfo->runOnMachineFunction(*MF); 388239462Sdim 389261991Sdim // Instantiate the selected scheduler for this target, function, and 390261991Sdim // optimization level. 
391276479Sdim std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); 392296417Sdim scheduleRegions(*Scheduler, false); 393234285Sdim 394341825Sdim LLVM_DEBUG(LIS->dump()); 395276479Sdim if (VerifyScheduling) 396276479Sdim MF->verify(this, "After machine scheduling."); 397276479Sdim return true; 398276479Sdim} 399276479Sdim 400276479Sdimbool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { 401327952Sdim if (skipFunction(mf.getFunction())) 402276479Sdim return false; 403276479Sdim 404309124Sdim if (EnablePostRAMachineSched.getNumOccurrences()) { 405309124Sdim if (!EnablePostRAMachineSched) 406309124Sdim return false; 407360784Sdim } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) { 408341825Sdim LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); 409276479Sdim return false; 410276479Sdim } 411341825Sdim LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); 412276479Sdim 413276479Sdim // Initialize the context of the pass. 414276479Sdim MF = &mf; 415327952Sdim MLI = &getAnalysis<MachineLoopInfo>(); 416276479Sdim PassConfig = &getAnalysis<TargetPassConfig>(); 417360784Sdim AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 418276479Sdim 419276479Sdim if (VerifyScheduling) 420276479Sdim MF->verify(this, "Before post machine scheduling."); 421276479Sdim 422276479Sdim // Instantiate the selected scheduler for this target, function, and 423276479Sdim // optimization level. 424276479Sdim std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); 425296417Sdim scheduleRegions(*Scheduler, true); 426276479Sdim 427276479Sdim if (VerifyScheduling) 428276479Sdim MF->verify(this, "After post machine scheduling."); 429276479Sdim return true; 430276479Sdim} 431276479Sdim 432276479Sdim/// Return true of the given instruction should not be included in a scheduling 433276479Sdim/// region. 434276479Sdim/// 435276479Sdim/// MachineScheduler does not currently support scheduling across calls. 
To 436276479Sdim/// handle calls, the DAG builder needs to be modified to create register 437276479Sdim/// anti/output dependencies on the registers clobbered by the call's regmask 438276479Sdim/// operand. In PreRA scheduling, the stack pointer adjustment already prevents 439276479Sdim/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce 440276479Sdim/// the boundary, but there would be no benefit to postRA scheduling across 441276479Sdim/// calls this late anyway. 442276479Sdimstatic bool isSchedBoundary(MachineBasicBlock::iterator MI, 443276479Sdim MachineBasicBlock *MBB, 444276479Sdim MachineFunction *MF, 445296417Sdim const TargetInstrInfo *TII) { 446309124Sdim return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); 447276479Sdim} 448276479Sdim 449327952Sdim/// A region of an MBB for scheduling. 450327952Sdimnamespace { 451327952Sdimstruct SchedRegion { 452327952Sdim /// RegionBegin is the first instruction in the scheduling region, and 453327952Sdim /// RegionEnd is either MBB->end() or the scheduling boundary after the 454327952Sdim /// last instruction in the scheduling region. These iterators cannot refer 455327952Sdim /// to instructions outside of the identified scheduling region because 456327952Sdim /// those may be reordered before scheduling this region. 
457327952Sdim MachineBasicBlock::iterator RegionBegin; 458327952Sdim MachineBasicBlock::iterator RegionEnd; 459327952Sdim unsigned NumRegionInstrs; 460327952Sdim 461327952Sdim SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E, 462327952Sdim unsigned N) : 463327952Sdim RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {} 464327952Sdim}; 465327952Sdim} // end anonymous namespace 466327952Sdim 467327952Sdimusing MBBRegionsVector = SmallVector<SchedRegion, 16>; 468327952Sdim 469327952Sdimstatic void 470327952SdimgetSchedRegions(MachineBasicBlock *MBB, 471327952Sdim MBBRegionsVector &Regions, 472327952Sdim bool RegionsTopDown) { 473327952Sdim MachineFunction *MF = MBB->getParent(); 474327952Sdim const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 475327952Sdim 476327952Sdim MachineBasicBlock::iterator I = nullptr; 477327952Sdim for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 478327952Sdim RegionEnd != MBB->begin(); RegionEnd = I) { 479327952Sdim 480327952Sdim // Avoid decrementing RegionEnd for blocks with no terminator. 481327952Sdim if (RegionEnd != MBB->end() || 482327952Sdim isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { 483327952Sdim --RegionEnd; 484327952Sdim } 485327952Sdim 486327952Sdim // The next region starts above the previous region. Look backward in the 487327952Sdim // instruction stream until we find the nearest boundary. 488327952Sdim unsigned NumRegionInstrs = 0; 489327952Sdim I = RegionEnd; 490327952Sdim for (;I != MBB->begin(); --I) { 491327952Sdim MachineInstr &MI = *std::prev(I); 492327952Sdim if (isSchedBoundary(&MI, &*MBB, MF, TII)) 493327952Sdim break; 494353358Sdim if (!MI.isDebugInstr()) { 495327952Sdim // MBB::size() uses instr_iterator to count. Here we need a bundle to 496327952Sdim // count as a single instruction. 
497327952Sdim ++NumRegionInstrs; 498353358Sdim } 499327952Sdim } 500327952Sdim 501353358Sdim // It's possible we found a scheduling region that only has debug 502353358Sdim // instructions. Don't bother scheduling these. 503353358Sdim if (NumRegionInstrs != 0) 504353358Sdim Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); 505327952Sdim } 506327952Sdim 507327952Sdim if (RegionsTopDown) 508327952Sdim std::reverse(Regions.begin(), Regions.end()); 509327952Sdim} 510327952Sdim 511276479Sdim/// Main driver for both MachineScheduler and PostMachineScheduler. 512296417Sdimvoid MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, 513296417Sdim bool FixKillFlags) { 514234285Sdim // Visit all machine basic blocks. 515239462Sdim // 516239462Sdim // TODO: Visit blocks in global postorder or postorder within the bottom-up 517239462Sdim // loop tree. Then we can optionally compute global RegPressure. 518234285Sdim for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); 519234285Sdim MBB != MBBEnd; ++MBB) { 520234285Sdim 521296417Sdim Scheduler.startBlock(&*MBB); 522234285Sdim 523276479Sdim#ifndef NDEBUG 524276479Sdim if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) 525276479Sdim continue; 526276479Sdim if (SchedOnlyBlock.getNumOccurrences() 527276479Sdim && (int)SchedOnlyBlock != MBB->getNumber()) 528276479Sdim continue; 529276479Sdim#endif 530276479Sdim 531327952Sdim // Break the block into scheduling regions [I, RegionEnd). RegionEnd 532327952Sdim // points to the scheduling boundary at the bottom of the region. The DAG 533327952Sdim // does not include RegionEnd, but the region does (i.e. the next 534327952Sdim // RegionEnd is above the previous RegionBegin). If the current block has 535327952Sdim // no terminator then RegionEnd == MBB->end() for the bottom region. 
536234285Sdim // 537327952Sdim // All the regions of MBB are first found and stored in MBBRegions, which 538327952Sdim // will be processed (MBB) top-down if initialized with true. 539327952Sdim // 540234285Sdim // The Scheduler may insert instructions during either schedule() or 541234285Sdim // exitRegion(), even for empty regions. So the local iterators 'I' and 542327952Sdim // 'RegionEnd' are invalid across these calls. Instructions must not be 543327952Sdim // added to other regions than the current one without updating MBBRegions. 544239462Sdim 545327952Sdim MBBRegionsVector MBBRegions; 546327952Sdim getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown()); 547327952Sdim for (MBBRegionsVector::iterator R = MBBRegions.begin(); 548327952Sdim R != MBBRegions.end(); ++R) { 549327952Sdim MachineBasicBlock::iterator I = R->RegionBegin; 550327952Sdim MachineBasicBlock::iterator RegionEnd = R->RegionEnd; 551327952Sdim unsigned NumRegionInstrs = R->NumRegionInstrs; 552234285Sdim 553234285Sdim // Notify the scheduler of the region, even if we may skip scheduling 554234285Sdim // it. Perhaps it still needs to be bundled. 555296417Sdim Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); 556234285Sdim 557234285Sdim // Skip empty scheduling regions (0 or 1 schedulable instructions). 558276479Sdim if (I == RegionEnd || I == std::prev(RegionEnd)) { 559234285Sdim // Close the current region. Bundle the terminator if needed. 560234285Sdim // This invalidates 'RegionEnd' and 'I'. 
561276479Sdim Scheduler.exitRegion(); 562234285Sdim continue; 563234285Sdim } 564341825Sdim LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); 565341825Sdim LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) 566341825Sdim << " " << MBB->getName() << "\n From: " << *I 567341825Sdim << " To: "; 568341825Sdim if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 569341825Sdim else dbgs() << "End"; 570341825Sdim dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); 571280031Sdim if (DumpCriticalPathLength) { 572280031Sdim errs() << MF->getName(); 573327952Sdim errs() << ":%bb. " << MBB->getNumber(); 574280031Sdim errs() << " " << MBB->getName() << " \n"; 575280031Sdim } 576234285Sdim 577234285Sdim // Schedule a region: possibly reorder instructions. 578327952Sdim // This invalidates the original region iterators. 579276479Sdim Scheduler.schedule(); 580234285Sdim 581234285Sdim // Close the current region. 582276479Sdim Scheduler.exitRegion(); 583234285Sdim } 584276479Sdim Scheduler.finishBlock(); 585296417Sdim // FIXME: Ideally, no further passes should rely on kill flags. However, 586296417Sdim // thumb2 size reduction is currently an exception, so the PostMIScheduler 587296417Sdim // needs to do this. 
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

/// Print pass state; intentionally a no-op, the scheduler has no printable
/// pass-level state.
void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump the queue name followed by the NodeNum of every queued SUnit.
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  // Weak edges never gate readiness; only track them and remember cluster
  // candidates for the strategy.
  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  // Weak edges never gate readiness; only track them and remember cluster
  // candidates for the strategy.
  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

/// Initialize per-block base-class state, then notify the strategy that a new
/// block is being entered.
void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

/// Notify the strategy that the block is done before tearing down base state.
void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  // Give the strategy a chance to pick a per-region policy (e.g. direction).
  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
    MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals, when present (PostRA scheduling runs without them).
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

/// Returns true if scheduling may continue. In debug builds this enforces the
/// misched cutoff by collapsing the unscheduled zone once the limit is hit.
bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}

/// Collect top roots (no strong predecessors) and bottom roots (no strong
/// successors), biasing each node's edge order toward the critical path.
void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  // Walk the (DbgValue, preceding-instruction) pairs in reverse and splice
  // each DBG_VALUE back in directly after its original predecessor.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == std::prev(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump the scheduled region in final instruction-stream order.
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      dumpNode(*SU);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

/// Record each virtual-register use in SU's instruction into VRegUses,
/// skipping re-defined registers when lane masks are tracked.
void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (!MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    Register Reg = MO.getReg();
    if (!Register::isVirtualRegister(Reg))
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool FoundDef = false;
      for (const MachineOperand &MO2 : MI.operands()) {
        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
          FoundDef = true;
          break;
        }
      }
      if (FoundDef)
        continue;
    }

    // Record this local VReg use.
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  // The strategy decides per-region whether pressure/lane-mask tracking is on.
  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}

// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
  VRegUses.clear();
  VRegUses.setUniverse(MRI.getNumVirtRegs());
  for (SUnit &SU : SUnits)
    collectVRegUses(SU);

  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
                    ShouldTrackLaneMasks, false);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                    ShouldTrackLaneMasks, false);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  LLVM_DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    LLVM_DEBUG(dbgs() << "Live Thru: ";
               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  };

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<RegisterMaskPair, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
             dbgs() << "Bottom Pressure:\n";
             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););

  assert((BotRPTracker.getPos() == RegionEnd ||
          (RegionEnd->isDebugInstr() &&
           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
         "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
                        << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  LLVM_DEBUG(dbgs() << "Excess PSets: ";
             for (const PressureChange &RCPS
                  : RegionCriticalPSets) dbgs()
             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
             dbgs() << "\n");
}

/// Re-sync the cached max pressure of each critical pressure set after
/// scheduling SU, and report sets that are at or near their limit.
void ScheduleDAGMILive::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (const PressureChange &PC : PDiff) {
    if (!PC.isValid())
      break;
    unsigned ID = PC.getPSet();
    // RegionCriticalPSets is sorted by PSet; advance the cursor to ID.
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
                        << NewMaxPressure[ID]
                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
                        << " livethru)\n");
    }
  }
}

/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMILive::updatePressureDiffs(
    ArrayRef<RegisterMaskPair> LiveUses) {
  for (const RegisterMaskPair &P : LiveUses) {
    unsigned Reg = P.RegUnit;
    /// FIXME: Currently assuming single-use physregs.
    if (!Register::isVirtualRegister(Reg))
      continue;

    if (ShouldTrackLaneMasks) {
      // If the register has just become live then other uses won't change
      // this fact anymore => decrement pressure.
      // If the register has just become dead then other uses make it come
      // back to life => increment pressure.
      bool Decrement = P.LaneMask.any();

      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit &SU = *V2SU.SU;
        if (SU.isScheduled || &SU == &ExitSU)
          continue;

        PressureDiff &PDiff = getPressureDiff(&SU);
        PDiff.addPressureChange(Reg, Decrement, &MRI);
        LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
                          << printReg(Reg, TRI) << ':'
                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
                   dbgs() << " to "; PDiff.dump(*TRI););
      }
    } else {
      assert(P.LaneMask.any());
      LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
      // This may be called before CurrentBottom has been initialized. However,
      // BotRPTracker must have a valid position. We want the value live into the
      // instruction or live out of the block, so ask for the previous
      // instruction's live-out.
      const LiveInterval &LI = LIS->getInterval(Reg);
      VNInfo *VNI;
      MachineBasicBlock::const_iterator I =
        nextIfDebug(BotRPTracker.getPos(), BB->end());
      if (I == BB->end())
        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
      else {
        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
        VNI = LRQ.valueIn();
      }
      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
      assert(VNI && "No live value at use.");
      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit *SU = V2SU.SU;
        // If this use comes before the reaching def, it cannot be a last use,
        // so decrease its pressure change.
        if (!SU->isScheduled && SU != &ExitSU) {
          LiveQueryResult LRQ =
            LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
          if (LRQ.valueIn() == VNI) {
            PressureDiff &PDiff = getPressureDiff(SU);
            PDiff.addPressureChange(Reg, true, &MRI);
            LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
                              << *SU->getInstr();
                       dbgs() << " to "; PDiff.dump(*TRI););
          }
        }
      }
    }
  }
}

/// Dump every node (including Entry/Exit when they carry instructions) plus
/// its pressure diff and single-issue constraint. No-op in release builds.
void ScheduleDAGMILive::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits) {
    dumpNodeAll(SU);
    if (ShouldTrackPressure) {
      dbgs() << " Pressure Diff : ";
      getPressureDiff(&SU).dump(*TRI);
    }
    dbgs() << " Single Issue : ";
    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
        SchedModel.mustEndGroup(SU.getInstr()))
      dbgs() << "true;";
    else
      dbgs() << "false;";
    dbgs() << '\n';
  }
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());
  buildDAGWithRegPressure();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Track which DFS subtrees have started being scheduled.
    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMILive::buildDAGWithRegPressure() {
  if (!ShouldTrackPressure) {
    RPTracker.reset();
    RegionCriticalPSets.clear();
    buildSchedGraph(AA);
    return;
  }

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);

  // Account for liveness generate by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// (Re)compute the bottom-up DFS result used for subtree-based queue priority
/// and reset the set of already-scheduled subtrees.
void ScheduleDAGMILive::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}

/// Compute the max cyclic critical path through the DAG. The scheduling DAG
/// only provides the critical path for single block loops. To handle loops that
/// span blocks, we could use the vreg path latencies provided by
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
/// available for use in the scheduler.
///
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is a two cycles: b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single block loop.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    unsigned Reg = P.RegUnit;
    if (!Register::isVirtualRegister(Reg))
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (const VReg2SUnit &V2SU
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
      SUnit *SU = V2SU.SU;
      if (SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > SU->getDepth())
        CyclicLatency = LiveOutDepth - SU->getDepth();

      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      } else
        CyclicLatency = 0;

      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}

/// Release ExitSU predecessors and setup scheduler queues. Re-position
/// the Top RP tracker in case the region beginning has changed.
void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
                                   ArrayRef<SUnit*> BotRoots) {
  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
  if (ShouldTrackPressure) {
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    TopRPTracker.setPos(CurrentTop);
  }
}

/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      TopRPTracker.advance(RegOpers);
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
                     TopRPTracker.getRegSetPressureAtPos(), TRI););

      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    }
  } else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
      BotRPTracker.setPos(CurrentBottom);
    }
    if (ShouldTrackPressure) {
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      if (BotRPTracker.getPos() != CurrentBottom)
        BotRPTracker.recedeSkipDebugValues();
      SmallVector<RegisterMaskPair, 8> LiveUses;
      BotRPTracker.recede(RegOpers, &LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
                     BotRPTracker.getRegSetPressureAtPos(), TRI););

      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      updatePressureDiffs(LiveUses);
    }
  }
}

//===----------------------------------------------------------------------===//
// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
//===----------------------------------------------------------------------===//

namespace {

/// Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
  /// A memory operation candidate for clustering: the DAG node plus the
  /// base operand and byte offset extracted from its instruction.
  struct MemOpInfo {
    SUnit *SU;
    const MachineOperand *BaseOp;
    int64_t Offset;

    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
        : SU(su), BaseOp(Op), Offset(ofs) {}

    /// Order mem ops so that ops on the same base end up adjacent, sorted by
    /// offset. Register bases compare by (reg, offset, node); frame-index
    /// bases compare by index in stack-growth order, then offset, then node.
    bool operator<(const MemOpInfo &RHS) const {
      if (BaseOp->getType() != RHS.BaseOp->getType())
        return BaseOp->getType() < RHS.BaseOp->getType();

      if (BaseOp->isReg())
        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
                               RHS.SU->NodeNum);
      if (BaseOp->isFI()) {
        const MachineFunction &MF =
            *BaseOp->getParent()->getParent()->getParent();
        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
                              TargetFrameLowering::StackGrowsDown;
        // Can't use tuple comparison here since we might need to use a
        // different order when the stack grows down.
        if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
          return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
                                : BaseOp->getIndex() < RHS.BaseOp->getIndex();

        if (Offset != RHS.Offset)
          return Offset < RHS.Offset;

        return SU->NodeNum < RHS.SU->NodeNum;
      }

      llvm_unreachable("MemOpClusterMutation only supports register or frame "
                       "index bases.");
    }
  };

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  // True when clustering loads (mayLoad), false when clustering stores.
  bool IsLoad;

public:
  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
                           const TargetRegisterInfo *tri, bool IsLoad)
      : TII(tii), TRI(tri), IsLoad(IsLoad) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override;

protected:
  /// Add cluster edges between mem ops in \p MemOps that the target reports
  /// as profitable to keep adjacent.
  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
};

/// Convenience subclass that clusters stores.
class StoreClusterMutation : public BaseMemOpClusterMutation {
public:
  StoreClusterMutation(const TargetInstrInfo *tii,
                       const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, false) {}
};

/// Convenience subclass that clusters loads.
class LoadClusterMutation : public BaseMemOpClusterMutation {
public:
  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, true) {}
};

} // end anonymous namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI) {
  // Returns nullptr when mem-op clustering is disabled by the flag.
  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
                            : nullptr;
}

std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI) {
  // Returns nullptr when mem-op clustering is disabled by the flag.
  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
                            : nullptr;
}

} // end namespace llvm

void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
  // Collect (SU, base, offset) records for ops the target can decompose.
  SmallVector<MemOpInfo, 32> MemOpRecords;
  for (SUnit *SU : MemOps) {
    const MachineOperand *BaseOp;
    int64_t Offset;
    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
      MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
  }
  if (MemOpRecords.size() < 2)
    return;

  // Sort so that ops on the same base with adjacent offsets are neighbors.
  llvm::sort(MemOpRecords);
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    SUnit *SUa = MemOpRecords[Idx].SU;
    SUnit *SUb = MemOpRecords[Idx+1].SU;
    // Keep the cluster edge pointing from the lower- to higher-numbered node.
    if (SUa->NodeNum > SUb->NodeNum)
      std::swap(SUa, SUb);
    if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
                                 *MemOpRecords[Idx + 1].BaseOp,
                                 ClusterLength) &&
        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
                        << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (const SDep &Succ : SUa->Succs) {
        if (Succ.getSUnit() == SUb)
          continue;
        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
                          << ")\n");
        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    } else
      ClusterLength = 1;
  }
}

/// Callback from DAG postProcessing to create cluster edges for loads or
/// stores (depending on IsLoad).
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
  // Map DAG NodeNum to a set of dependent MemOps in store chain.
  DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
  for (SUnit &SU : DAG->SUnits) {
    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
        (!IsLoad && !SU.getInstr()->mayStore()))
      continue;

    // Key each op by its first control-dependence predecessor; ops with no
    // such predecessor share the sentinel key SUnits.size().
    unsigned ChainPredID = DAG->SUnits.size();
    for (const SDep &Pred : SU.Preds) {
      if (Pred.isCtrl()) {
        ChainPredID = Pred.getSUnit()->NodeNum;
        break;
      }
    }
    // Insert the SU to corresponding store chain.
    auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
    Chain.push_back(&SU);
  }

  // Iterate over the store chains.
  for (auto &SCD : StoreChains)
    clusterNeighboringMemOps(SCD.second, DAG);
}

//===----------------------------------------------------------------------===//
// CopyConstrain - DAG post-processing to encourage copy elimination.
//===----------------------------------------------------------------------===//

namespace {

/// Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state.
  SlotIndex RegionBeginIdx;

  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;

public:
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override;

protected:
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};

} // end anonymous namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI) {
  return std::make_unique<CopyConstrain>(TII, TRI);
}

} // end namespace llvm

/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0:     = dst
/// I1: src = ...
/// I2:     = dst
/// I3: dst = src (copy)
/// (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
/// I0: dst = src (copy)
/// I1:     = dst
/// I2: src = ...
/// I3:     = dst
/// (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  const MachineOperand &SrcOp = Copy->getOperand(1);
  Register SrcReg = SrcOp.getReg();
  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    return;

  const MachineOperand &DstOp = Copy->getOperand(0);
  Register DstReg = DstOp.getReg();
  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
  // If both the copy's source and dest are local live intervals, then we
  // should treat the dest as the global for the purpose of adding
  // constraints. This adds edges from source's other uses to the copy.
  unsigned LocalReg = SrcReg;
  unsigned GlobalReg = DstReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    // Source isn't local; try the roles swapped (dest local, source global).
    LocalReg = DstReg;
    GlobalReg = SrcReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the local
  // start, but the coalescer should have already eliminated these cases, so
  // don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then can't make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
      return;
    }
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    // it would be a disconnected component in the live range.
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");
  }
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  if (!GlobalDef)
    return;

  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  if (!GlobalSU)
    return;

  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  // constraining the uses of the last local def to precede GlobalDef.
  SmallVector<SUnit*,8> LocalUses;
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  for (const SDep &Succ : LastLocalSU->Succs) {
    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
      continue;
    if (Succ.getSUnit() == GlobalSU)
      continue;
    // Bail out entirely if any edge would create a cycle.
    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
      return;
    LocalUses.push_back(Succ.getSUnit());
  }
  // Open the top of the GlobalLI hole by constraining any earlier global uses
  // to precede the start of LocalLI.
  SmallVector<SUnit*,8> GlobalUses;
  MachineInstr *FirstLocalDef =
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  for (const SDep &Pred : GlobalSU->Preds) {
    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
      continue;
    if (Pred.getSUnit() == FirstLocalSU)
      continue;
    // Bail out entirely if any edge would create a cycle.
    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
      return;
    GlobalUses.push_back(Pred.getSUnit());
  }
  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  // Add the weak edges.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
                      << GlobalSU->NodeNum << ")\n");
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  }
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
                      << FirstLocalSU->NodeNum << ")\n");
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  }
}

/// Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");

  // Cache the region's slot-index bounds, skipping debug instructions.
  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  if (FirstPos == DAG->end())
    return;
  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
      *priorNonDebug(DAG->end(), DAG->begin()));

  for (SUnit &SU : DAG->SUnits) {
    if (!SU.getInstr()->isCopy())
      continue;

    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
  }
}

//===----------------------------------------------------------------------===//
// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
// and possibly other custom schedulers.
//===----------------------------------------------------------------------===//

// Sentinel for "resource never used" entries in ReservedCycles.
static const unsigned InvalidCycle = ~0U;

SchedBoundary::~SchedBoundary() { delete HazardRec; }

/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
/// If we are checking after scheduling a node, we should return true when
/// we just reach the resource limit.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
  if (AfterSchedNode)
    return ResCntFactor >= (int)LFactor;
  else
    return ResCntFactor > (int)LFactor;
}

/// Reset all per-region scheduling state of this boundary so it can be
/// reused for the next scheduling region.
void SchedBoundary::reset() {
  // A new HazardRec is created for each DAG and owned by SchedBoundary.
  // Destroying and reconstructing it is very expensive though. So keep
  // invalid, placeholder HazardRecs.
  if (HazardRec && HazardRec->isEnabled()) {
    delete HazardRec;
    HazardRec = nullptr;
  }
  Available.clear();
  Pending.clear();
  CheckPending = false;
  CurrCycle = 0;
  CurrMOps = 0;
  MinReadyCycle = std::numeric_limits<unsigned>::max();
  ExpectedLatency = 0;
  DependentLatency = 0;
  RetiredMOps = 0;
  MaxExecutedResCount = 0;
  ZoneCritResIdx = 0;
  IsResourceLimited = false;
  ReservedCycles.clear();
  ReservedCyclesIndex.clear();
#ifndef NDEBUG
  // Track the maximum number of stall cycles that could arise either from the
  // latency of a DAG edge or the number of cycles that a processor resource is
  // reserved (SchedBoundary::ReservedCycles).
  MaxObservedStall = 0;
#endif
  // Reserve a zero-count for invalid CritResIdx.
  ExecutedResCounts.resize(1);
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
}

/// Initialize the remaining-work counters (micro-ops and per-resource
/// cycles) by summing over every node in the DAG.
void SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (SUnit &SU : DAG->SUnits) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
      * SchedModel->getMicroOpFactor();
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

/// Bind this boundary to a DAG, sched model, and shared remainder, then size
/// the per-resource-unit ReservedCycles table (one slot per resource unit,
/// with ReservedCyclesIndex mapping each resource kind to its first unit).
void SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel()) {
    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
    ReservedCyclesIndex.resize(ResourceCount);
    ExecutedResCounts.resize(ResourceCount);
    unsigned NumUnits = 0;

    for (unsigned i = 0; i < ResourceCount; ++i) {
      ReservedCyclesIndex[i] = NumUnits;
      NumUnits += SchedModel->getProcResource(i)->NumUnits;
    }

    ReservedCycles.resize(NumUnits, InvalidCycle);
  }
}
/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
/// these "soft stalls" differently than the hard stall cycles based on CPU
/// resources and computed by checkHazard(). A fully in-order model
/// (MicroOpBufferSize==0) will not make use of this since instructions are not
/// available for scheduling until they are ready. However, a weaker in-order
/// model may use this for heuristics. For example, if a processor has in-order
/// behavior when reading certain resources, this may come into play.
unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
  if (!SU->isUnbuffered)
    return 0;

  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  if (ReadyCycle > CurrCycle)
    return ReadyCycle - CurrCycle;
  return 0;
}

/// Compute the next cycle at which the given processor resource unit
/// can be scheduled.
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
                                                       unsigned Cycles) {
  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
  // If this resource has never been used, always return cycle zero.
  if (NextUnreserved == InvalidCycle)
    return 0;
  // For bottom-up scheduling add the cycles needed for the current operation.
  if (!isTop())
    NextUnreserved += Cycles;
  return NextUnreserved;
}

/// Compute the next cycle at which the given processor resource can be
/// scheduled.  Returns the next cycle and the index of the processor resource
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
  // Scan every unit of this resource kind and pick the one that frees first.
  unsigned MinNextUnreserved = InvalidCycle;
  unsigned InstanceIdx = 0;
  unsigned StartIndex = ReservedCyclesIndex[PIdx];
  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
  assert(NumberOfInstances > 0 &&
         "Cannot have zero instances of a ProcResource");

  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
       ++I) {
    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
    if (MinNextUnreserved > NextUnreserved) {
      InstanceIdx = I;
      MinNextUnreserved = NextUnreserved;
    }
  }
  return std::make_pair(MinNextUnreserved, InstanceIdx);
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool SchedBoundary::checkHazard(SUnit *SU) {
  // 1) Delegate to the target's hazard recognizer when one is enabled.
  if (HazardRec->isEnabled()
      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    return true;
  }

  // 2) Issue-width check: would this SU's micro-ops overflow the group?
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }

  // 3) Group-boundary check: the instruction must begin/end a dispatch group
  // but the current group already has micro-ops in it.
  if (CurrMOps > 0 &&
      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
                      << (isTop() ? "begin" : "end") << " group\n");
    return true;
  }

  // 4) Reserved-resource check: a resource this SU writes is still reserved.
  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    for (const MCWriteProcResEntry &PE :
          make_range(SchedModel->getWriteProcResBegin(SC),
                     SchedModel->getWriteProcResEnd(SC))) {
      unsigned ResIdx = PE.ProcResourceIdx;
      unsigned Cycles = PE.Cycles;
      unsigned NRCycle, InstanceIdx;
      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
      if (NRCycle > CurrCycle) {
#ifndef NDEBUG
        MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
                          << SchedModel->getResourceName(ResIdx)
                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
                          << "=" << NRCycle << "c\n");
        return true;
      }
    }
  }
  return false;
}

// Find the unscheduled node in ReadySUs with the highest latency.
unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  SUnit *LateSU = nullptr;
  unsigned RemLatency = 0;
  for (SUnit *SU : ReadySUs) {
    unsigned L = getUnscheduledLatency(SU);
    if (L > RemLatency) {
      RemLatency = L;
      LateSU = SU;
    }
  }
  if (LateSU) {
    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
  }
  return RemLatency;
}

// Count resources in this zone and the remaining unscheduled
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
unsigned SchedBoundary::
getOtherResourceCount(unsigned &OtherCritIdx) {
  OtherCritIdx = 0;
  if (!SchedModel->hasInstrSchedModel())
    return 0;

  // Start with the issue (micro-op) count; resource kinds may override it.
  unsigned OtherCritCount = Rem->RemIssueCount
    + (RetiredMOps * SchedModel->getMicroOpFactor());
  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
       PIdx != PEnd; ++PIdx) {
    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    if (OtherCount > OtherCritCount) {
      OtherCritCount = OtherCount;
      OtherCritIdx = PIdx;
    }
  }
  if (OtherCritIdx) {
    LLVM_DEBUG(
        dbgs() << "  " << Available.getName() << " + Remain CritRes: "
               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
  }
  return OtherCritCount;
}

/// Release \p SU into the Available queue if it has no hazard, otherwise keep
/// it in Pending. When \p InPQueue is true, \p SU currently sits in Pending at
/// index \p Idx and is removed from there on promotion to Available.
void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                                unsigned Idx) {
  assert(SU->getInstr() && "Scheduled SUnit must have instr");

#ifndef NDEBUG
  // ReadyCycle has been bumped up to the CurrCycle when this node was
  // scheduled, but CurrCycle may have been eagerly advanced immediately after
  // scheduling, so may now be greater than ReadyCycle.
  if (ReadyCycle > CurrCycle)
    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
#endif

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
                        checkHazard(SU) || (Available.size() >= ReadyListLimit);

  if (!HazardDetected) {
    Available.push(SU);

    if (InPQueue)
      Pending.remove(Pending.begin() + Idx);
    return;
  }

  if (!InPQueue)
    Pending.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void SchedBoundary::bumpCycle(unsigned NextCycle) {
  if (SchedModel->getMicroOpBufferSize() == 0) {
    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
           "MinReadyCycle uninitialized");
    if (MinReadyCycle > NextCycle)
      NextCycle = MinReadyCycle;
  }
  // Update the current micro-ops, which will issue in the next cycle.
  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

  // Decrement DependentLatency based on the next cycle.
2132261991Sdim if ((NextCycle - CurrCycle) > DependentLatency) 2133261991Sdim DependentLatency = 0; 2134261991Sdim else 2135261991Sdim DependentLatency -= (NextCycle - CurrCycle); 2136261991Sdim 2137239462Sdim if (!HazardRec->isEnabled()) { 2138239462Sdim // Bypass HazardRec virtual calls. 2139239462Sdim CurrCycle = NextCycle; 2140309124Sdim } else { 2141239462Sdim // Bypass getHazardType calls in case of long latency. 2142239462Sdim for (; CurrCycle != NextCycle; ++CurrCycle) { 2143239462Sdim if (isTop()) 2144239462Sdim HazardRec->AdvanceCycle(); 2145239462Sdim else 2146239462Sdim HazardRec->RecedeCycle(); 2147234285Sdim } 2148239462Sdim } 2149239462Sdim CheckPending = true; 2150261991Sdim IsResourceLimited = 2151327952Sdim checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), 2152353358Sdim getScheduledLatency(), true); 2153239462Sdim 2154341825Sdim LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() 2155341825Sdim << '\n'); 2156239462Sdim} 2157239462Sdim 2158276479Sdimvoid SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { 2159261991Sdim ExecutedResCounts[PIdx] += Count; 2160261991Sdim if (ExecutedResCounts[PIdx] > MaxExecutedResCount) 2161261991Sdim MaxExecutedResCount = ExecutedResCounts[PIdx]; 2162261991Sdim} 2163261991Sdim 2164243830Sdim/// Add the given processor resource to this scheduled zone. 2165261991Sdim/// 2166261991Sdim/// \param Cycles indicates the number of consecutive (non-pipelined) cycles 2167261991Sdim/// during which this resource is consumed. 2168261991Sdim/// 2169261991Sdim/// \return the next cycle at which the instruction may execute without 2170261991Sdim/// oversubscribing resources. 
2171276479Sdimunsigned SchedBoundary:: 2172276479SdimcountResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { 2173243830Sdim unsigned Factor = SchedModel->getResourceFactor(PIdx); 2174261991Sdim unsigned Count = Factor * Cycles; 2175341825Sdim LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" 2176341825Sdim << Cycles << "x" << Factor << "u\n"); 2177243830Sdim 2178261991Sdim // Update Executed resources counts. 2179261991Sdim incExecutedResources(PIdx, Count); 2180243830Sdim assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 2181243830Sdim Rem->RemainingCounts[PIdx] -= Count; 2182243830Sdim 2183261991Sdim // Check if this resource exceeds the current critical resource. If so, it 2184261991Sdim // becomes the critical resource. 2185261991Sdim if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { 2186261991Sdim ZoneCritResIdx = PIdx; 2187341825Sdim LLVM_DEBUG(dbgs() << " *** Critical resource " 2188341825Sdim << SchedModel->getResourceName(PIdx) << ": " 2189341825Sdim << getResourceCount(PIdx) / SchedModel->getLatencyFactor() 2190341825Sdim << "c\n"); 2191243830Sdim } 2192276479Sdim // For reserved resources, record the highest cycle using the resource. 2193353358Sdim unsigned NextAvailable, InstanceIdx; 2194353358Sdim std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); 2195276479Sdim if (NextAvailable > CurrCycle) { 2196341825Sdim LLVM_DEBUG(dbgs() << " Resource conflict: " 2197353358Sdim << SchedModel->getResourceName(PIdx) 2198353358Sdim << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' 2199341825Sdim << " reserved until @" << NextAvailable << "\n"); 2200276479Sdim } 2201276479Sdim return NextAvailable; 2202243830Sdim} 2203243830Sdim 2204239462Sdim/// Move the boundary of scheduled code by one SUnit. 2205276479Sdimvoid SchedBoundary::bumpNode(SUnit *SU) { 2206239462Sdim // Update the reservation table. 
2207239462Sdim if (HazardRec->isEnabled()) { 2208239462Sdim if (!isTop() && SU->isCall) { 2209239462Sdim // Calls are scheduled with their preceding instructions. For bottom-up 2210239462Sdim // scheduling, clear the pipeline state before emitting. 2211239462Sdim HazardRec->Reset(); 2212234285Sdim } 2213239462Sdim HazardRec->EmitInstruction(SU); 2214353358Sdim // Scheduling an instruction may have made pending instructions available. 2215353358Sdim CheckPending = true; 2216239462Sdim } 2217276479Sdim // checkHazard should prevent scheduling multiple instructions per cycle that 2218276479Sdim // exceed the issue width. 2219261991Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2220261991Sdim unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); 2221276479Sdim assert( 2222276479Sdim (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && 2223276479Sdim "Cannot schedule this instruction's MicroOps in the current cycle."); 2224276479Sdim 2225261991Sdim unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 2226341825Sdim LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); 2227261991Sdim 2228276479Sdim unsigned NextCycle = CurrCycle; 2229261991Sdim switch (SchedModel->getMicroOpBufferSize()) { 2230261991Sdim case 0: 2231261991Sdim assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); 2232261991Sdim break; 2233261991Sdim case 1: 2234261991Sdim if (ReadyCycle > NextCycle) { 2235261991Sdim NextCycle = ReadyCycle; 2236341825Sdim LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); 2237261991Sdim } 2238261991Sdim break; 2239261991Sdim default: 2240261991Sdim // We don't currently model the OOO reorder buffer, so consider all 2241276479Sdim // scheduled MOps to be "retired". We do loosely model in-order resource 2242276479Sdim // latency. If this instruction uses an in-order resource, account for any 2243276479Sdim // likely stall cycles. 
2244276479Sdim if (SU->isUnbuffered && ReadyCycle > NextCycle) 2245276479Sdim NextCycle = ReadyCycle; 2246261991Sdim break; 2247261991Sdim } 2248261991Sdim RetiredMOps += IncMOps; 2249261991Sdim 2250243830Sdim // Update resource counts and critical resource. 2251243830Sdim if (SchedModel->hasInstrSchedModel()) { 2252261991Sdim unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); 2253261991Sdim assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); 2254261991Sdim Rem->RemIssueCount -= DecRemIssue; 2255261991Sdim if (ZoneCritResIdx) { 2256261991Sdim // Scale scheduled micro-ops for comparing with the critical resource. 2257261991Sdim unsigned ScaledMOps = 2258261991Sdim RetiredMOps * SchedModel->getMicroOpFactor(); 2259261991Sdim 2260261991Sdim // If scaled micro-ops are now more than the previous critical resource by 2261261991Sdim // a full cycle, then micro-ops issue becomes critical. 2262261991Sdim if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) 2263261991Sdim >= (int)SchedModel->getLatencyFactor()) { 2264261991Sdim ZoneCritResIdx = 0; 2265341825Sdim LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: " 2266341825Sdim << ScaledMOps / SchedModel->getLatencyFactor() 2267341825Sdim << "c\n"); 2268261991Sdim } 2269261991Sdim } 2270243830Sdim for (TargetSchedModel::ProcResIter 2271243830Sdim PI = SchedModel->getWriteProcResBegin(SC), 2272243830Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2273261991Sdim unsigned RCycle = 2274276479Sdim countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); 2275261991Sdim if (RCycle > NextCycle) 2276261991Sdim NextCycle = RCycle; 2277243830Sdim } 2278276479Sdim if (SU->hasReservedResource) { 2279276479Sdim // For reserved resources, record the highest cycle using the resource. 2280276479Sdim // For top-down scheduling, this is the cycle in which we schedule this 2281276479Sdim // instruction plus the number of cycles the operations reserves the 2282276479Sdim // resource. 
For bottom-up is it simply the instruction's cycle. 2283276479Sdim for (TargetSchedModel::ProcResIter 2284276479Sdim PI = SchedModel->getWriteProcResBegin(SC), 2285276479Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2286276479Sdim unsigned PIdx = PI->ProcResourceIdx; 2287276479Sdim if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { 2288353358Sdim unsigned ReservedUntil, InstanceIdx; 2289353358Sdim std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); 2290276479Sdim if (isTop()) { 2291353358Sdim ReservedCycles[InstanceIdx] = 2292353358Sdim std::max(ReservedUntil, NextCycle + PI->Cycles); 2293353358Sdim } else 2294353358Sdim ReservedCycles[InstanceIdx] = NextCycle; 2295276479Sdim } 2296276479Sdim } 2297276479Sdim } 2298243830Sdim } 2299261991Sdim // Update ExpectedLatency and DependentLatency. 2300261991Sdim unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; 2301261991Sdim unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; 2302261991Sdim if (SU->getDepth() > TopLatency) { 2303261991Sdim TopLatency = SU->getDepth(); 2304341825Sdim LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU(" 2305341825Sdim << SU->NodeNum << ") " << TopLatency << "c\n"); 2306243830Sdim } 2307261991Sdim if (SU->getHeight() > BotLatency) { 2308261991Sdim BotLatency = SU->getHeight(); 2309341825Sdim LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU(" 2310341825Sdim << SU->NodeNum << ") " << BotLatency << "c\n"); 2311261991Sdim } 2312261991Sdim // If we stall for any reason, bump the cycle. 2313327952Sdim if (NextCycle > CurrCycle) 2314261991Sdim bumpCycle(NextCycle); 2315327952Sdim else 2316261991Sdim // After updating ZoneCritResIdx and ExpectedLatency, check if we're 2317276479Sdim // resource limited. If a stall occurred, bumpCycle does this. 
2318261991Sdim IsResourceLimited = 2319327952Sdim checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), 2320353358Sdim getScheduledLatency(), true); 2321327952Sdim 2322276479Sdim // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle 2323276479Sdim // resets CurrMOps. Loop to handle instructions with more MOps than issue in 2324276479Sdim // one cycle. Since we commonly reach the max MOps here, opportunistically 2325276479Sdim // bump the cycle to avoid uselessly checking everything in the readyQ. 2326276479Sdim CurrMOps += IncMOps; 2327321369Sdim 2328321369Sdim // Bump the cycle count for issue group constraints. 2329321369Sdim // This must be done after NextCycle has been adjust for all other stalls. 2330321369Sdim // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set 2331321369Sdim // currCycle to X. 2332321369Sdim if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || 2333321369Sdim (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { 2334341825Sdim LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin") 2335341825Sdim << " group\n"); 2336321369Sdim bumpCycle(++NextCycle); 2337321369Sdim } 2338321369Sdim 2339276479Sdim while (CurrMOps >= SchedModel->getIssueWidth()) { 2340341825Sdim LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " 2341341825Sdim << CurrCycle << '\n'); 2342276479Sdim bumpCycle(++NextCycle); 2343276479Sdim } 2344341825Sdim LLVM_DEBUG(dumpScheduledState()); 2345239462Sdim} 2346239462Sdim 2347239462Sdim/// Release pending ready nodes in to the available queue. This makes them 2348239462Sdim/// visible to heuristics. 2349276479Sdimvoid SchedBoundary::releasePending() { 2350239462Sdim // If the available queue is empty, it is safe to reset MinReadyCycle. 
2351239462Sdim if (Available.empty()) 2352321369Sdim MinReadyCycle = std::numeric_limits<unsigned>::max(); 2353239462Sdim 2354239462Sdim // Check to see if any of the pending instructions are ready to issue. If 2355239462Sdim // so, add them to the available queue. 2356360784Sdim for (unsigned I = 0, E = Pending.size(); I < E; ++I) { 2357360784Sdim SUnit *SU = *(Pending.begin() + I); 2358239462Sdim unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 2359239462Sdim 2360239462Sdim if (ReadyCycle < MinReadyCycle) 2361239462Sdim MinReadyCycle = ReadyCycle; 2362239462Sdim 2363309124Sdim if (Available.size() >= ReadyListLimit) 2364309124Sdim break; 2365309124Sdim 2366360784Sdim releaseNode(SU, ReadyCycle, true, I); 2367360784Sdim if (E != Pending.size()) { 2368360784Sdim --I; 2369360784Sdim --E; 2370360784Sdim } 2371239462Sdim } 2372239462Sdim CheckPending = false; 2373239462Sdim} 2374239462Sdim 2375239462Sdim/// Remove SU from the ready set for this boundary. 2376276479Sdimvoid SchedBoundary::removeReady(SUnit *SU) { 2377239462Sdim if (Available.isInQueue(SU)) 2378239462Sdim Available.remove(Available.find(SU)); 2379239462Sdim else { 2380239462Sdim assert(Pending.isInQueue(SU) && "bad ready count"); 2381239462Sdim Pending.remove(Pending.find(SU)); 2382239462Sdim } 2383239462Sdim} 2384239462Sdim 2385239462Sdim/// If this queue only has one ready candidate, return it. As a side effect, 2386243830Sdim/// defer any nodes that now hit a hazard, and advance the cycle until at least 2387243830Sdim/// one node is ready. If multiple instructions are ready, return NULL. 2388276479SdimSUnit *SchedBoundary::pickOnlyChoice() { 2389239462Sdim if (CheckPending) 2390239462Sdim releasePending(); 2391239462Sdim 2392261991Sdim if (CurrMOps > 0) { 2393243830Sdim // Defer any ready instrs that now have a hazard. 
2394243830Sdim for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 2395243830Sdim if (checkHazard(*I)) { 2396243830Sdim Pending.push(*I); 2397243830Sdim I = Available.remove(I); 2398243830Sdim continue; 2399243830Sdim } 2400243830Sdim ++I; 2401243830Sdim } 2402243830Sdim } 2403239462Sdim for (unsigned i = 0; Available.empty(); ++i) { 2404276479Sdim// FIXME: Re-enable assert once PR20057 is resolved. 2405276479Sdim// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && 2406276479Sdim// "permanent hazard"); 2407276479Sdim (void)i; 2408261991Sdim bumpCycle(CurrCycle + 1); 2409239462Sdim releasePending(); 2410239462Sdim } 2411309124Sdim 2412341825Sdim LLVM_DEBUG(Pending.dump()); 2413341825Sdim LLVM_DEBUG(Available.dump()); 2414309124Sdim 2415239462Sdim if (Available.size() == 1) 2416239462Sdim return *Available.begin(); 2417276479Sdim return nullptr; 2418239462Sdim} 2419239462Sdim 2420321369Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2421261991Sdim// This is useful information to dump after bumpNode. 2422261991Sdim// Note that the Queue contents are more useful before pickNodeFromQueue. 
2423321369SdimLLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { 2424261991Sdim unsigned ResFactor; 2425261991Sdim unsigned ResCount; 2426261991Sdim if (ZoneCritResIdx) { 2427261991Sdim ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); 2428261991Sdim ResCount = getResourceCount(ZoneCritResIdx); 2429309124Sdim } else { 2430261991Sdim ResFactor = SchedModel->getMicroOpFactor(); 2431327952Sdim ResCount = RetiredMOps * ResFactor; 2432243830Sdim } 2433261991Sdim unsigned LFactor = SchedModel->getLatencyFactor(); 2434261991Sdim dbgs() << Available.getName() << " @" << CurrCycle << "c\n" 2435261991Sdim << " Retired: " << RetiredMOps; 2436261991Sdim dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; 2437261991Sdim dbgs() << "\n Critical: " << ResCount / LFactor << "c, " 2438276479Sdim << ResCount / ResFactor << " " 2439276479Sdim << SchedModel->getResourceName(ZoneCritResIdx) 2440261991Sdim << "\n ExpectedLatency: " << ExpectedLatency << "c\n" 2441261991Sdim << (IsResourceLimited ? " - Resource" : " - Latency") 2442261991Sdim << " limited.\n"; 2443239462Sdim} 2444261991Sdim#endif 2445239462Sdim 2446276479Sdim//===----------------------------------------------------------------------===// 2447276479Sdim// GenericScheduler - Generic implementation of MachineSchedStrategy. 
2448276479Sdim//===----------------------------------------------------------------------===// 2449276479Sdim 2450276479Sdimvoid GenericSchedulerBase::SchedCandidate:: 2451243830SdiminitResourceDelta(const ScheduleDAGMI *DAG, 2452243830Sdim const TargetSchedModel *SchedModel) { 2453243830Sdim if (!Policy.ReduceResIdx && !Policy.DemandResIdx) 2454243830Sdim return; 2455243830Sdim 2456243830Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2457243830Sdim for (TargetSchedModel::ProcResIter 2458243830Sdim PI = SchedModel->getWriteProcResBegin(SC), 2459243830Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2460243830Sdim if (PI->ProcResourceIdx == Policy.ReduceResIdx) 2461243830Sdim ResDelta.CritResources += PI->Cycles; 2462243830Sdim if (PI->ProcResourceIdx == Policy.DemandResIdx) 2463243830Sdim ResDelta.DemandedResources += PI->Cycles; 2464243830Sdim } 2465243830Sdim} 2466243830Sdim 2467344779Sdim/// Compute remaining latency. We need this both to determine whether the 2468344779Sdim/// overall schedule has become latency-limited and whether the instructions 2469344779Sdim/// outside this zone are resource or latency limited. 2470344779Sdim/// 2471344779Sdim/// The "dependent" latency is updated incrementally during scheduling as the 2472344779Sdim/// max height/depth of scheduled nodes minus the cycles since it was 2473344779Sdim/// scheduled: 2474344779Sdim/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone 2475344779Sdim/// 2476344779Sdim/// The "independent" latency is the max ready queue depth: 2477344779Sdim/// ILat = max N.depth for N in Available|Pending 2478344779Sdim/// 2479344779Sdim/// RemainingLatency is the greater of independent and dependent latency. 2480344779Sdim/// 2481344779Sdim/// These computations are expensive, especially in DAGs with many edges, so 2482344779Sdim/// only do them if necessary. 
2483344779Sdimstatic unsigned computeRemLatency(SchedBoundary &CurrZone) { 2484344779Sdim unsigned RemLatency = CurrZone.getDependentLatency(); 2485344779Sdim RemLatency = std::max(RemLatency, 2486344779Sdim CurrZone.findMaxLatency(CurrZone.Available.elements())); 2487344779Sdim RemLatency = std::max(RemLatency, 2488344779Sdim CurrZone.findMaxLatency(CurrZone.Pending.elements())); 2489344779Sdim return RemLatency; 2490344779Sdim} 2491344779Sdim 2492344779Sdim/// Returns true if the current cycle plus remaning latency is greater than 2493344779Sdim/// the critical path in the scheduling region. 2494344779Sdimbool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy, 2495344779Sdim SchedBoundary &CurrZone, 2496344779Sdim bool ComputeRemLatency, 2497344779Sdim unsigned &RemLatency) const { 2498344779Sdim // The current cycle is already greater than the critical path, so we are 2499344779Sdim // already latency limited and don't need to compute the remaining latency. 2500344779Sdim if (CurrZone.getCurrCycle() > Rem.CriticalPath) 2501344779Sdim return true; 2502344779Sdim 2503344779Sdim // If we haven't scheduled anything yet, then we aren't latency limited. 2504344779Sdim if (CurrZone.getCurrCycle() == 0) 2505344779Sdim return false; 2506344779Sdim 2507344779Sdim if (ComputeRemLatency) 2508344779Sdim RemLatency = computeRemLatency(CurrZone); 2509344779Sdim 2510344779Sdim return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath; 2511344779Sdim} 2512344779Sdim 2513276479Sdim/// Set the CandPolicy given a scheduling zone given the current resources and 2514276479Sdim/// latencies inside and outside the zone. 2515309124Sdimvoid GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, 2516276479Sdim SchedBoundary &CurrZone, 2517276479Sdim SchedBoundary *OtherZone) { 2518288943Sdim // Apply preemptive heuristics based on the total latency and resources 2519276479Sdim // inside and outside this zone. 
Potential stalls should be considered before 2520276479Sdim // following this policy. 2521261991Sdim 2522276479Sdim // Compute the critical resource outside the zone. 2523276479Sdim unsigned OtherCritIdx = 0; 2524276479Sdim unsigned OtherCount = 2525276479Sdim OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; 2526276479Sdim 2527276479Sdim bool OtherResLimited = false; 2528344779Sdim unsigned RemLatency = 0; 2529344779Sdim bool RemLatencyComputed = false; 2530344779Sdim if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { 2531344779Sdim RemLatency = computeRemLatency(CurrZone); 2532344779Sdim RemLatencyComputed = true; 2533327952Sdim OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), 2534353358Sdim OtherCount, RemLatency, false); 2535344779Sdim } 2536327952Sdim 2537276479Sdim // Schedule aggressively for latency in PostRA mode. We don't check for 2538276479Sdim // acyclic latency during PostRA, and highly out-of-order processors will 2539276479Sdim // skip PostRA scheduling. 2540344779Sdim if (!OtherResLimited && 2541344779Sdim (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed, 2542344779Sdim RemLatency))) { 2543344779Sdim Policy.ReduceLatency |= true; 2544344779Sdim LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() 2545344779Sdim << " RemainingLatency " << RemLatency << " + " 2546344779Sdim << CurrZone.getCurrCycle() << "c > CritPath " 2547344779Sdim << Rem.CriticalPath << "\n"); 2548276479Sdim } 2549276479Sdim // If the same resource is limiting inside and outside the zone, do nothing. 
2550276479Sdim if (CurrZone.getZoneCritResIdx() == OtherCritIdx) 2551276479Sdim return; 2552276479Sdim 2553341825Sdim LLVM_DEBUG(if (CurrZone.isResourceLimited()) { 2554341825Sdim dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " 2555341825Sdim << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n"; 2556341825Sdim } if (OtherResLimited) dbgs() 2557341825Sdim << " RemainingLimit: " 2558341825Sdim << SchedModel->getResourceName(OtherCritIdx) << "\n"; 2559341825Sdim if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs() 2560341825Sdim << " Latency limited both directions.\n"); 2561276479Sdim 2562276479Sdim if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) 2563276479Sdim Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); 2564276479Sdim 2565276479Sdim if (OtherResLimited) 2566276479Sdim Policy.DemandResIdx = OtherCritIdx; 2567276479Sdim} 2568276479Sdim 2569276479Sdim#ifndef NDEBUG 2570276479Sdimconst char *GenericSchedulerBase::getReasonStr( 2571276479Sdim GenericSchedulerBase::CandReason Reason) { 2572276479Sdim switch (Reason) { 2573276479Sdim case NoCand: return "NOCAND "; 2574309124Sdim case Only1: return "ONLY1 "; 2575344779Sdim case PhysReg: return "PHYS-REG "; 2576276479Sdim case RegExcess: return "REG-EXCESS"; 2577276479Sdim case RegCritical: return "REG-CRIT "; 2578276479Sdim case Stall: return "STALL "; 2579276479Sdim case Cluster: return "CLUSTER "; 2580276479Sdim case Weak: return "WEAK "; 2581276479Sdim case RegMax: return "REG-MAX "; 2582276479Sdim case ResourceReduce: return "RES-REDUCE"; 2583276479Sdim case ResourceDemand: return "RES-DEMAND"; 2584276479Sdim case TopDepthReduce: return "TOP-DEPTH "; 2585276479Sdim case TopPathReduce: return "TOP-PATH "; 2586276479Sdim case BotHeightReduce:return "BOT-HEIGHT"; 2587276479Sdim case BotPathReduce: return "BOT-PATH "; 2588276479Sdim case NextDefUse: return "DEF-USE "; 2589276479Sdim case NodeOrder: return "ORDER "; 2590276479Sdim }; 2591276479Sdim 
llvm_unreachable("Unknown reason!"); 2592276479Sdim} 2593276479Sdim 2594276479Sdimvoid GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { 2595276479Sdim PressureChange P; 2596276479Sdim unsigned ResIdx = 0; 2597276479Sdim unsigned Latency = 0; 2598276479Sdim switch (Cand.Reason) { 2599276479Sdim default: 2600276479Sdim break; 2601276479Sdim case RegExcess: 2602276479Sdim P = Cand.RPDelta.Excess; 2603276479Sdim break; 2604276479Sdim case RegCritical: 2605276479Sdim P = Cand.RPDelta.CriticalMax; 2606276479Sdim break; 2607276479Sdim case RegMax: 2608276479Sdim P = Cand.RPDelta.CurrentMax; 2609276479Sdim break; 2610276479Sdim case ResourceReduce: 2611276479Sdim ResIdx = Cand.Policy.ReduceResIdx; 2612276479Sdim break; 2613276479Sdim case ResourceDemand: 2614276479Sdim ResIdx = Cand.Policy.DemandResIdx; 2615276479Sdim break; 2616276479Sdim case TopDepthReduce: 2617276479Sdim Latency = Cand.SU->getDepth(); 2618276479Sdim break; 2619276479Sdim case TopPathReduce: 2620276479Sdim Latency = Cand.SU->getHeight(); 2621276479Sdim break; 2622276479Sdim case BotHeightReduce: 2623276479Sdim Latency = Cand.SU->getHeight(); 2624276479Sdim break; 2625276479Sdim case BotPathReduce: 2626276479Sdim Latency = Cand.SU->getDepth(); 2627276479Sdim break; 2628276479Sdim } 2629296417Sdim dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 2630276479Sdim if (P.isValid()) 2631276479Sdim dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) 2632276479Sdim << ":" << P.getUnitInc() << " "; 2633276479Sdim else 2634276479Sdim dbgs() << " "; 2635276479Sdim if (ResIdx) 2636276479Sdim dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; 2637276479Sdim else 2638276479Sdim dbgs() << " "; 2639276479Sdim if (Latency) 2640276479Sdim dbgs() << " " << Latency << " cycles "; 2641276479Sdim else 2642276479Sdim dbgs() << " "; 2643276479Sdim dbgs() << '\n'; 2644276479Sdim} 2645276479Sdim#endif 2646276479Sdim 2647341825Sdimnamespace llvm { 
2648243830Sdim/// Return true if this heuristic determines order. 2649341825Sdimbool tryLess(int TryVal, int CandVal, 2650341825Sdim GenericSchedulerBase::SchedCandidate &TryCand, 2651341825Sdim GenericSchedulerBase::SchedCandidate &Cand, 2652341825Sdim GenericSchedulerBase::CandReason Reason) { 2653243830Sdim if (TryVal < CandVal) { 2654243830Sdim TryCand.Reason = Reason; 2655243830Sdim return true; 2656243830Sdim } 2657243830Sdim if (TryVal > CandVal) { 2658243830Sdim if (Cand.Reason > Reason) 2659243830Sdim Cand.Reason = Reason; 2660243830Sdim return true; 2661243830Sdim } 2662243830Sdim return false; 2663243830Sdim} 2664249423Sdim 2665341825Sdimbool tryGreater(int TryVal, int CandVal, 2666341825Sdim GenericSchedulerBase::SchedCandidate &TryCand, 2667341825Sdim GenericSchedulerBase::SchedCandidate &Cand, 2668341825Sdim GenericSchedulerBase::CandReason Reason) { 2669243830Sdim if (TryVal > CandVal) { 2670243830Sdim TryCand.Reason = Reason; 2671243830Sdim return true; 2672243830Sdim } 2673243830Sdim if (TryVal < CandVal) { 2674243830Sdim if (Cand.Reason > Reason) 2675243830Sdim Cand.Reason = Reason; 2676243830Sdim return true; 2677243830Sdim } 2678243830Sdim return false; 2679243830Sdim} 2680243830Sdim 2681341825Sdimbool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 2682341825Sdim GenericSchedulerBase::SchedCandidate &Cand, 2683341825Sdim SchedBoundary &Zone) { 2684276479Sdim if (Zone.isTop()) { 2685276479Sdim if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { 2686276479Sdim if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2687276479Sdim TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) 2688276479Sdim return true; 2689276479Sdim } 2690276479Sdim if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2691276479Sdim TryCand, Cand, GenericSchedulerBase::TopPathReduce)) 2692276479Sdim return true; 2693309124Sdim } else { 2694276479Sdim if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { 2695276479Sdim if 
(tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2696276479Sdim TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) 2697276479Sdim return true; 2698276479Sdim } 2699276479Sdim if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2700276479Sdim TryCand, Cand, GenericSchedulerBase::BotPathReduce)) 2701276479Sdim return true; 2702276479Sdim } 2703276479Sdim return false; 2704276479Sdim} 2705341825Sdim} // end namespace llvm 2706276479Sdim 2707309124Sdimstatic void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { 2708341825Sdim LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") 2709341825Sdim << GenericSchedulerBase::getReasonStr(Reason) << '\n'); 2710276479Sdim} 2711276479Sdim 2712309124Sdimstatic void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { 2713309124Sdim tracePick(Cand.Reason, Cand.AtTop); 2714309124Sdim} 2715309124Sdim 2716276479Sdimvoid GenericScheduler::initialize(ScheduleDAGMI *dag) { 2717276479Sdim assert(dag->hasVRegLiveness() && 2718276479Sdim "(PreRA)GenericScheduler needs vreg liveness"); 2719276479Sdim DAG = static_cast<ScheduleDAGMILive*>(dag); 2720276479Sdim SchedModel = DAG->getSchedModel(); 2721276479Sdim TRI = DAG->TRI; 2722276479Sdim 2723276479Sdim Rem.init(DAG, SchedModel); 2724276479Sdim Top.init(DAG, SchedModel, &Rem); 2725276479Sdim Bot.init(DAG, SchedModel, &Rem); 2726276479Sdim 2727276479Sdim // Initialize resource counts. 2728276479Sdim 2729276479Sdim // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 2730276479Sdim // are disabled, then these HazardRecs will be disabled. 
2731276479Sdim const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 2732276479Sdim if (!Top.HazardRec) { 2733276479Sdim Top.HazardRec = 2734280031Sdim DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 2735280031Sdim Itin, DAG); 2736276479Sdim } 2737276479Sdim if (!Bot.HazardRec) { 2738276479Sdim Bot.HazardRec = 2739280031Sdim DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 2740280031Sdim Itin, DAG); 2741276479Sdim } 2742309124Sdim TopCand.SU = nullptr; 2743309124Sdim BotCand.SU = nullptr; 2744276479Sdim} 2745276479Sdim 2746276479Sdim/// Initialize the per-region scheduling policy. 2747276479Sdimvoid GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 2748276479Sdim MachineBasicBlock::iterator End, 2749276479Sdim unsigned NumRegionInstrs) { 2750327952Sdim const MachineFunction &MF = *Begin->getMF(); 2751280031Sdim const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); 2752276479Sdim 2753276479Sdim // Avoid setting up the register pressure tracker for small regions to save 2754276479Sdim // compile time. As a rough heuristic, only track pressure when the number of 2755276479Sdim // schedulable instructions exceeds half the integer register file. 2756276479Sdim RegionPolicy.ShouldTrackPressure = true; 2757276479Sdim for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { 2758276479Sdim MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; 2759276479Sdim if (TLI->isTypeLegal(LegalIntVT)) { 2760276479Sdim unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 2761276479Sdim TLI->getRegClassFor(LegalIntVT)); 2762276479Sdim RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 2763276479Sdim } 2764276479Sdim } 2765276479Sdim 2766276479Sdim // For generic targets, we default to bottom-up, because it's simpler and more 2767276479Sdim // compile-time optimizations have been implemented in that direction. 
2768276479Sdim RegionPolicy.OnlyBottomUp = true; 2769276479Sdim 2770276479Sdim // Allow the subtarget to override default policy. 2771309124Sdim MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); 2772276479Sdim 2773276479Sdim // After subtarget overrides, apply command line options. 2774353358Sdim if (!EnableRegPressure) { 2775276479Sdim RegionPolicy.ShouldTrackPressure = false; 2776353358Sdim RegionPolicy.ShouldTrackLaneMasks = false; 2777353358Sdim } 2778276479Sdim 2779276479Sdim // Check -misched-topdown/bottomup can force or unforce scheduling direction. 2780276479Sdim // e.g. -misched-bottomup=false allows scheduling in both directions. 2781276479Sdim assert((!ForceTopDown || !ForceBottomUp) && 2782276479Sdim "-misched-topdown incompatible with -misched-bottomup"); 2783276479Sdim if (ForceBottomUp.getNumOccurrences() > 0) { 2784276479Sdim RegionPolicy.OnlyBottomUp = ForceBottomUp; 2785276479Sdim if (RegionPolicy.OnlyBottomUp) 2786276479Sdim RegionPolicy.OnlyTopDown = false; 2787276479Sdim } 2788276479Sdim if (ForceTopDown.getNumOccurrences() > 0) { 2789276479Sdim RegionPolicy.OnlyTopDown = ForceTopDown; 2790276479Sdim if (RegionPolicy.OnlyTopDown) 2791276479Sdim RegionPolicy.OnlyBottomUp = false; 2792276479Sdim } 2793276479Sdim} 2794276479Sdim 2795321369Sdimvoid GenericScheduler::dumpPolicy() const { 2796321369Sdim // Cannot completely remove virtual function even in release mode. 
2797321369Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2798296417Sdim dbgs() << "GenericScheduler RegionPolicy: " 2799296417Sdim << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure 2800296417Sdim << " OnlyTopDown=" << RegionPolicy.OnlyTopDown 2801296417Sdim << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp 2802296417Sdim << "\n"; 2803321369Sdim#endif 2804296417Sdim} 2805296417Sdim 2806276479Sdim/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic 2807276479Sdim/// critical path by more cycles than it takes to drain the instruction buffer. 2808276479Sdim/// We estimate an upper bounds on in-flight instructions as: 2809276479Sdim/// 2810276479Sdim/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) 2811276479Sdim/// InFlightIterations = AcyclicPath / CyclesPerIteration 2812276479Sdim/// InFlightResources = InFlightIterations * LoopResources 2813276479Sdim/// 2814276479Sdim/// TODO: Check execution resources in addition to IssueCount. 2815276479Sdimvoid GenericScheduler::checkAcyclicLatency() { 2816276479Sdim if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) 2817276479Sdim return; 2818276479Sdim 2819276479Sdim // Scaled number of cycles per loop iteration. 2820276479Sdim unsigned IterCount = 2821276479Sdim std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), 2822276479Sdim Rem.RemIssueCount); 2823276479Sdim // Scaled acyclic critical path. 
2824276479Sdim unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); 2825276479Sdim // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop 2826276479Sdim unsigned InFlightCount = 2827276479Sdim (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; 2828276479Sdim unsigned BufferLimit = 2829276479Sdim SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); 2830276479Sdim 2831276479Sdim Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; 2832276479Sdim 2833341825Sdim LLVM_DEBUG( 2834341825Sdim dbgs() << "IssueCycles=" 2835341825Sdim << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " 2836341825Sdim << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() 2837341825Sdim << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount 2838341825Sdim << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() 2839341825Sdim << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; 2840341825Sdim if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n"); 2841276479Sdim} 2842276479Sdim 2843276479Sdimvoid GenericScheduler::registerRoots() { 2844276479Sdim Rem.CriticalPath = DAG->ExitSU.getDepth(); 2845276479Sdim 2846276479Sdim // Some roots may not feed into ExitSU. Check all of them in case. 
2847321369Sdim for (const SUnit *SU : Bot.Available) { 2848321369Sdim if (SU->getDepth() > Rem.CriticalPath) 2849321369Sdim Rem.CriticalPath = SU->getDepth(); 2850276479Sdim } 2851341825Sdim LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); 2852280031Sdim if (DumpCriticalPathLength) { 2853280031Sdim errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; 2854280031Sdim } 2855276479Sdim 2856321369Sdim if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { 2857276479Sdim Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 2858276479Sdim checkAcyclicLatency(); 2859276479Sdim } 2860276479Sdim} 2861276479Sdim 2862341825Sdimnamespace llvm { 2863341825Sdimbool tryPressure(const PressureChange &TryP, 2864341825Sdim const PressureChange &CandP, 2865341825Sdim GenericSchedulerBase::SchedCandidate &TryCand, 2866341825Sdim GenericSchedulerBase::SchedCandidate &Cand, 2867341825Sdim GenericSchedulerBase::CandReason Reason, 2868341825Sdim const TargetRegisterInfo *TRI, 2869341825Sdim const MachineFunction &MF) { 2870261991Sdim // If one candidate decreases and the other increases, go with it. 2871261991Sdim // Invalid candidates have UnitInc==0. 2872280031Sdim if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, 2873280031Sdim Reason)) { 2874261991Sdim return true; 2875261991Sdim } 2876309124Sdim // Do not compare the magnitude of pressure changes between top and bottom 2877309124Sdim // boundary. 2878309124Sdim if (Cand.AtTop != TryCand.AtTop) 2879309124Sdim return false; 2880296417Sdim 2881309124Sdim // If both candidates affect the same set in the same boundary, go with the 2882309124Sdim // smallest increase. 
2883309124Sdim unsigned TryPSet = TryP.getPSetOrMax(); 2884309124Sdim unsigned CandPSet = CandP.getPSetOrMax(); 2885309124Sdim if (TryPSet == CandPSet) { 2886309124Sdim return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, 2887309124Sdim Reason); 2888309124Sdim } 2889309124Sdim 2890296417Sdim int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : 2891296417Sdim std::numeric_limits<int>::max(); 2892296417Sdim 2893296417Sdim int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : 2894296417Sdim std::numeric_limits<int>::max(); 2895296417Sdim 2896261991Sdim // If the candidates are decreasing pressure, reverse priority. 2897261991Sdim if (TryP.getUnitInc() < 0) 2898261991Sdim std::swap(TryRank, CandRank); 2899261991Sdim return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); 2900261991Sdim} 2901261991Sdim 2902341825Sdimunsigned getWeakLeft(const SUnit *SU, bool isTop) { 2903249423Sdim return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; 2904249423Sdim} 2905249423Sdim 2906251662Sdim/// Minimize physical register live ranges. Regalloc wants them adjacent to 2907251662Sdim/// their physreg def/use. 2908251662Sdim/// 2909251662Sdim/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf 2910251662Sdim/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled 2911251662Sdim/// with the operation that produces or consumes the physreg. We'll do this when 2912251662Sdim/// regalloc has support for parallel copies. 2913344779Sdimint biasPhysReg(const SUnit *SU, bool isTop) { 2914251662Sdim const MachineInstr *MI = SU->getInstr(); 2915251662Sdim 2916344779Sdim if (MI->isCopy()) { 2917344779Sdim unsigned ScheduledOper = isTop ? 1 : 0; 2918344779Sdim unsigned UnscheduledOper = isTop ? 0 : 1; 2919344779Sdim // If we have already scheduled the physreg produce/consumer, immediately 2920344779Sdim // schedule the copy. 
2921360784Sdim if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) 2922344779Sdim return 1; 2923344779Sdim // If the physreg is at the boundary, defer it. Otherwise schedule it 2924344779Sdim // immediately to free the dependent. We can hoist the copy later. 2925344779Sdim bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; 2926360784Sdim if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) 2927344779Sdim return AtBoundary ? -1 : 1; 2928344779Sdim } 2929344779Sdim 2930344779Sdim if (MI->isMoveImmediate()) { 2931344779Sdim // If we have a move immediate and all successors have been assigned, bias 2932344779Sdim // towards scheduling this later. Make sure all register defs are to 2933344779Sdim // physical registers. 2934344779Sdim bool DoBias = true; 2935344779Sdim for (const MachineOperand &Op : MI->defs()) { 2936360784Sdim if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { 2937344779Sdim DoBias = false; 2938344779Sdim break; 2939344779Sdim } 2940344779Sdim } 2941344779Sdim 2942344779Sdim if (DoBias) 2943344779Sdim return isTop ? 
-1 : 1; 2944344779Sdim } 2945344779Sdim 2946251662Sdim return 0; 2947251662Sdim} 2948341825Sdim} // end namespace llvm 2949251662Sdim 2950309124Sdimvoid GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, 2951309124Sdim bool AtTop, 2952309124Sdim const RegPressureTracker &RPTracker, 2953309124Sdim RegPressureTracker &TempTracker) { 2954309124Sdim Cand.SU = SU; 2955309124Sdim Cand.AtTop = AtTop; 2956261991Sdim if (DAG->isTrackingPressure()) { 2957309124Sdim if (AtTop) { 2958261991Sdim TempTracker.getMaxDownwardPressureDelta( 2959309124Sdim Cand.SU->getInstr(), 2960309124Sdim Cand.RPDelta, 2961261991Sdim DAG->getRegionCriticalPSets(), 2962261991Sdim DAG->getRegPressure().MaxSetPressure); 2963309124Sdim } else { 2964261991Sdim if (VerifyScheduling) { 2965261991Sdim TempTracker.getMaxUpwardPressureDelta( 2966309124Sdim Cand.SU->getInstr(), 2967309124Sdim &DAG->getPressureDiff(Cand.SU), 2968309124Sdim Cand.RPDelta, 2969261991Sdim DAG->getRegionCriticalPSets(), 2970261991Sdim DAG->getRegPressure().MaxSetPressure); 2971309124Sdim } else { 2972261991Sdim RPTracker.getUpwardPressureDelta( 2973309124Sdim Cand.SU->getInstr(), 2974309124Sdim DAG->getPressureDiff(Cand.SU), 2975309124Sdim Cand.RPDelta, 2976261991Sdim DAG->getRegionCriticalPSets(), 2977261991Sdim DAG->getRegPressure().MaxSetPressure); 2978261991Sdim } 2979261991Sdim } 2980261991Sdim } 2981341825Sdim LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() 2982341825Sdim << " Try SU(" << Cand.SU->NodeNum << ") " 2983341825Sdim << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" 2984341825Sdim << Cand.RPDelta.Excess.getUnitInc() << "\n"); 2985309124Sdim} 2986243830Sdim 2987341825Sdim/// Apply a set of heuristics to a new candidate. Heuristics are currently 2988309124Sdim/// hierarchical. This may be more efficient than a graduated cost model because 2989309124Sdim/// we don't need to evaluate all aspects of the model for each node in the 2990309124Sdim/// queue. 
But it's really done to make the heuristics easier to debug and 2991309124Sdim/// statistically analyze. 2992309124Sdim/// 2993309124Sdim/// \param Cand provides the policy and current best candidate. 2994309124Sdim/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 2995309124Sdim/// \param Zone describes the scheduled zone that we are extending, or nullptr 2996309124Sdim// if Cand is from a different zone than TryCand. 2997309124Sdimvoid GenericScheduler::tryCandidate(SchedCandidate &Cand, 2998309124Sdim SchedCandidate &TryCand, 2999341825Sdim SchedBoundary *Zone) const { 3000243830Sdim // Initialize the candidate if needed. 3001243830Sdim if (!Cand.isValid()) { 3002243830Sdim TryCand.Reason = NodeOrder; 3003243830Sdim return; 3004243830Sdim } 3005251662Sdim 3006344779Sdim // Bias PhysReg Defs and copies to their uses and defined respectively. 3007344779Sdim if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), 3008344779Sdim biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) 3009251662Sdim return; 3010251662Sdim 3011288943Sdim // Avoid exceeding the target's limit. 3012261991Sdim if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, 3013261991Sdim Cand.RPDelta.Excess, 3014296417Sdim TryCand, Cand, RegExcess, TRI, 3015296417Sdim DAG->MF)) 3016243830Sdim return; 3017243830Sdim 3018243830Sdim // Avoid increasing the max critical pressure in the scheduled region. 3019261991Sdim if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, 3020261991Sdim Cand.RPDelta.CriticalMax, 3021296417Sdim TryCand, Cand, RegCritical, TRI, 3022296417Sdim DAG->MF)) 3023243830Sdim return; 3024243830Sdim 3025309124Sdim // We only compare a subset of features when comparing nodes between 3026309124Sdim // Top and Bottom boundary. Some properties are simply incomparable, in many 3027309124Sdim // other instances we should only override the other boundary if something 3028309124Sdim // is a clear good pick on one boundary. 
Skip heuristics that are more 3029309124Sdim // "tie-breaking" in nature. 3030309124Sdim bool SameBoundary = Zone != nullptr; 3031309124Sdim if (SameBoundary) { 3032309124Sdim // For loops that are acyclic path limited, aggressively schedule for 3033314564Sdim // latency. Within an single cycle, whenever CurrMOps > 0, allow normal 3034314564Sdim // heuristics to take precedence. 3035309124Sdim if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && 3036309124Sdim tryLatency(TryCand, Cand, *Zone)) 3037309124Sdim return; 3038261991Sdim 3039309124Sdim // Prioritize instructions that read unbuffered resources by stall cycles. 3040309124Sdim if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), 3041309124Sdim Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 3042309124Sdim return; 3043309124Sdim } 3044276479Sdim 3045249423Sdim // Keep clustered nodes together to encourage downstream peephole 3046249423Sdim // optimizations which may reduce resource requirements. 3047249423Sdim // 3048249423Sdim // This is a best effort to set things up for a post-RA pass. Optimizations 3049249423Sdim // like generating loads of multiple registers should ideally be done within 3050249423Sdim // the scheduler pass by combining the loads during DAG postprocessing. 3051309124Sdim const SUnit *CandNextClusterSU = 3052309124Sdim Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 3053309124Sdim const SUnit *TryCandNextClusterSU = 3054309124Sdim TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 3055309124Sdim if (tryGreater(TryCand.SU == TryCandNextClusterSU, 3056309124Sdim Cand.SU == CandNextClusterSU, 3057249423Sdim TryCand, Cand, Cluster)) 3058249423Sdim return; 3059251662Sdim 3060309124Sdim if (SameBoundary) { 3061309124Sdim // Weak edges are for clustering and other constraints. 
3062309124Sdim if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), 3063309124Sdim getWeakLeft(Cand.SU, Cand.AtTop), 3064309124Sdim TryCand, Cand, Weak)) 3065309124Sdim return; 3066249423Sdim } 3067309124Sdim 3068261991Sdim // Avoid increasing the max pressure of the entire region. 3069261991Sdim if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, 3070261991Sdim Cand.RPDelta.CurrentMax, 3071296417Sdim TryCand, Cand, RegMax, TRI, 3072296417Sdim DAG->MF)) 3073261991Sdim return; 3074261991Sdim 3075309124Sdim if (SameBoundary) { 3076309124Sdim // Avoid critical resource consumption and balance the schedule. 3077309124Sdim TryCand.initResourceDelta(DAG, SchedModel); 3078309124Sdim if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 3079309124Sdim TryCand, Cand, ResourceReduce)) 3080309124Sdim return; 3081309124Sdim if (tryGreater(TryCand.ResDelta.DemandedResources, 3082309124Sdim Cand.ResDelta.DemandedResources, 3083309124Sdim TryCand, Cand, ResourceDemand)) 3084309124Sdim return; 3085243830Sdim 3086309124Sdim // Avoid serializing long latency dependence chains. 3087309124Sdim // For acyclic path limited loops, latency was already checked above. 3088309124Sdim if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && 3089309124Sdim !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) 3090309124Sdim return; 3091243830Sdim 3092309124Sdim // Fall through to original instruction order. 3093309124Sdim if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 3094309124Sdim || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { 3095309124Sdim TryCand.Reason = NodeOrder; 3096309124Sdim } 3097243830Sdim } 3098243830Sdim} 3099243830Sdim 3100261991Sdim/// Pick the best candidate from the queue. 3101239462Sdim/// 3102239462Sdim/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 3103239462Sdim/// DAG building. 
To adjust for the current scheduling location we need to 3104239462Sdim/// maintain the number of vreg uses remaining to be top-scheduled. 3105261991Sdimvoid GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 3106309124Sdim const CandPolicy &ZonePolicy, 3107276479Sdim const RegPressureTracker &RPTracker, 3108276479Sdim SchedCandidate &Cand) { 3109239462Sdim // getMaxPressureDelta temporarily modifies the tracker. 3110239462Sdim RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 3111239462Sdim 3112309124Sdim ReadyQueue &Q = Zone.Available; 3113321369Sdim for (SUnit *SU : Q) { 3114239462Sdim 3115309124Sdim SchedCandidate TryCand(ZonePolicy); 3116321369Sdim initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); 3117309124Sdim // Pass SchedBoundary only when comparing nodes from the same boundary. 3118309124Sdim SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 3119309124Sdim tryCandidate(Cand, TryCand, ZoneArg); 3120243830Sdim if (TryCand.Reason != NoCand) { 3121243830Sdim // Initialize resource delta if needed in case future heuristics query it. 3122243830Sdim if (TryCand.ResDelta == SchedResourceDelta()) 3123243830Sdim TryCand.initResourceDelta(DAG, SchedModel); 3124243830Sdim Cand.setBest(TryCand); 3125341825Sdim LLVM_DEBUG(traceCandidate(Cand)); 3126234285Sdim } 3127239462Sdim } 3128239462Sdim} 3129239462Sdim 3130239462Sdim/// Pick the best candidate node from either the top or bottom queue. 3131261991SdimSUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { 3132239462Sdim // Schedule as far as possible in the direction of no choice. This is most 3133239462Sdim // efficient, but also provides the best heuristics for CriticalPSets. 
3134239462Sdim if (SUnit *SU = Bot.pickOnlyChoice()) { 3135239462Sdim IsTopNode = false; 3136309124Sdim tracePick(Only1, false); 3137234285Sdim return SU; 3138234285Sdim } 3139239462Sdim if (SUnit *SU = Top.pickOnlyChoice()) { 3140239462Sdim IsTopNode = true; 3141309124Sdim tracePick(Only1, true); 3142239462Sdim return SU; 3143239462Sdim } 3144276479Sdim // Set the bottom-up policy based on the state of the current bottom zone and 3145276479Sdim // the instructions outside the zone, including the top zone. 3146309124Sdim CandPolicy BotPolicy; 3147309124Sdim setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 3148276479Sdim // Set the top-down policy based on the state of the current top zone and 3149276479Sdim // the instructions outside the zone, including the bottom zone. 3150309124Sdim CandPolicy TopPolicy; 3151309124Sdim setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 3152243830Sdim 3153309124Sdim // See if BotCand is still valid (because we previously scheduled from Top). 3154341825Sdim LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); 3155309124Sdim if (!BotCand.isValid() || BotCand.SU->isScheduled || 3156309124Sdim BotCand.Policy != BotPolicy) { 3157309124Sdim BotCand.reset(CandPolicy()); 3158309124Sdim pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 3159309124Sdim assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 3160309124Sdim } else { 3161341825Sdim LLVM_DEBUG(traceCandidate(BotCand)); 3162309124Sdim#ifndef NDEBUG 3163309124Sdim if (VerifyScheduling) { 3164309124Sdim SchedCandidate TCand; 3165309124Sdim TCand.reset(CandPolicy()); 3166309124Sdim pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); 3167309124Sdim assert(TCand.SU == BotCand.SU && 3168309124Sdim "Last pick result should correspond to re-picking right now"); 3169309124Sdim } 3170309124Sdim#endif 3171309124Sdim } 3172234285Sdim 3173309124Sdim // Check if the top Q has a better candidate. 
3174341825Sdim LLVM_DEBUG(dbgs() << "Picking from Top:\n"); 3175309124Sdim if (!TopCand.isValid() || TopCand.SU->isScheduled || 3176309124Sdim TopCand.Policy != TopPolicy) { 3177309124Sdim TopCand.reset(CandPolicy()); 3178309124Sdim pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 3179309124Sdim assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 3180309124Sdim } else { 3181341825Sdim LLVM_DEBUG(traceCandidate(TopCand)); 3182309124Sdim#ifndef NDEBUG 3183309124Sdim if (VerifyScheduling) { 3184309124Sdim SchedCandidate TCand; 3185309124Sdim TCand.reset(CandPolicy()); 3186309124Sdim pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); 3187309124Sdim assert(TCand.SU == TopCand.SU && 3188309124Sdim "Last pick result should correspond to re-picking right now"); 3189309124Sdim } 3190309124Sdim#endif 3191234285Sdim } 3192239462Sdim 3193309124Sdim // Pick best from BotCand and TopCand. 3194309124Sdim assert(BotCand.isValid()); 3195309124Sdim assert(TopCand.isValid()); 3196309124Sdim SchedCandidate Cand = BotCand; 3197309124Sdim TopCand.Reason = NoCand; 3198309124Sdim tryCandidate(Cand, TopCand, nullptr); 3199309124Sdim if (TopCand.Reason != NoCand) { 3200309124Sdim Cand.setBest(TopCand); 3201341825Sdim LLVM_DEBUG(traceCandidate(Cand)); 3202239462Sdim } 3203309124Sdim 3204309124Sdim IsTopNode = Cand.AtTop; 3205309124Sdim tracePick(Cand); 3206309124Sdim return Cand.SU; 3207239462Sdim} 3208234285Sdim 3209239462Sdim/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 
3210261991SdimSUnit *GenericScheduler::pickNode(bool &IsTopNode) { 3211239462Sdim if (DAG->top() == DAG->bottom()) { 3212239462Sdim assert(Top.Available.empty() && Top.Pending.empty() && 3213239462Sdim Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 3214276479Sdim return nullptr; 3215239462Sdim } 3216239462Sdim SUnit *SU; 3217243830Sdim do { 3218261991Sdim if (RegionPolicy.OnlyTopDown) { 3219243830Sdim SU = Top.pickOnlyChoice(); 3220243830Sdim if (!SU) { 3221243830Sdim CandPolicy NoPolicy; 3222309124Sdim TopCand.reset(NoPolicy); 3223309124Sdim pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 3224261991Sdim assert(TopCand.Reason != NoCand && "failed to find a candidate"); 3225309124Sdim tracePick(TopCand); 3226243830Sdim SU = TopCand.SU; 3227243830Sdim } 3228243830Sdim IsTopNode = true; 3229309124Sdim } else if (RegionPolicy.OnlyBottomUp) { 3230243830Sdim SU = Bot.pickOnlyChoice(); 3231243830Sdim if (!SU) { 3232243830Sdim CandPolicy NoPolicy; 3233309124Sdim BotCand.reset(NoPolicy); 3234309124Sdim pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 3235261991Sdim assert(BotCand.Reason != NoCand && "failed to find a candidate"); 3236309124Sdim tracePick(BotCand); 3237243830Sdim SU = BotCand.SU; 3238243830Sdim } 3239243830Sdim IsTopNode = false; 3240309124Sdim } else { 3241243830Sdim SU = pickNodeBidirectional(IsTopNode); 3242243830Sdim } 3243243830Sdim } while (SU->isScheduled); 3244243830Sdim 3245239462Sdim if (SU->isTopReady()) 3246239462Sdim Top.removeReady(SU); 3247239462Sdim if (SU->isBottomReady()) 3248239462Sdim Bot.removeReady(SU); 3249239462Sdim 3250341825Sdim LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 3251341825Sdim << *SU->getInstr()); 3252239462Sdim return SU; 3253239462Sdim} 3254239462Sdim 3255344779Sdimvoid GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { 3256251662Sdim MachineBasicBlock::iterator InsertPos = SU->getInstr(); 3257251662Sdim if (!isTop) 3258251662Sdim 
++InsertPos; 3259251662Sdim SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; 3260251662Sdim 3261251662Sdim // Find already scheduled copies with a single physreg dependence and move 3262251662Sdim // them just above the scheduled instruction. 3263321369Sdim for (SDep &Dep : Deps) { 3264360784Sdim if (Dep.getKind() != SDep::Data || 3265360784Sdim !Register::isPhysicalRegister(Dep.getReg())) 3266251662Sdim continue; 3267321369Sdim SUnit *DepSU = Dep.getSUnit(); 3268251662Sdim if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) 3269251662Sdim continue; 3270251662Sdim MachineInstr *Copy = DepSU->getInstr(); 3271344779Sdim if (!Copy->isCopy() && !Copy->isMoveImmediate()) 3272251662Sdim continue; 3273341825Sdim LLVM_DEBUG(dbgs() << " Rescheduling physreg copy "; 3274344779Sdim DAG->dumpNode(*Dep.getSUnit())); 3275251662Sdim DAG->moveInstruction(Copy, InsertPos); 3276251662Sdim } 3277251662Sdim} 3278251662Sdim 3279239462Sdim/// Update the scheduler's state after scheduling a node. This is the same node 3280276479Sdim/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to 3281276479Sdim/// update it's state based on the current cycle before MachineSchedStrategy 3282276479Sdim/// does. 3283251662Sdim/// 3284251662Sdim/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling 3285344779Sdim/// them here. See comments in biasPhysReg. 
3286261991Sdimvoid GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 3287239462Sdim if (IsTopNode) { 3288276479Sdim SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 3289239462Sdim Top.bumpNode(SU); 3290251662Sdim if (SU->hasPhysRegUses) 3291344779Sdim reschedulePhysReg(SU, true); 3292309124Sdim } else { 3293276479Sdim SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); 3294239462Sdim Bot.bumpNode(SU); 3295251662Sdim if (SU->hasPhysRegDefs) 3296344779Sdim reschedulePhysReg(SU, false); 3297239462Sdim } 3298239462Sdim} 3299239462Sdim 3300234285Sdim/// Create the standard converging machine scheduler. This will be used as the 3301234285Sdim/// default scheduler if the target does not set a default. 3302314564SdimScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { 3303321369Sdim ScheduleDAGMILive *DAG = 3304360784Sdim new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); 3305249423Sdim // Register DAG post-processors. 3306251662Sdim // 3307251662Sdim // FIXME: extend the mutation API to allow earlier mutations to instantiate 3308251662Sdim // data and pass it to later mutations. Have a single mutation that gathers 3309251662Sdim // the interesting nodes in one pass. 3310314564Sdim DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); 3311249423Sdim return DAG; 3312234285Sdim} 3313276479Sdim 3314314564Sdimstatic ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { 3315314564Sdim return createGenericSchedLive(C); 3316314564Sdim} 3317314564Sdim 3318234285Sdimstatic MachineSchedRegistry 3319261991SdimGenericSchedRegistry("converge", "Standard converging scheduler.", 3320314564Sdim createConveringSched); 3321234285Sdim 3322234285Sdim//===----------------------------------------------------------------------===// 3323276479Sdim// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. 
3324276479Sdim//===----------------------------------------------------------------------===// 3325276479Sdim 3326276479Sdimvoid PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { 3327276479Sdim DAG = Dag; 3328276479Sdim SchedModel = DAG->getSchedModel(); 3329276479Sdim TRI = DAG->TRI; 3330276479Sdim 3331276479Sdim Rem.init(DAG, SchedModel); 3332276479Sdim Top.init(DAG, SchedModel, &Rem); 3333276479Sdim BotRoots.clear(); 3334276479Sdim 3335276479Sdim // Initialize the HazardRecognizers. If itineraries don't exist, are empty, 3336276479Sdim // or are disabled, then these HazardRecs will be disabled. 3337276479Sdim const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 3338276479Sdim if (!Top.HazardRec) { 3339276479Sdim Top.HazardRec = 3340280031Sdim DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 3341280031Sdim Itin, DAG); 3342276479Sdim } 3343276479Sdim} 3344276479Sdim 3345276479Sdimvoid PostGenericScheduler::registerRoots() { 3346276479Sdim Rem.CriticalPath = DAG->ExitSU.getDepth(); 3347276479Sdim 3348276479Sdim // Some roots may not feed into ExitSU. Check all of them in case. 3349321369Sdim for (const SUnit *SU : BotRoots) { 3350321369Sdim if (SU->getDepth() > Rem.CriticalPath) 3351321369Sdim Rem.CriticalPath = SU->getDepth(); 3352276479Sdim } 3353341825Sdim LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); 3354280031Sdim if (DumpCriticalPathLength) { 3355280031Sdim errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; 3356280031Sdim } 3357276479Sdim} 3358276479Sdim 3359341825Sdim/// Apply a set of heuristics to a new candidate for PostRA scheduling. 3360276479Sdim/// 3361276479Sdim/// \param Cand provides the policy and current best candidate. 3362276479Sdim/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                                        SchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(),
                 TryCand, Cand, Cluster))
    return;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return;
  }

  // Fall through to original instruction order: prefer the earlier node.
  // Leaving TryCand.Reason == NoCand means the candidate is rejected.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;
}

/// Scan the top-zone ready queue, running tryCandidate on each entry and
/// keeping the best candidate found so far in \p Cand.
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  ReadyQueue &Q = Top.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = SU;
    // PostRA scheduling here is top-down only.
    TryCand.AtTop = true;
    TryCand.initResourceDelta(DAG, SchedModel);
    tryCandidate(Cand, TryCand);
    if (TryCand.Reason != NoCand) {
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

/// Pick the next node to schedule.
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    // Fast path: if only one choice is ready, take it without scanning.
    SU = Top.pickOnlyChoice();
    if (SU) {
      tracePick(Only1, true);
    } else {
      CandPolicy NoPolicy;
      SchedCandidate TopCand(NoPolicy);
      // Set the top-down policy based on the state of the current top zone and
      // the instructions outside the zone, including the bottom zone.
      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
      pickNodeFromQueue(TopCand);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      tracePick(TopCand);
      SU = TopCand.SU;
    }
  } while (SU->isScheduled);

  IsTopNode = true;
  Top.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

/// Called after ScheduleDAGMI has scheduled an instruction and updated
/// scheduled/remaining flags in the DAG nodes.
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  Top.bumpNode(SU);
}

/// Create a top-down post-RA scheduler; kill flags are removed because the
/// scheduler may reorder instructions across kill points.
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
                           /*RemoveKillFlags=*/true);
}

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {

/// Order nodes by the ILP metric.
struct ILPOrder {
  // Non-owning; both are set by the strategy's initialize() once DFS data
  // has been computed for the region.
  const SchedDFSResult *DFSResult = nullptr;
  const BitVector *ScheduledTrees = nullptr;
  bool MaximizeILP;

  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

  /// Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// Schedule based on the ILP metric.
3500243830Sdimclass ILPScheduler : public MachineSchedStrategy { 3501321369Sdim ScheduleDAGMILive *DAG = nullptr; 3502243830Sdim ILPOrder Cmp; 3503243830Sdim 3504243830Sdim std::vector<SUnit*> ReadyQ; 3505321369Sdim 3506243830Sdimpublic: 3507321369Sdim ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {} 3508243830Sdim 3509276479Sdim void initialize(ScheduleDAGMI *dag) override { 3510276479Sdim assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); 3511276479Sdim DAG = static_cast<ScheduleDAGMILive*>(dag); 3512249423Sdim DAG->computeDFSResult(); 3513249423Sdim Cmp.DFSResult = DAG->getDFSResult(); 3514249423Sdim Cmp.ScheduledTrees = &DAG->getScheduledTrees(); 3515243830Sdim ReadyQ.clear(); 3516243830Sdim } 3517243830Sdim 3518276479Sdim void registerRoots() override { 3519249423Sdim // Restore the heap in ReadyQ with the updated DFS results. 3520249423Sdim std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3521243830Sdim } 3522243830Sdim 3523243830Sdim /// Implement MachineSchedStrategy interface. 3524243830Sdim /// ----------------------------------------- 3525243830Sdim 3526249423Sdim /// Callback to select the highest priority node from the ready Q. 
3527276479Sdim SUnit *pickNode(bool &IsTopNode) override { 3528276479Sdim if (ReadyQ.empty()) return nullptr; 3529249423Sdim std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3530243830Sdim SUnit *SU = ReadyQ.back(); 3531243830Sdim ReadyQ.pop_back(); 3532243830Sdim IsTopNode = false; 3533341825Sdim LLVM_DEBUG(dbgs() << "Pick node " 3534341825Sdim << "SU(" << SU->NodeNum << ") " 3535341825Sdim << " ILP: " << DAG->getDFSResult()->getILP(SU) 3536341825Sdim << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) 3537341825Sdim << " @" 3538341825Sdim << DAG->getDFSResult()->getSubtreeLevel( 3539341825Sdim DAG->getDFSResult()->getSubtreeID(SU)) 3540341825Sdim << '\n' 3541341825Sdim << "Scheduling " << *SU->getInstr()); 3542243830Sdim return SU; 3543243830Sdim } 3544243830Sdim 3545341825Sdim /// Scheduler callback to notify that a new subtree is scheduled. 3546276479Sdim void scheduleTree(unsigned SubtreeID) override { 3547249423Sdim std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3548249423Sdim } 3549243830Sdim 3550249423Sdim /// Callback after a node is scheduled. Mark a newly scheduled tree, notify 3551249423Sdim /// DFSResults, and resort the priority Q. 
3552276479Sdim void schedNode(SUnit *SU, bool IsTopNode) override { 3553249423Sdim assert(!IsTopNode && "SchedDFSResult needs bottom-up"); 3554249423Sdim } 3555249423Sdim 3556276479Sdim void releaseTopNode(SUnit *) override { /*only called for top roots*/ } 3557243830Sdim 3558276479Sdim void releaseBottomNode(SUnit *SU) override { 3559243830Sdim ReadyQ.push_back(SU); 3560243830Sdim std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3561243830Sdim } 3562243830Sdim}; 3563243830Sdim 3564321369Sdim} // end anonymous namespace 3565321369Sdim 3566243830Sdimstatic ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 3567360784Sdim return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true)); 3568243830Sdim} 3569243830Sdimstatic ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 3570360784Sdim return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false)); 3571243830Sdim} 3572321369Sdim 3573243830Sdimstatic MachineSchedRegistry ILPMaxRegistry( 3574243830Sdim "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); 3575243830Sdimstatic MachineSchedRegistry ILPMinRegistry( 3576243830Sdim "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 3577243830Sdim 3578243830Sdim//===----------------------------------------------------------------------===// 3579234285Sdim// Machine Instruction Shuffler for Correctness Testing 3580234285Sdim//===----------------------------------------------------------------------===// 3581234285Sdim 3582234285Sdim#ifndef NDEBUG 3583234285Sdimnamespace { 3584321369Sdim 3585234285Sdim/// Apply a less-than relation on the node order, which corresponds to the 3586234285Sdim/// instruction order prior to scheduling. IsReverse implements greater-than. 
3587234285Sdimtemplate<bool IsReverse> 3588234285Sdimstruct SUnitOrder { 3589234285Sdim bool operator()(SUnit *A, SUnit *B) const { 3590234285Sdim if (IsReverse) 3591234285Sdim return A->NodeNum > B->NodeNum; 3592234285Sdim else 3593234285Sdim return A->NodeNum < B->NodeNum; 3594234285Sdim } 3595234285Sdim}; 3596234285Sdim 3597234285Sdim/// Reorder instructions as much as possible. 3598234285Sdimclass InstructionShuffler : public MachineSchedStrategy { 3599234285Sdim bool IsAlternating; 3600234285Sdim bool IsTopDown; 3601234285Sdim 3602234285Sdim // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 3603234285Sdim // gives nodes with a higher number higher priority causing the latest 3604234285Sdim // instructions to be scheduled first. 3605321369Sdim PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> 3606234285Sdim TopQ; 3607327952Sdim 3608234285Sdim // When scheduling bottom-up, use greater-than as the queue priority. 3609321369Sdim PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> 3610234285Sdim BottomQ; 3611321369Sdim 3612234285Sdimpublic: 3613234285Sdim InstructionShuffler(bool alternate, bool topdown) 3614234285Sdim : IsAlternating(alternate), IsTopDown(topdown) {} 3615234285Sdim 3616276479Sdim void initialize(ScheduleDAGMI*) override { 3617234285Sdim TopQ.clear(); 3618234285Sdim BottomQ.clear(); 3619234285Sdim } 3620234285Sdim 3621234285Sdim /// Implement MachineSchedStrategy interface. 
3622234285Sdim /// ----------------------------------------- 3623234285Sdim 3624276479Sdim SUnit *pickNode(bool &IsTopNode) override { 3625234285Sdim SUnit *SU; 3626234285Sdim if (IsTopDown) { 3627234285Sdim do { 3628276479Sdim if (TopQ.empty()) return nullptr; 3629234285Sdim SU = TopQ.top(); 3630234285Sdim TopQ.pop(); 3631234285Sdim } while (SU->isScheduled); 3632234285Sdim IsTopNode = true; 3633309124Sdim } else { 3634234285Sdim do { 3635276479Sdim if (BottomQ.empty()) return nullptr; 3636234285Sdim SU = BottomQ.top(); 3637234285Sdim BottomQ.pop(); 3638234285Sdim } while (SU->isScheduled); 3639234285Sdim IsTopNode = false; 3640234285Sdim } 3641234285Sdim if (IsAlternating) 3642234285Sdim IsTopDown = !IsTopDown; 3643234285Sdim return SU; 3644234285Sdim } 3645234285Sdim 3646276479Sdim void schedNode(SUnit *SU, bool IsTopNode) override {} 3647239462Sdim 3648276479Sdim void releaseTopNode(SUnit *SU) override { 3649234285Sdim TopQ.push(SU); 3650234285Sdim } 3651276479Sdim void releaseBottomNode(SUnit *SU) override { 3652234285Sdim BottomQ.push(SU); 3653234285Sdim } 3654234285Sdim}; 3655234285Sdim 3656321369Sdim} // end anonymous namespace 3657321369Sdim 3658234285Sdimstatic ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 3659234285Sdim bool Alternate = !ForceTopDown && !ForceBottomUp; 3660234285Sdim bool TopDown = !ForceBottomUp; 3661234285Sdim assert((TopDown || !ForceTopDown) && 3662234285Sdim "-misched-topdown incompatible with -misched-bottomup"); 3663321369Sdim return new ScheduleDAGMILive( 3664360784Sdim C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); 3665234285Sdim} 3666321369Sdim 3667234285Sdimstatic MachineSchedRegistry ShufflerRegistry( 3668234285Sdim "shuffle", "Shuffle machine instructions alternating directions", 3669234285Sdim createInstructionShuffler); 3670234285Sdim#endif // !NDEBUG 3671249423Sdim 3672249423Sdim//===----------------------------------------------------------------------===// 3673276479Sdim// 
GraphWriter support for ScheduleDAGMILive. 3674249423Sdim//===----------------------------------------------------------------------===// 3675249423Sdim 3676249423Sdim#ifndef NDEBUG 3677249423Sdimnamespace llvm { 3678249423Sdim 3679249423Sdimtemplate<> struct GraphTraits< 3680249423Sdim ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; 3681249423Sdim 3682249423Sdimtemplate<> 3683249423Sdimstruct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { 3684321369Sdim DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} 3685249423Sdim 3686249423Sdim static std::string getGraphName(const ScheduleDAG *G) { 3687249423Sdim return G->MF.getName(); 3688249423Sdim } 3689249423Sdim 3690249423Sdim static bool renderGraphFromBottomUp() { 3691249423Sdim return true; 3692249423Sdim } 3693249423Sdim 3694249423Sdim static bool isNodeHidden(const SUnit *Node) { 3695296417Sdim if (ViewMISchedCutoff == 0) 3696296417Sdim return false; 3697296417Sdim return (Node->Preds.size() > ViewMISchedCutoff 3698296417Sdim || Node->Succs.size() > ViewMISchedCutoff); 3699249423Sdim } 3700249423Sdim 3701249423Sdim /// If you want to override the dot attributes printed for a particular 3702249423Sdim /// edge, override this method. 3703249423Sdim static std::string getEdgeAttributes(const SUnit *Node, 3704249423Sdim SUnitIterator EI, 3705249423Sdim const ScheduleDAG *Graph) { 3706249423Sdim if (EI.isArtificialDep()) 3707249423Sdim return "color=cyan,style=dashed"; 3708249423Sdim if (EI.isCtrlDep()) 3709249423Sdim return "color=blue,style=dashed"; 3710249423Sdim return ""; 3711249423Sdim } 3712249423Sdim 3713249423Sdim static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { 3714249423Sdim std::string Str; 3715249423Sdim raw_string_ostream SS(Str); 3716276479Sdim const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 3717276479Sdim const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
3718276479Sdim static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 3719261991Sdim SS << "SU:" << SU->NodeNum; 3720261991Sdim if (DFS) 3721261991Sdim SS << " I:" << DFS->getNumInstrs(SU); 3722249423Sdim return SS.str(); 3723249423Sdim } 3724327952Sdim 3725249423Sdim static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { 3726249423Sdim return G->getGraphNodeLabel(SU); 3727249423Sdim } 3728249423Sdim 3729276479Sdim static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { 3730249423Sdim std::string Str("shape=Mrecord"); 3731276479Sdim const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 3732276479Sdim const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 3733276479Sdim static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 3734249423Sdim if (DFS) { 3735249423Sdim Str += ",style=filled,fillcolor=\"#"; 3736249423Sdim Str += DOT::getColorString(DFS->getSubtreeID(N)); 3737249423Sdim Str += '"'; 3738249423Sdim } 3739249423Sdim return Str; 3740249423Sdim } 3741249423Sdim}; 3742321369Sdim 3743321369Sdim} // end namespace llvm 3744249423Sdim#endif // NDEBUG 3745249423Sdim 3746249423Sdim/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG 3747249423Sdim/// rendered using 'dot'. 3748249423Sdimvoid ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { 3749249423Sdim#ifndef NDEBUG 3750249423Sdim ViewGraph(this, Name, false, Title); 3751249423Sdim#else 3752249423Sdim errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " 3753249423Sdim << "systems with Graphviz or gv!\n"; 3754249423Sdim#endif // NDEBUG 3755249423Sdim} 3756249423Sdim 3757249423Sdim/// Out-of-line implementation with no arguments is handy for gdb. 3758249423Sdimvoid ScheduleDAGMI::viewGraph() { 3759249423Sdim viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); 3760249423Sdim} 3761