1193323Sed//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This implements a fast scheduler. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14249423Sdim#include "llvm/CodeGen/SchedulerRegistry.h" 15249423Sdim#include "InstrEmitter.h" 16193323Sed#include "ScheduleDAGSDNodes.h" 17249423Sdim#include "llvm/ADT/STLExtras.h" 18249423Sdim#include "llvm/ADT/SmallSet.h" 19249423Sdim#include "llvm/ADT/Statistic.h" 20193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 21249423Sdim#include "llvm/IR/DataLayout.h" 22249423Sdim#include "llvm/IR/InlineAsm.h" 23193323Sed#include "llvm/Support/Debug.h" 24198090Srdivacky#include "llvm/Support/ErrorHandling.h" 25198090Srdivacky#include "llvm/Support/raw_ostream.h" 26249423Sdim#include "llvm/Target/TargetInstrInfo.h" 27249423Sdim#include "llvm/Target/TargetRegisterInfo.h" 28193323Sedusing namespace llvm; 29193323Sed 30276479Sdim#define DEBUG_TYPE "pre-RA-sched" 31276479Sdim 32193323SedSTATISTIC(NumUnfolds, "Number of nodes unfolded"); 33193323SedSTATISTIC(NumDups, "Number of duplicated nodes"); 34193323SedSTATISTIC(NumPRCopies, "Number of physical copies"); 35193323Sed 36193323Sedstatic RegisterScheduler 37193323Sed fastDAGScheduler("fast", "Fast suboptimal list scheduling", 38193323Sed createFastDAGScheduler); 39243830Sdimstatic RegisterScheduler 40243830Sdim linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", 41243830Sdim createDAGLinearizer); 42193323Sed 43243830Sdim 44193323Sednamespace { 45193323Sed /// FastPriorityQueue - A degenerate priority queue that considers 46193323Sed /// all nodes to have the same priority. 47193323Sed /// 48198892Srdivacky struct FastPriorityQueue { 49193323Sed SmallVector<SUnit *, 16> Queue; 50193323Sed 51193323Sed bool empty() const { return Queue.empty(); } 52234353Sdim 53193323Sed void push(SUnit *U) { 54193323Sed Queue.push_back(U); 55193323Sed } 56193323Sed 57193323Sed SUnit *pop() { 58276479Sdim if (empty()) return nullptr; 59193323Sed SUnit *V = Queue.back(); 60193323Sed Queue.pop_back(); 61193323Sed return V; 62193323Sed } 63193323Sed }; 64193323Sed 65193323Sed//===----------------------------------------------------------------------===// 66193323Sed/// ScheduleDAGFast - The actual "fast" list scheduler implementation. 67193323Sed/// 68198892Srdivackyclass ScheduleDAGFast : public ScheduleDAGSDNodes { 69193323Sedprivate: 70193323Sed /// AvailableQueue - The priority queue to use for the available SUnits. 71193323Sed FastPriorityQueue AvailableQueue; 72193323Sed 73193323Sed /// LiveRegDefs - A set of physical registers and their definition 74193323Sed /// that are "live". These nodes must be scheduled before any other nodes that 75193323Sed /// modifies the registers can be scheduled. 76193323Sed unsigned NumLiveRegs; 77193323Sed std::vector<SUnit*> LiveRegDefs; 78193323Sed std::vector<unsigned> LiveRegCycles; 79193323Sed 80193323Sedpublic: 81193323Sed ScheduleDAGFast(MachineFunction &mf) 82193323Sed : ScheduleDAGSDNodes(mf) {} 83193323Sed 84276479Sdim void Schedule() override; 85193323Sed 86193323Sed /// AddPred - adds a predecessor edge to SUnit SU. 87193323Sed /// This returns true if this is a new predecessor. 88193323Sed void AddPred(SUnit *SU, const SDep &D) { 89193323Sed SU->addPred(D); 90193323Sed } 91193323Sed 92193323Sed /// RemovePred - removes a predecessor edge from SUnit SU. 93193323Sed /// This returns true if an edge was removed. 94193323Sed void RemovePred(SUnit *SU, const SDep &D) { 95193323Sed SU->removePred(D); 96193323Sed } 97193323Sed 98193323Sedprivate: 99193323Sed void ReleasePred(SUnit *SU, SDep *PredEdge); 100193323Sed void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 101193323Sed void ScheduleNodeBottomUp(SUnit*, unsigned); 102193323Sed SUnit *CopyAndMoveSuccessors(SUnit*); 103193323Sed void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 104193323Sed const TargetRegisterClass*, 105193323Sed const TargetRegisterClass*, 106261991Sdim SmallVectorImpl<SUnit*>&); 107261991Sdim bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); 108193323Sed void ListScheduleBottomUp(); 109193323Sed 110234353Sdim /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. 111276479Sdim bool forceUnitLatencies() const override { return true; } 112193323Sed}; 113193323Sed} // end anonymous namespace 114193323Sed 115193323Sed 116193323Sed/// Schedule - Schedule the DAG using list scheduling. 117193323Sedvoid ScheduleDAGFast::Schedule() { 118202375Srdivacky DEBUG(dbgs() << "********** List Scheduling **********\n"); 119193323Sed 120193323Sed NumLiveRegs = 0; 121276479Sdim LiveRegDefs.resize(TRI->getNumRegs(), nullptr); 122193323Sed LiveRegCycles.resize(TRI->getNumRegs(), 0); 123193323Sed 124193323Sed // Build the scheduling graph. 125276479Sdim BuildSchedGraph(nullptr); 126193323Sed 127193323Sed DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 128193323Sed SUnits[su].dumpAll(this)); 129193323Sed 130193323Sed // Execute the actual scheduling loop. 131193323Sed ListScheduleBottomUp(); 132193323Sed} 133193323Sed 134193323Sed//===----------------------------------------------------------------------===// 135193323Sed// Bottom-Up Scheduling 136193323Sed//===----------------------------------------------------------------------===// 137193323Sed 138193323Sed/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 139193323Sed/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 140193323Sedvoid ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { 141193323Sed SUnit *PredSU = PredEdge->getSUnit(); 142198090Srdivacky 143193323Sed#ifndef NDEBUG 144198090Srdivacky if (PredSU->NumSuccsLeft == 0) { 145202375Srdivacky dbgs() << "*** Scheduling failed! ***\n"; 146193323Sed PredSU->dump(this); 147202375Srdivacky dbgs() << " has been released too many times!\n"; 148276479Sdim llvm_unreachable(nullptr); 149193323Sed } 150193323Sed#endif 151198090Srdivacky --PredSU->NumSuccsLeft; 152198090Srdivacky 153193323Sed // If all the node's successors are scheduled, this node is ready 154193323Sed // to be scheduled. Ignore the special EntrySU node. 155193323Sed if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 156193323Sed PredSU->isAvailable = true; 157193323Sed AvailableQueue.push(PredSU); 158193323Sed } 159193323Sed} 160193323Sed 161193323Sedvoid ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 162193323Sed // Bottom up: release predecessors 163193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 164193323Sed I != E; ++I) { 165193323Sed ReleasePred(SU, &*I); 166193323Sed if (I->isAssignedRegDep()) { 167193323Sed // This is a physical register dependency and it's impossible or 168234353Sdim // expensive to copy the register. Make sure nothing that can 169193323Sed // clobber the register is scheduled between the predecessor and 170193323Sed // this node. 171193323Sed if (!LiveRegDefs[I->getReg()]) { 172193323Sed ++NumLiveRegs; 173193323Sed LiveRegDefs[I->getReg()] = I->getSUnit(); 174193323Sed LiveRegCycles[I->getReg()] = CurCycle; 175193323Sed } 176193323Sed } 177193323Sed } 178193323Sed} 179193323Sed 180193323Sed/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 181193323Sed/// count of its predecessors. If a predecessor pending count is zero, add it to 182193323Sed/// the Available queue. 183193323Sedvoid ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 184202375Srdivacky DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 185193323Sed DEBUG(SU->dump(this)); 186193323Sed 187193323Sed assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 188193323Sed SU->setHeightToAtLeast(CurCycle); 189193323Sed Sequence.push_back(SU); 190193323Sed 191193323Sed ReleasePredecessors(SU, CurCycle); 192193323Sed 193193323Sed // Release all the implicit physical register defs that are live. 194193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 195193323Sed I != E; ++I) { 196193323Sed if (I->isAssignedRegDep()) { 197193323Sed if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 198193323Sed assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 199193323Sed assert(LiveRegDefs[I->getReg()] == SU && 200193323Sed "Physical register dependency violated?"); 201193323Sed --NumLiveRegs; 202276479Sdim LiveRegDefs[I->getReg()] = nullptr; 203193323Sed LiveRegCycles[I->getReg()] = 0; 204193323Sed } 205193323Sed } 206193323Sed } 207193323Sed 208193323Sed SU->isScheduled = true; 209193323Sed} 210193323Sed 211193323Sed/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 212193323Sed/// successors to the newly created node. 213193323SedSUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { 214218893Sdim if (SU->getNode()->getGluedNode()) 215276479Sdim return nullptr; 216193323Sed 217193323Sed SDNode *N = SU->getNode(); 218193323Sed if (!N) 219276479Sdim return nullptr; 220193323Sed 221193323Sed SUnit *NewSU; 222193323Sed bool TryUnfold = false; 223193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 224280031Sdim MVT VT = N->getSimpleValueType(i); 225218893Sdim if (VT == MVT::Glue) 226276479Sdim return nullptr; 227193323Sed else if (VT == MVT::Other) 228193323Sed TryUnfold = true; 229193323Sed } 230288943Sdim for (const SDValue &Op : N->op_values()) { 231280031Sdim MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 232218893Sdim if (VT == MVT::Glue) 233276479Sdim return nullptr; 234193323Sed } 235193323Sed 236193323Sed if (TryUnfold) { 237193323Sed SmallVector<SDNode*, 2> NewNodes; 238193323Sed if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 239276479Sdim return nullptr; 240193323Sed 241202375Srdivacky DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 242193323Sed assert(NewNodes.size() == 2 && "Expected a load folding node!"); 243193323Sed 244193323Sed N = NewNodes[1]; 245193323Sed SDNode *LoadNode = NewNodes[0]; 246193323Sed unsigned NumVals = N->getNumValues(); 247193323Sed unsigned OldNumVals = SU->getNode()->getNumValues(); 248193323Sed for (unsigned i = 0; i != NumVals; ++i) 249193323Sed DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 250193323Sed DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 251193323Sed SDValue(LoadNode, 1)); 252193323Sed 253234353Sdim SUnit *NewSU = newSUnit(N); 254193323Sed assert(N->getNodeId() == -1 && "Node already inserted!"); 255193323Sed N->setNodeId(NewSU->NodeNum); 256234353Sdim 257224145Sdim const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 258224145Sdim for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 259224145Sdim if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 260193323Sed NewSU->isTwoAddress = true; 261193323Sed break; 262193323Sed } 263193323Sed } 264224145Sdim if (MCID.isCommutable()) 265193323Sed NewSU->isCommutable = true; 266193323Sed 267193323Sed // LoadNode may already exist. This can happen when there is another 268193323Sed // load from the same location and producing the same type of value 269193323Sed // but it has different alignment or volatileness. 270193323Sed bool isNewLoad = true; 271193323Sed SUnit *LoadSU; 272193323Sed if (LoadNode->getNodeId() != -1) { 273193323Sed LoadSU = &SUnits[LoadNode->getNodeId()]; 274193323Sed isNewLoad = false; 275193323Sed } else { 276234353Sdim LoadSU = newSUnit(LoadNode); 277193323Sed LoadNode->setNodeId(LoadSU->NodeNum); 278193323Sed } 279193323Sed 280193323Sed SDep ChainPred; 281193323Sed SmallVector<SDep, 4> ChainSuccs; 282193323Sed SmallVector<SDep, 4> LoadPreds; 283193323Sed SmallVector<SDep, 4> NodePreds; 284193323Sed SmallVector<SDep, 4> NodeSuccs; 285193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 286193323Sed I != E; ++I) { 287193323Sed if (I->isCtrl()) 288193323Sed ChainPred = *I; 289193323Sed else if (I->getSUnit()->getNode() && 290193323Sed I->getSUnit()->getNode()->isOperandOf(LoadNode)) 291193323Sed LoadPreds.push_back(*I); 292193323Sed else 293193323Sed NodePreds.push_back(*I); 294193323Sed } 295193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 296193323Sed I != E; ++I) { 297193323Sed if (I->isCtrl()) 298193323Sed ChainSuccs.push_back(*I); 299193323Sed else 300193323Sed NodeSuccs.push_back(*I); 301193323Sed } 302193323Sed 303193323Sed if (ChainPred.getSUnit()) { 304193323Sed RemovePred(SU, ChainPred); 305193323Sed if (isNewLoad) 306193323Sed AddPred(LoadSU, ChainPred); 307193323Sed } 308193323Sed for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 309193323Sed const SDep &Pred = LoadPreds[i]; 310193323Sed RemovePred(SU, Pred); 311193323Sed if (isNewLoad) { 312193323Sed AddPred(LoadSU, Pred); 313193323Sed } 314193323Sed } 315193323Sed for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 316193323Sed const SDep &Pred = NodePreds[i]; 317193323Sed RemovePred(SU, Pred); 318193323Sed AddPred(NewSU, Pred); 319193323Sed } 320193323Sed for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 321193323Sed SDep D = NodeSuccs[i]; 322193323Sed SUnit *SuccDep = D.getSUnit(); 323193323Sed D.setSUnit(SU); 324193323Sed RemovePred(SuccDep, D); 325193323Sed D.setSUnit(NewSU); 326193323Sed AddPred(SuccDep, D); 327193323Sed } 328193323Sed for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 329193323Sed SDep D = ChainSuccs[i]; 330193323Sed SUnit *SuccDep = D.getSUnit(); 331193323Sed D.setSUnit(SU); 332193323Sed RemovePred(SuccDep, D); 333193323Sed if (isNewLoad) { 334193323Sed D.setSUnit(LoadSU); 335193323Sed AddPred(SuccDep, D); 336193323Sed } 337234353Sdim } 338193323Sed if (isNewLoad) { 339243830Sdim SDep D(LoadSU, SDep::Barrier); 340243830Sdim D.setLatency(LoadSU->Latency); 341243830Sdim AddPred(NewSU, D); 342193323Sed } 343193323Sed 344193323Sed ++NumUnfolds; 345193323Sed 346193323Sed if (NewSU->NumSuccsLeft == 0) { 347193323Sed NewSU->isAvailable = true; 348193323Sed return NewSU; 349193323Sed } 350193323Sed SU = NewSU; 351193323Sed } 352193323Sed 353202375Srdivacky DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 354193323Sed NewSU = Clone(SU); 355193323Sed 356193323Sed // New SUnit has the exact same predecessors. 357193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 358193323Sed I != E; ++I) 359193323Sed if (!I->isArtificial()) 360193323Sed AddPred(NewSU, *I); 361193323Sed 362193323Sed // Only copy scheduled successors. Cut them from old node's successor 363193323Sed // list and move them over. 364193323Sed SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 365193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 366193323Sed I != E; ++I) { 367193323Sed if (I->isArtificial()) 368193323Sed continue; 369193323Sed SUnit *SuccSU = I->getSUnit(); 370193323Sed if (SuccSU->isScheduled) { 371193323Sed SDep D = *I; 372193323Sed D.setSUnit(NewSU); 373193323Sed AddPred(SuccSU, D); 374193323Sed D.setSUnit(SU); 375193323Sed DelDeps.push_back(std::make_pair(SuccSU, D)); 376193323Sed } 377193323Sed } 378193323Sed for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 379193323Sed RemovePred(DelDeps[i].first, DelDeps[i].second); 380193323Sed 381193323Sed ++NumDups; 382193323Sed return NewSU; 383193323Sed} 384193323Sed 385193323Sed/// InsertCopiesAndMoveSuccs - Insert register copies and move all 386193323Sed/// scheduled successors of the given SUnit to the last copy. 387193323Sedvoid ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 388193323Sed const TargetRegisterClass *DestRC, 389193323Sed const TargetRegisterClass *SrcRC, 390261991Sdim SmallVectorImpl<SUnit*> &Copies) { 391276479Sdim SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); 392193323Sed CopyFromSU->CopySrcRC = SrcRC; 393193323Sed CopyFromSU->CopyDstRC = DestRC; 394193323Sed 395276479Sdim SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); 396193323Sed CopyToSU->CopySrcRC = DestRC; 397193323Sed CopyToSU->CopyDstRC = SrcRC; 398193323Sed 399193323Sed // Only copy scheduled successors. Cut them from old node's successor 400193323Sed // list and move them over. 401193323Sed SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 402193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 403193323Sed I != E; ++I) { 404193323Sed if (I->isArtificial()) 405193323Sed continue; 406193323Sed SUnit *SuccSU = I->getSUnit(); 407193323Sed if (SuccSU->isScheduled) { 408193323Sed SDep D = *I; 409193323Sed D.setSUnit(CopyToSU); 410193323Sed AddPred(SuccSU, D); 411193323Sed DelDeps.push_back(std::make_pair(SuccSU, *I)); 412193323Sed } 413193323Sed } 414193323Sed for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 415193323Sed RemovePred(DelDeps[i].first, DelDeps[i].second); 416193323Sed } 417243830Sdim SDep FromDep(SU, SDep::Data, Reg); 418243830Sdim FromDep.setLatency(SU->Latency); 419243830Sdim AddPred(CopyFromSU, FromDep); 420243830Sdim SDep ToDep(CopyFromSU, SDep::Data, 0); 421243830Sdim ToDep.setLatency(CopyFromSU->Latency); 422243830Sdim AddPred(CopyToSU, ToDep); 423193323Sed 424193323Sed Copies.push_back(CopyFromSU); 425193323Sed Copies.push_back(CopyToSU); 426193323Sed 427193323Sed ++NumPRCopies; 428193323Sed} 429193323Sed 430193323Sed/// getPhysicalRegisterVT - Returns the ValueType of the physical register 431193323Sed/// definition of the specified node. 432193323Sed/// FIXME: Move to SelectionDAG? 433280031Sdimstatic MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 434193323Sed const TargetInstrInfo *TII) { 435280031Sdim unsigned NumRes; 436280031Sdim if (N->getOpcode() == ISD::CopyFromReg) { 437280031Sdim // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 438280031Sdim NumRes = 1; 439280031Sdim } else { 440280031Sdim const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 441280031Sdim assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 442280031Sdim NumRes = MCID.getNumDefs(); 443296417Sdim for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { 444280031Sdim if (Reg == *ImpDef) 445280031Sdim break; 446280031Sdim ++NumRes; 447280031Sdim } 448193323Sed } 449280031Sdim return N->getSimpleValueType(NumRes); 450193323Sed} 451193323Sed 452212904Sdim/// CheckForLiveRegDef - Return true and update live register vector if the 453212904Sdim/// specified register def of the specified SUnit clobbers any "live" registers. 454212904Sdimstatic bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 455212904Sdim std::vector<SUnit*> &LiveRegDefs, 456212904Sdim SmallSet<unsigned, 4> &RegAdded, 457261991Sdim SmallVectorImpl<unsigned> &LRegs, 458212904Sdim const TargetRegisterInfo *TRI) { 459212904Sdim bool Added = false; 460239462Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { 461239462Sdim if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { 462280031Sdim if (RegAdded.insert(*AI).second) { 463239462Sdim LRegs.push_back(*AI); 464212904Sdim Added = true; 465212904Sdim } 466212904Sdim } 467239462Sdim } 468212904Sdim return Added; 469212904Sdim} 470212904Sdim 471193323Sed/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 472193323Sed/// scheduling of the given node to satisfy live physical register dependencies. 473193323Sed/// If the specific node is the last one that's available to schedule, do 474193323Sed/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 475193323Sedbool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, 476261991Sdim SmallVectorImpl<unsigned> &LRegs){ 477193323Sed if (NumLiveRegs == 0) 478193323Sed return false; 479193323Sed 480193323Sed SmallSet<unsigned, 4> RegAdded; 481193323Sed // If this node would clobber any "live" register, then it's not ready. 482193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 483193323Sed I != E; ++I) { 484193323Sed if (I->isAssignedRegDep()) { 485212904Sdim CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 486212904Sdim RegAdded, LRegs, TRI); 487193323Sed } 488193323Sed } 489193323Sed 490218893Sdim for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 491212904Sdim if (Node->getOpcode() == ISD::INLINEASM) { 492212904Sdim // Inline asm can clobber physical defs. 493212904Sdim unsigned NumOps = Node->getNumOperands(); 494218893Sdim if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 495218893Sdim --NumOps; // Ignore the glue operand. 496212904Sdim 497212904Sdim for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 498212904Sdim unsigned Flags = 499212904Sdim cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 500212904Sdim unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 501212904Sdim 502212904Sdim ++i; // Skip the ID value. 503212904Sdim if (InlineAsm::isRegDefKind(Flags) || 504224145Sdim InlineAsm::isRegDefEarlyClobberKind(Flags) || 505224145Sdim InlineAsm::isClobberKind(Flags)) { 506212904Sdim // Check for def of register or earlyclobber register. 507212904Sdim for (; NumVals; --NumVals, ++i) { 508212904Sdim unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 509212904Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 510212904Sdim CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 511212904Sdim } 512212904Sdim } else 513212904Sdim i += NumVals; 514212904Sdim } 515212904Sdim continue; 516212904Sdim } 517193323Sed if (!Node->isMachineOpcode()) 518193323Sed continue; 519224145Sdim const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 520224145Sdim if (!MCID.ImplicitDefs) 521193323Sed continue; 522296417Sdim for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { 523212904Sdim CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 524193323Sed } 525193323Sed } 526193323Sed return !LRegs.empty(); 527193323Sed} 528193323Sed 529193323Sed 530193323Sed/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 531193323Sed/// schedulers. 532193323Sedvoid ScheduleDAGFast::ListScheduleBottomUp() { 533193323Sed unsigned CurCycle = 0; 534193323Sed 535193323Sed // Release any predecessors of the special Exit node. 536193323Sed ReleasePredecessors(&ExitSU, CurCycle); 537193323Sed 538193323Sed // Add root to Available queue. 539193323Sed if (!SUnits.empty()) { 540193323Sed SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 541193323Sed assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 542193323Sed RootSU->isAvailable = true; 543193323Sed AvailableQueue.push(RootSU); 544193323Sed } 545193323Sed 546193323Sed // While Available queue is not empty, grab the node with the highest 547193323Sed // priority. If it is not ready put it back. Schedule the node. 548193323Sed SmallVector<SUnit*, 4> NotReady; 549193323Sed DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 550193323Sed Sequence.reserve(SUnits.size()); 551193323Sed while (!AvailableQueue.empty()) { 552193323Sed bool Delayed = false; 553193323Sed LRegsMap.clear(); 554193323Sed SUnit *CurSU = AvailableQueue.pop(); 555193323Sed while (CurSU) { 556193323Sed SmallVector<unsigned, 4> LRegs; 557193323Sed if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 558193323Sed break; 559193323Sed Delayed = true; 560193323Sed LRegsMap.insert(std::make_pair(CurSU, LRegs)); 561193323Sed 562193323Sed CurSU->isPending = true; // This SU is not in AvailableQueue right now. 563193323Sed NotReady.push_back(CurSU); 564193323Sed CurSU = AvailableQueue.pop(); 565193323Sed } 566193323Sed 567193323Sed // All candidates are delayed due to live physical reg dependencies. 568193323Sed // Try code duplication or inserting cross class copies 569193323Sed // to resolve it. 570193323Sed if (Delayed && !CurSU) { 571193323Sed if (!CurSU) { 572193323Sed // Try duplicating the nodes that produces these 573193323Sed // "expensive to copy" values to break the dependency. In case even 574193323Sed // that doesn't work, insert cross class copies. 575193323Sed SUnit *TrySU = NotReady[0]; 576261991Sdim SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 577193323Sed assert(LRegs.size() == 1 && "Can't handle this yet!"); 578193323Sed unsigned Reg = LRegs[0]; 579193323Sed SUnit *LRDef = LiveRegDefs[Reg]; 580280031Sdim MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 581193323Sed const TargetRegisterClass *RC = 582210299Sed TRI->getMinimalPhysRegClass(Reg, VT); 583193323Sed const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 584193323Sed 585221345Sdim // If cross copy register class is the same as RC, then it must be 586221345Sdim // possible copy the value directly. Do not try duplicate the def. 587221345Sdim // If cross copy register class is not the same as RC, then it's 588221345Sdim // possible to copy the value but it require cross register class copies 589221345Sdim // and it is expensive. 590221345Sdim // If cross copy register class is null, then it's not possible to copy 591221345Sdim // the value at all. 592276479Sdim SUnit *NewDef = nullptr; 593221345Sdim if (DestRC != RC) { 594193323Sed NewDef = CopyAndMoveSuccessors(LRDef); 595221345Sdim if (!DestRC && !NewDef) 596221345Sdim report_fatal_error("Can't handle live physical " 597221345Sdim "register dependency!"); 598221345Sdim } 599193323Sed if (!NewDef) { 600193323Sed // Issue copies, these can be expensive cross register class copies. 601193323Sed SmallVector<SUnit*, 2> Copies; 602193323Sed InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 603202375Srdivacky DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum 604198090Srdivacky << " to SU #" << Copies.front()->NodeNum << "\n"); 605243830Sdim AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); 606193323Sed NewDef = Copies.back(); 607193323Sed } 608193323Sed 609202375Srdivacky DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum 610198090Srdivacky << " to SU #" << TrySU->NodeNum << "\n"); 611193323Sed LiveRegDefs[Reg] = NewDef; 612243830Sdim AddPred(NewDef, SDep(TrySU, SDep::Artificial)); 613193323Sed TrySU->isAvailable = false; 614193323Sed CurSU = NewDef; 615193323Sed } 616193323Sed 617193323Sed if (!CurSU) { 618198090Srdivacky llvm_unreachable("Unable to resolve live physical register dependencies!"); 619193323Sed } 620193323Sed } 621193323Sed 622193323Sed // Add the nodes that aren't ready back onto the available list. 623193323Sed for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 624193323Sed NotReady[i]->isPending = false; 625193323Sed // May no longer be available due to backtracking. 626193323Sed if (NotReady[i]->isAvailable) 627193323Sed AvailableQueue.push(NotReady[i]); 628193323Sed } 629193323Sed NotReady.clear(); 630193323Sed 631193323Sed if (CurSU) 632193323Sed ScheduleNodeBottomUp(CurSU, CurCycle); 633193323Sed ++CurCycle; 634193323Sed } 635193323Sed 636198090Srdivacky // Reverse the order since it is bottom up. 637193323Sed std::reverse(Sequence.begin(), Sequence.end()); 638198090Srdivacky 639193323Sed#ifndef NDEBUG 640234353Sdim VerifyScheduledSequence(/*isBottomUp=*/true); 641193323Sed#endif 642193323Sed} 643193323Sed 644243830Sdim 645243830Sdimnamespace { 646193323Sed//===----------------------------------------------------------------------===// 647243830Sdim// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the 648243830Sdim// DAG in topological order. 649243830Sdim// IMPORTANT: this may not work for targets with phyreg dependency. 650243830Sdim// 651243830Sdimclass ScheduleDAGLinearize : public ScheduleDAGSDNodes { 652243830Sdimpublic: 653243830Sdim ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} 654243830Sdim 655276479Sdim void Schedule() override; 656243830Sdim 657276479Sdim MachineBasicBlock * 658276479Sdim EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; 659243830Sdim 660243830Sdimprivate: 661243830Sdim std::vector<SDNode*> Sequence; 662243830Sdim DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user 663243830Sdim 664243830Sdim void ScheduleNode(SDNode *N); 665243830Sdim}; 666243830Sdim} // end anonymous namespace 667243830Sdim 668243830Sdimvoid ScheduleDAGLinearize::ScheduleNode(SDNode *N) { 669243830Sdim if (N->getNodeId() != 0) 670276479Sdim llvm_unreachable(nullptr); 671243830Sdim 672243830Sdim if (!N->isMachineOpcode() && 673243830Sdim (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) 674243830Sdim // These nodes do not need to be translated into MIs. 675243830Sdim return; 676243830Sdim 677243830Sdim DEBUG(dbgs() << "\n*** Scheduling: "); 678243830Sdim DEBUG(N->dump(DAG)); 679243830Sdim Sequence.push_back(N); 680243830Sdim 681243830Sdim unsigned NumOps = N->getNumOperands(); 682243830Sdim if (unsigned NumLeft = NumOps) { 683276479Sdim SDNode *GluedOpN = nullptr; 684243830Sdim do { 685243830Sdim const SDValue &Op = N->getOperand(NumLeft-1); 686243830Sdim SDNode *OpN = Op.getNode(); 687243830Sdim 688243830Sdim if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { 689243830Sdim // Schedule glue operand right above N. 690243830Sdim GluedOpN = OpN; 691243830Sdim assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); 692243830Sdim OpN->setNodeId(0); 693243830Sdim ScheduleNode(OpN); 694243830Sdim continue; 695243830Sdim } 696243830Sdim 697243830Sdim if (OpN == GluedOpN) 698243830Sdim // Glue operand is already scheduled. 699243830Sdim continue; 700243830Sdim 701243830Sdim DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); 702243830Sdim if (DI != GluedMap.end() && DI->second != N) 703243830Sdim // Users of glues are counted against the glued users. 704243830Sdim OpN = DI->second; 705243830Sdim 706243830Sdim unsigned Degree = OpN->getNodeId(); 707243830Sdim assert(Degree > 0 && "Predecessor over-released!"); 708243830Sdim OpN->setNodeId(--Degree); 709243830Sdim if (Degree == 0) 710243830Sdim ScheduleNode(OpN); 711243830Sdim } while (--NumLeft); 712243830Sdim } 713243830Sdim} 714243830Sdim 715243830Sdim/// findGluedUser - Find the representative use of a glue value by walking 716243830Sdim/// the use chain. 717243830Sdimstatic SDNode *findGluedUser(SDNode *N) { 718243830Sdim while (SDNode *Glued = N->getGluedUser()) 719243830Sdim N = Glued; 720243830Sdim return N; 721243830Sdim} 722243830Sdim 723243830Sdimvoid ScheduleDAGLinearize::Schedule() { 724243830Sdim DEBUG(dbgs() << "********** DAG Linearization **********\n"); 725243830Sdim 726243830Sdim SmallVector<SDNode*, 8> Glues; 727243830Sdim unsigned DAGSize = 0; 728288943Sdim for (SDNode &Node : DAG->allnodes()) { 729288943Sdim SDNode *N = &Node; 730243830Sdim 731243830Sdim // Use node id to record degree. 732243830Sdim unsigned Degree = N->use_size(); 733243830Sdim N->setNodeId(Degree); 734243830Sdim unsigned NumVals = N->getNumValues(); 735243830Sdim if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && 736243830Sdim N->hasAnyUseOfValue(NumVals-1)) { 737243830Sdim SDNode *User = findGluedUser(N); 738243830Sdim if (User) { 739243830Sdim Glues.push_back(N); 740243830Sdim GluedMap.insert(std::make_pair(N, User)); 741243830Sdim } 742243830Sdim } 743243830Sdim 744243830Sdim if (N->isMachineOpcode() || 745243830Sdim (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) 746243830Sdim ++DAGSize; 747243830Sdim } 748243830Sdim 749243830Sdim for (unsigned i = 0, e = Glues.size(); i != e; ++i) { 750243830Sdim SDNode *Glue = Glues[i]; 751243830Sdim SDNode *GUser = GluedMap[Glue]; 752243830Sdim unsigned Degree = Glue->getNodeId(); 753243830Sdim unsigned UDegree = GUser->getNodeId(); 754243830Sdim 755243830Sdim // Glue user must be scheduled together with the glue operand. So other 756243830Sdim // users of the glue operand must be treated as its users. 757243830Sdim SDNode *ImmGUser = Glue->getGluedUser(); 758243830Sdim for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); 759243830Sdim ui != ue; ++ui) 760243830Sdim if (*ui == ImmGUser) 761243830Sdim --Degree; 762243830Sdim GUser->setNodeId(UDegree + Degree); 763243830Sdim Glue->setNodeId(1); 764243830Sdim } 765243830Sdim 766243830Sdim Sequence.reserve(DAGSize); 767243830Sdim ScheduleNode(DAG->getRoot().getNode()); 768243830Sdim} 769243830Sdim 770243830SdimMachineBasicBlock* 771243830SdimScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { 772243830Sdim InstrEmitter Emitter(BB, InsertPos); 773243830Sdim DenseMap<SDValue, unsigned> VRBaseMap; 774243830Sdim 775243830Sdim DEBUG({ 776243830Sdim dbgs() << "\n*** Final schedule ***\n"; 777243830Sdim }); 778243830Sdim 779243830Sdim // FIXME: Handle dbg_values. 780243830Sdim unsigned NumNodes = Sequence.size(); 781243830Sdim for (unsigned i = 0; i != NumNodes; ++i) { 782243830Sdim SDNode *N = Sequence[NumNodes-i-1]; 783243830Sdim DEBUG(N->dump(DAG)); 784243830Sdim Emitter.EmitNode(N, false, false, VRBaseMap); 785243830Sdim } 786243830Sdim 787243830Sdim DEBUG(dbgs() << '\n'); 788243830Sdim 789243830Sdim InsertPos = Emitter.getInsertPos(); 790243830Sdim return Emitter.getBlock(); 791243830Sdim} 792243830Sdim 793243830Sdim//===----------------------------------------------------------------------===// 794193323Sed// Public Constructor Functions 795193323Sed//===----------------------------------------------------------------------===// 796193323Sed 797193323Sedllvm::ScheduleDAGSDNodes * 798193323Sedllvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 799193323Sed return new ScheduleDAGFast(*IS->MF); 800193323Sed} 801243830Sdim 802243830Sdimllvm::ScheduleDAGSDNodes * 803243830Sdimllvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { 804243830Sdim return new ScheduleDAGLinearize(*IS->MF); 805243830Sdim} 806