// ScheduleDAGFast.cpp — LLVM "fast" pre-RA list scheduler (revision 261991).
//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a fast scheduler.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "InstrEmitter.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical copies");

// Register both schedulers implemented in this file with the
// -pre-RA-sched= command-line machinery.
static RegisterScheduler
  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
                   createFastDAGScheduler);
static RegisterScheduler
  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
                        createDAGLinearizer);


namespace {
  /// FastPriorityQueue - A degenerate priority queue that considers
  /// all nodes to have the same priority.
  ///
  struct FastPriorityQueue {
    SmallVector<SUnit *, 16> Queue;

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push_back(U);
    }

    /// pop - Remove and return the most recently pushed unit (LIFO order),
    /// or NULL if the queue is empty.
    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.back();
      Queue.pop_back();
      return V;
    }
  };

//===----------------------------------------------------------------------===//
/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
///
class ScheduleDAGFast : public ScheduleDAGSDNodes {
private:
  /// AvailableQueue - The priority queue to use for the available SUnits.
  FastPriorityQueue AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;    // Indexed by physical register number.
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGFast(MachineFunction &mf)
    : ScheduleDAGSDNodes(mf) {}

  void Schedule();

  /// AddPred - adds a predecessor edge to SUnit SU.
  void AddPred(SUnit *SU, const SDep &D) {
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  void RemovePred(SUnit *SU, const SDep &D) {
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
  void ListScheduleBottomUp();

  /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
  bool forceUnitLatencies() const { return true; }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  // A predecessor may only be released once per successor edge; a zero
  // count here means the graph is malformed or the scheduler has a bug.
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue.push(PredSU);
  }
}

/// ReleasePredecessors - Release every predecessor edge of SU (bottom-up),
/// recording physical-register dependencies as "live" so that nothing that
/// clobbers those registers gets scheduled in between.
void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // SU is the recorded live def of this register (asserted below);
      // once it is scheduled bottom-up, the register is no longer live.
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
212193323SedSUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { 213218893Sdim if (SU->getNode()->getGluedNode()) 214193323Sed return NULL; 215193323Sed 216193323Sed SDNode *N = SU->getNode(); 217193323Sed if (!N) 218193323Sed return NULL; 219193323Sed 220193323Sed SUnit *NewSU; 221193323Sed bool TryUnfold = false; 222193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 223198090Srdivacky EVT VT = N->getValueType(i); 224218893Sdim if (VT == MVT::Glue) 225193323Sed return NULL; 226193323Sed else if (VT == MVT::Other) 227193323Sed TryUnfold = true; 228193323Sed } 229193323Sed for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 230193323Sed const SDValue &Op = N->getOperand(i); 231198090Srdivacky EVT VT = Op.getNode()->getValueType(Op.getResNo()); 232218893Sdim if (VT == MVT::Glue) 233193323Sed return NULL; 234193323Sed } 235193323Sed 236193323Sed if (TryUnfold) { 237193323Sed SmallVector<SDNode*, 2> NewNodes; 238193323Sed if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 239193323Sed return NULL; 240193323Sed 241202375Srdivacky DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 242193323Sed assert(NewNodes.size() == 2 && "Expected a load folding node!"); 243193323Sed 244193323Sed N = NewNodes[1]; 245193323Sed SDNode *LoadNode = NewNodes[0]; 246193323Sed unsigned NumVals = N->getNumValues(); 247193323Sed unsigned OldNumVals = SU->getNode()->getNumValues(); 248193323Sed for (unsigned i = 0; i != NumVals; ++i) 249193323Sed DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 250193323Sed DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 251193323Sed SDValue(LoadNode, 1)); 252193323Sed 253234353Sdim SUnit *NewSU = newSUnit(N); 254193323Sed assert(N->getNodeId() == -1 && "Node already inserted!"); 255193323Sed N->setNodeId(NewSU->NodeNum); 256234353Sdim 257224145Sdim const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 258224145Sdim for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 
259224145Sdim if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 260193323Sed NewSU->isTwoAddress = true; 261193323Sed break; 262193323Sed } 263193323Sed } 264224145Sdim if (MCID.isCommutable()) 265193323Sed NewSU->isCommutable = true; 266193323Sed 267193323Sed // LoadNode may already exist. This can happen when there is another 268193323Sed // load from the same location and producing the same type of value 269193323Sed // but it has different alignment or volatileness. 270193323Sed bool isNewLoad = true; 271193323Sed SUnit *LoadSU; 272193323Sed if (LoadNode->getNodeId() != -1) { 273193323Sed LoadSU = &SUnits[LoadNode->getNodeId()]; 274193323Sed isNewLoad = false; 275193323Sed } else { 276234353Sdim LoadSU = newSUnit(LoadNode); 277193323Sed LoadNode->setNodeId(LoadSU->NodeNum); 278193323Sed } 279193323Sed 280193323Sed SDep ChainPred; 281193323Sed SmallVector<SDep, 4> ChainSuccs; 282193323Sed SmallVector<SDep, 4> LoadPreds; 283193323Sed SmallVector<SDep, 4> NodePreds; 284193323Sed SmallVector<SDep, 4> NodeSuccs; 285193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 286193323Sed I != E; ++I) { 287193323Sed if (I->isCtrl()) 288193323Sed ChainPred = *I; 289193323Sed else if (I->getSUnit()->getNode() && 290193323Sed I->getSUnit()->getNode()->isOperandOf(LoadNode)) 291193323Sed LoadPreds.push_back(*I); 292193323Sed else 293193323Sed NodePreds.push_back(*I); 294193323Sed } 295193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 296193323Sed I != E; ++I) { 297193323Sed if (I->isCtrl()) 298193323Sed ChainSuccs.push_back(*I); 299193323Sed else 300193323Sed NodeSuccs.push_back(*I); 301193323Sed } 302193323Sed 303193323Sed if (ChainPred.getSUnit()) { 304193323Sed RemovePred(SU, ChainPred); 305193323Sed if (isNewLoad) 306193323Sed AddPred(LoadSU, ChainPred); 307193323Sed } 308193323Sed for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 309193323Sed const SDep &Pred = LoadPreds[i]; 310193323Sed RemovePred(SU, 
Pred); 311193323Sed if (isNewLoad) { 312193323Sed AddPred(LoadSU, Pred); 313193323Sed } 314193323Sed } 315193323Sed for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 316193323Sed const SDep &Pred = NodePreds[i]; 317193323Sed RemovePred(SU, Pred); 318193323Sed AddPred(NewSU, Pred); 319193323Sed } 320193323Sed for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 321193323Sed SDep D = NodeSuccs[i]; 322193323Sed SUnit *SuccDep = D.getSUnit(); 323193323Sed D.setSUnit(SU); 324193323Sed RemovePred(SuccDep, D); 325193323Sed D.setSUnit(NewSU); 326193323Sed AddPred(SuccDep, D); 327193323Sed } 328193323Sed for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 329193323Sed SDep D = ChainSuccs[i]; 330193323Sed SUnit *SuccDep = D.getSUnit(); 331193323Sed D.setSUnit(SU); 332193323Sed RemovePred(SuccDep, D); 333193323Sed if (isNewLoad) { 334193323Sed D.setSUnit(LoadSU); 335193323Sed AddPred(SuccDep, D); 336193323Sed } 337234353Sdim } 338193323Sed if (isNewLoad) { 339243830Sdim SDep D(LoadSU, SDep::Barrier); 340243830Sdim D.setLatency(LoadSU->Latency); 341243830Sdim AddPred(NewSU, D); 342193323Sed } 343193323Sed 344193323Sed ++NumUnfolds; 345193323Sed 346193323Sed if (NewSU->NumSuccsLeft == 0) { 347193323Sed NewSU->isAvailable = true; 348193323Sed return NewSU; 349193323Sed } 350193323Sed SU = NewSU; 351193323Sed } 352193323Sed 353202375Srdivacky DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 354193323Sed NewSU = Clone(SU); 355193323Sed 356193323Sed // New SUnit has the exact same predecessors. 357193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 358193323Sed I != E; ++I) 359193323Sed if (!I->isArtificial()) 360193323Sed AddPred(NewSU, *I); 361193323Sed 362193323Sed // Only copy scheduled successors. Cut them from old node's successor 363193323Sed // list and move them over. 
364193323Sed SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 365193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 366193323Sed I != E; ++I) { 367193323Sed if (I->isArtificial()) 368193323Sed continue; 369193323Sed SUnit *SuccSU = I->getSUnit(); 370193323Sed if (SuccSU->isScheduled) { 371193323Sed SDep D = *I; 372193323Sed D.setSUnit(NewSU); 373193323Sed AddPred(SuccSU, D); 374193323Sed D.setSUnit(SU); 375193323Sed DelDeps.push_back(std::make_pair(SuccSU, D)); 376193323Sed } 377193323Sed } 378193323Sed for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 379193323Sed RemovePred(DelDeps[i].first, DelDeps[i].second); 380193323Sed 381193323Sed ++NumDups; 382193323Sed return NewSU; 383193323Sed} 384193323Sed 385193323Sed/// InsertCopiesAndMoveSuccs - Insert register copies and move all 386193323Sed/// scheduled successors of the given SUnit to the last copy. 387193323Sedvoid ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 388193323Sed const TargetRegisterClass *DestRC, 389193323Sed const TargetRegisterClass *SrcRC, 390261991Sdim SmallVectorImpl<SUnit*> &Copies) { 391234353Sdim SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); 392193323Sed CopyFromSU->CopySrcRC = SrcRC; 393193323Sed CopyFromSU->CopyDstRC = DestRC; 394193323Sed 395234353Sdim SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); 396193323Sed CopyToSU->CopySrcRC = DestRC; 397193323Sed CopyToSU->CopyDstRC = SrcRC; 398193323Sed 399193323Sed // Only copy scheduled successors. Cut them from old node's successor 400193323Sed // list and move them over. 
401193323Sed SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 402193323Sed for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 403193323Sed I != E; ++I) { 404193323Sed if (I->isArtificial()) 405193323Sed continue; 406193323Sed SUnit *SuccSU = I->getSUnit(); 407193323Sed if (SuccSU->isScheduled) { 408193323Sed SDep D = *I; 409193323Sed D.setSUnit(CopyToSU); 410193323Sed AddPred(SuccSU, D); 411193323Sed DelDeps.push_back(std::make_pair(SuccSU, *I)); 412193323Sed } 413193323Sed } 414193323Sed for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 415193323Sed RemovePred(DelDeps[i].first, DelDeps[i].second); 416193323Sed } 417243830Sdim SDep FromDep(SU, SDep::Data, Reg); 418243830Sdim FromDep.setLatency(SU->Latency); 419243830Sdim AddPred(CopyFromSU, FromDep); 420243830Sdim SDep ToDep(CopyFromSU, SDep::Data, 0); 421243830Sdim ToDep.setLatency(CopyFromSU->Latency); 422243830Sdim AddPred(CopyToSU, ToDep); 423193323Sed 424193323Sed Copies.push_back(CopyFromSU); 425193323Sed Copies.push_back(CopyToSU); 426193323Sed 427193323Sed ++NumPRCopies; 428193323Sed} 429193323Sed 430193323Sed/// getPhysicalRegisterVT - Returns the ValueType of the physical register 431193323Sed/// definition of the specified node. 432193323Sed/// FIXME: Move to SelectionDAG? 
433198090Srdivackystatic EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 434193323Sed const TargetInstrInfo *TII) { 435224145Sdim const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 436224145Sdim assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 437224145Sdim unsigned NumRes = MCID.getNumDefs(); 438234353Sdim for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { 439193323Sed if (Reg == *ImpDef) 440193323Sed break; 441193323Sed ++NumRes; 442193323Sed } 443193323Sed return N->getValueType(NumRes); 444193323Sed} 445193323Sed 446212904Sdim/// CheckForLiveRegDef - Return true and update live register vector if the 447212904Sdim/// specified register def of the specified SUnit clobbers any "live" registers. 448212904Sdimstatic bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 449212904Sdim std::vector<SUnit*> &LiveRegDefs, 450212904Sdim SmallSet<unsigned, 4> &RegAdded, 451261991Sdim SmallVectorImpl<unsigned> &LRegs, 452212904Sdim const TargetRegisterInfo *TRI) { 453212904Sdim bool Added = false; 454239462Sdim for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { 455239462Sdim if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { 456239462Sdim if (RegAdded.insert(*AI)) { 457239462Sdim LRegs.push_back(*AI); 458212904Sdim Added = true; 459212904Sdim } 460212904Sdim } 461239462Sdim } 462212904Sdim return Added; 463212904Sdim} 464212904Sdim 465193323Sed/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 466193323Sed/// scheduling of the given node to satisfy live physical register dependencies. 467193323Sed/// If the specific node is the last one that's available to schedule, do 468193323Sed/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
469193323Sedbool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, 470261991Sdim SmallVectorImpl<unsigned> &LRegs){ 471193323Sed if (NumLiveRegs == 0) 472193323Sed return false; 473193323Sed 474193323Sed SmallSet<unsigned, 4> RegAdded; 475193323Sed // If this node would clobber any "live" register, then it's not ready. 476193323Sed for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 477193323Sed I != E; ++I) { 478193323Sed if (I->isAssignedRegDep()) { 479212904Sdim CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 480212904Sdim RegAdded, LRegs, TRI); 481193323Sed } 482193323Sed } 483193323Sed 484218893Sdim for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 485212904Sdim if (Node->getOpcode() == ISD::INLINEASM) { 486212904Sdim // Inline asm can clobber physical defs. 487212904Sdim unsigned NumOps = Node->getNumOperands(); 488218893Sdim if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 489218893Sdim --NumOps; // Ignore the glue operand. 490212904Sdim 491212904Sdim for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 492212904Sdim unsigned Flags = 493212904Sdim cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 494212904Sdim unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 495212904Sdim 496212904Sdim ++i; // Skip the ID value. 497212904Sdim if (InlineAsm::isRegDefKind(Flags) || 498224145Sdim InlineAsm::isRegDefEarlyClobberKind(Flags) || 499224145Sdim InlineAsm::isClobberKind(Flags)) { 500212904Sdim // Check for def of register or earlyclobber register. 
501212904Sdim for (; NumVals; --NumVals, ++i) { 502212904Sdim unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 503212904Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 504212904Sdim CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 505212904Sdim } 506212904Sdim } else 507212904Sdim i += NumVals; 508212904Sdim } 509212904Sdim continue; 510212904Sdim } 511193323Sed if (!Node->isMachineOpcode()) 512193323Sed continue; 513224145Sdim const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 514224145Sdim if (!MCID.ImplicitDefs) 515193323Sed continue; 516234353Sdim for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { 517212904Sdim CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 518193323Sed } 519193323Sed } 520193323Sed return !LRegs.empty(); 521193323Sed} 522193323Sed 523193323Sed 524193323Sed/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 525193323Sed/// schedulers. 526193323Sedvoid ScheduleDAGFast::ListScheduleBottomUp() { 527193323Sed unsigned CurCycle = 0; 528193323Sed 529193323Sed // Release any predecessors of the special Exit node. 530193323Sed ReleasePredecessors(&ExitSU, CurCycle); 531193323Sed 532193323Sed // Add root to Available queue. 533193323Sed if (!SUnits.empty()) { 534193323Sed SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 535193323Sed assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 536193323Sed RootSU->isAvailable = true; 537193323Sed AvailableQueue.push(RootSU); 538193323Sed } 539193323Sed 540193323Sed // While Available queue is not empty, grab the node with the highest 541193323Sed // priority. If it is not ready put it back. Schedule the node. 
542193323Sed SmallVector<SUnit*, 4> NotReady; 543193323Sed DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 544193323Sed Sequence.reserve(SUnits.size()); 545193323Sed while (!AvailableQueue.empty()) { 546193323Sed bool Delayed = false; 547193323Sed LRegsMap.clear(); 548193323Sed SUnit *CurSU = AvailableQueue.pop(); 549193323Sed while (CurSU) { 550193323Sed SmallVector<unsigned, 4> LRegs; 551193323Sed if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 552193323Sed break; 553193323Sed Delayed = true; 554193323Sed LRegsMap.insert(std::make_pair(CurSU, LRegs)); 555193323Sed 556193323Sed CurSU->isPending = true; // This SU is not in AvailableQueue right now. 557193323Sed NotReady.push_back(CurSU); 558193323Sed CurSU = AvailableQueue.pop(); 559193323Sed } 560193323Sed 561193323Sed // All candidates are delayed due to live physical reg dependencies. 562193323Sed // Try code duplication or inserting cross class copies 563193323Sed // to resolve it. 564193323Sed if (Delayed && !CurSU) { 565193323Sed if (!CurSU) { 566193323Sed // Try duplicating the nodes that produces these 567193323Sed // "expensive to copy" values to break the dependency. In case even 568193323Sed // that doesn't work, insert cross class copies. 569193323Sed SUnit *TrySU = NotReady[0]; 570261991Sdim SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 571193323Sed assert(LRegs.size() == 1 && "Can't handle this yet!"); 572193323Sed unsigned Reg = LRegs[0]; 573193323Sed SUnit *LRDef = LiveRegDefs[Reg]; 574198090Srdivacky EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 575193323Sed const TargetRegisterClass *RC = 576210299Sed TRI->getMinimalPhysRegClass(Reg, VT); 577193323Sed const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 578193323Sed 579221345Sdim // If cross copy register class is the same as RC, then it must be 580221345Sdim // possible copy the value directly. Do not try duplicate the def. 
581221345Sdim // If cross copy register class is not the same as RC, then it's 582221345Sdim // possible to copy the value but it require cross register class copies 583221345Sdim // and it is expensive. 584221345Sdim // If cross copy register class is null, then it's not possible to copy 585221345Sdim // the value at all. 586193323Sed SUnit *NewDef = 0; 587221345Sdim if (DestRC != RC) { 588193323Sed NewDef = CopyAndMoveSuccessors(LRDef); 589221345Sdim if (!DestRC && !NewDef) 590221345Sdim report_fatal_error("Can't handle live physical " 591221345Sdim "register dependency!"); 592221345Sdim } 593193323Sed if (!NewDef) { 594193323Sed // Issue copies, these can be expensive cross register class copies. 595193323Sed SmallVector<SUnit*, 2> Copies; 596193323Sed InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 597202375Srdivacky DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum 598198090Srdivacky << " to SU #" << Copies.front()->NodeNum << "\n"); 599243830Sdim AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); 600193323Sed NewDef = Copies.back(); 601193323Sed } 602193323Sed 603202375Srdivacky DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum 604198090Srdivacky << " to SU #" << TrySU->NodeNum << "\n"); 605193323Sed LiveRegDefs[Reg] = NewDef; 606243830Sdim AddPred(NewDef, SDep(TrySU, SDep::Artificial)); 607193323Sed TrySU->isAvailable = false; 608193323Sed CurSU = NewDef; 609193323Sed } 610193323Sed 611193323Sed if (!CurSU) { 612198090Srdivacky llvm_unreachable("Unable to resolve live physical register dependencies!"); 613193323Sed } 614193323Sed } 615193323Sed 616193323Sed // Add the nodes that aren't ready back onto the available list. 617193323Sed for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 618193323Sed NotReady[i]->isPending = false; 619193323Sed // May no longer be available due to backtracking. 
620193323Sed if (NotReady[i]->isAvailable) 621193323Sed AvailableQueue.push(NotReady[i]); 622193323Sed } 623193323Sed NotReady.clear(); 624193323Sed 625193323Sed if (CurSU) 626193323Sed ScheduleNodeBottomUp(CurSU, CurCycle); 627193323Sed ++CurCycle; 628193323Sed } 629193323Sed 630198090Srdivacky // Reverse the order since it is bottom up. 631193323Sed std::reverse(Sequence.begin(), Sequence.end()); 632198090Srdivacky 633193323Sed#ifndef NDEBUG 634234353Sdim VerifyScheduledSequence(/*isBottomUp=*/true); 635193323Sed#endif 636193323Sed} 637193323Sed 638243830Sdim 639243830Sdimnamespace { 640193323Sed//===----------------------------------------------------------------------===// 641243830Sdim// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the 642243830Sdim// DAG in topological order. 643243830Sdim// IMPORTANT: this may not work for targets with phyreg dependency. 644243830Sdim// 645243830Sdimclass ScheduleDAGLinearize : public ScheduleDAGSDNodes { 646243830Sdimpublic: 647243830Sdim ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} 648243830Sdim 649243830Sdim void Schedule(); 650243830Sdim 651243830Sdim MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); 652243830Sdim 653243830Sdimprivate: 654243830Sdim std::vector<SDNode*> Sequence; 655243830Sdim DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user 656243830Sdim 657243830Sdim void ScheduleNode(SDNode *N); 658243830Sdim}; 659243830Sdim} // end anonymous namespace 660243830Sdim 661243830Sdimvoid ScheduleDAGLinearize::ScheduleNode(SDNode *N) { 662243830Sdim if (N->getNodeId() != 0) 663243830Sdim llvm_unreachable(0); 664243830Sdim 665243830Sdim if (!N->isMachineOpcode() && 666243830Sdim (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) 667243830Sdim // These nodes do not need to be translated into MIs. 
668243830Sdim return; 669243830Sdim 670243830Sdim DEBUG(dbgs() << "\n*** Scheduling: "); 671243830Sdim DEBUG(N->dump(DAG)); 672243830Sdim Sequence.push_back(N); 673243830Sdim 674243830Sdim unsigned NumOps = N->getNumOperands(); 675243830Sdim if (unsigned NumLeft = NumOps) { 676243830Sdim SDNode *GluedOpN = 0; 677243830Sdim do { 678243830Sdim const SDValue &Op = N->getOperand(NumLeft-1); 679243830Sdim SDNode *OpN = Op.getNode(); 680243830Sdim 681243830Sdim if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { 682243830Sdim // Schedule glue operand right above N. 683243830Sdim GluedOpN = OpN; 684243830Sdim assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); 685243830Sdim OpN->setNodeId(0); 686243830Sdim ScheduleNode(OpN); 687243830Sdim continue; 688243830Sdim } 689243830Sdim 690243830Sdim if (OpN == GluedOpN) 691243830Sdim // Glue operand is already scheduled. 692243830Sdim continue; 693243830Sdim 694243830Sdim DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); 695243830Sdim if (DI != GluedMap.end() && DI->second != N) 696243830Sdim // Users of glues are counted against the glued users. 697243830Sdim OpN = DI->second; 698243830Sdim 699243830Sdim unsigned Degree = OpN->getNodeId(); 700243830Sdim assert(Degree > 0 && "Predecessor over-released!"); 701243830Sdim OpN->setNodeId(--Degree); 702243830Sdim if (Degree == 0) 703243830Sdim ScheduleNode(OpN); 704243830Sdim } while (--NumLeft); 705243830Sdim } 706243830Sdim} 707243830Sdim 708243830Sdim/// findGluedUser - Find the representative use of a glue value by walking 709243830Sdim/// the use chain. 
static SDNode *findGluedUser(SDNode *N) {
  // Follow getGluedUser() links to the end of the glue chain.
  while (SDNode *Glued = N->getGluedUser())
    N = Glued;
  return N;
}

/// Schedule - Linearize the DAG: initialize each node's id with its use
/// count (its "degree"), fold glue-chain users together, then emit nodes in
/// topological order starting from the root.
void ScheduleDAGLinearize::Schedule() {
  DEBUG(dbgs() << "********** DAG Linearization **********\n");

  SmallVector<SDNode*, 8> Glues;
  unsigned DAGSize = 0;
  for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
         E = DAG->allnodes_end(); I != E; ++I) {
    SDNode *N = I;

    // Use node id to record degree.
    unsigned Degree = N->use_size();
    N->setNodeId(Degree);
    unsigned NumVals = N->getNumValues();
    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
        N->hasAnyUseOfValue(NumVals-1)) {
      SDNode *User = findGluedUser(N);
      if (User) {
        Glues.push_back(N);
        GluedMap.insert(std::make_pair(N, User));
      }
    }

    // Count only nodes that will be emitted as machine instructions.
    if (N->isMachineOpcode() ||
        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
      ++DAGSize;
  }

  for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
    SDNode *Glue = Glues[i];
    SDNode *GUser = GluedMap[Glue];
    unsigned Degree = Glue->getNodeId();
    unsigned UDegree = GUser->getNodeId();

    // Glue user must be scheduled together with the glue operand. So other
    // users of the glue operand must be treated as its users.
    SDNode *ImmGUser = Glue->getGluedUser();
    for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
         ui != ue; ++ui)
      if (*ui == ImmGUser)
        --Degree;
    // Transfer the glue node's remaining degree to its representative user;
    // the glue node itself keeps degree 1 (released by that user).
    GUser->setNodeId(UDegree + Degree);
    Glue->setNodeId(1);
  }

  Sequence.reserve(DAGSize);
  ScheduleNode(DAG->getRoot().getNode());
}

/// EmitSchedule - Emit the linearized sequence (recorded bottom-up, so it is
/// walked in reverse) as machine instructions at InsertPos.
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;

  DEBUG({
      dbgs() << "\n*** Final schedule ***\n";
    });

  // FIXME: Handle dbg_values.
  unsigned NumNodes = Sequence.size();
  for (unsigned i = 0; i != NumNodes; ++i) {
    SDNode *N = Sequence[NumNodes-i-1];
    DEBUG(N->dump(DAG));
    Emitter.EmitNode(N, false, false, VRBaseMap);
  }

  DEBUG(dbgs() << '\n');

  InsertPos = Emitter.getInsertPos();
  return Emitter.getBlock();
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  return new ScheduleDAGFast(*IS->MF);
}

llvm::ScheduleDAGSDNodes *
llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
  return new ScheduleDAGLinearize(*IS->MF);
}