// ScheduleDAGFast.cpp revision 249423
1151564Snjl//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// 2151564Snjl// 3151564Snjl// The LLVM Compiler Infrastructure 4151564Snjl// 5151564Snjl// This file is distributed under the University of Illinois Open Source 6151564Snjl// License. See LICENSE.TXT for details. 7151564Snjl// 8151564Snjl//===----------------------------------------------------------------------===// 9151564Snjl// 10151564Snjl// This implements a fast scheduler. 11151564Snjl// 12151564Snjl//===----------------------------------------------------------------------===// 13151564Snjl 14151564Snjl#define DEBUG_TYPE "pre-RA-sched" 15151564Snjl#include "llvm/CodeGen/SchedulerRegistry.h" 16151564Snjl#include "InstrEmitter.h" 17151564Snjl#include "ScheduleDAGSDNodes.h" 18151564Snjl#include "llvm/ADT/STLExtras.h" 19151564Snjl#include "llvm/ADT/SmallSet.h" 20151564Snjl#include "llvm/ADT/Statistic.h" 21151564Snjl#include "llvm/CodeGen/SelectionDAGISel.h" 22151564Snjl#include "llvm/IR/DataLayout.h" 23151564Snjl#include "llvm/IR/InlineAsm.h" 24151564Snjl#include "llvm/Support/Debug.h" 25151564Snjl#include "llvm/Support/ErrorHandling.h" 26151564Snjl#include "llvm/Support/raw_ostream.h" 27151564Snjl#include "llvm/Target/TargetInstrInfo.h" 28151564Snjl#include "llvm/Target/TargetRegisterInfo.h" 29151564Snjlusing namespace llvm; 30151564Snjl 31151564SnjlSTATISTIC(NumUnfolds, "Number of nodes unfolded"); 32151564SnjlSTATISTIC(NumDups, "Number of duplicated nodes"); 33151564SnjlSTATISTIC(NumPRCopies, "Number of physical copies"); 34151564Snjl 35151564Snjlstatic RegisterScheduler 36193530Sjkim fastDAGScheduler("fast", "Fast suboptimal list scheduling", 37193530Sjkim createFastDAGScheduler); 38151564Snjlstatic RegisterScheduler 39151564Snjl linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", 40151564Snjl createDAGLinearizer); 41151564Snjl 42151564Snjl 43151564Snjlnamespace { 44151564Snjl /// FastPriorityQueue - A degenerate priority queue that considers 45151564Snjl 
/// all nodes to have the same priority. 46151564Snjl /// 47151564Snjl struct FastPriorityQueue { 48152677Sume SmallVector<SUnit *, 16> Queue; 49152677Sume 50152677Sume bool empty() const { return Queue.empty(); } 51152677Sume 52152677Sume void push(SUnit *U) { 53151564Snjl Queue.push_back(U); 54151564Snjl } 55151564Snjl 56151564Snjl SUnit *pop() { 57151564Snjl if (empty()) return NULL; 58152677Sume SUnit *V = Queue.back(); 59152677Sume Queue.pop_back(); 60151564Snjl return V; 61151564Snjl } 62151564Snjl }; 63151564Snjl 64151564Snjl//===----------------------------------------------------------------------===// 65227309Sed/// ScheduleDAGFast - The actual "fast" list scheduler implementation. 66227309Sed/// 67186026Ssilbyclass ScheduleDAGFast : public ScheduleDAGSDNodes { 68186026Ssilbyprivate: 69186026Ssilby /// AvailableQueue - The priority queue to use for the available SUnits. 70186026Ssilby FastPriorityQueue AvailableQueue; 71186026Ssilby 72186026Ssilby /// LiveRegDefs - A set of physical registers and their definition 73186026Ssilby /// that are "live". These nodes must be scheduled before any other nodes that 74186026Ssilby /// modifies the registers can be scheduled. 75186026Ssilby unsigned NumLiveRegs; 76186026Ssilby std::vector<SUnit*> LiveRegDefs; 77186026Ssilby std::vector<unsigned> LiveRegCycles; 78186026Ssilby 79186026Ssilbypublic: 80186026Ssilby ScheduleDAGFast(MachineFunction &mf) 81186026Ssilby : ScheduleDAGSDNodes(mf) {} 82151564Snjl 83151564Snjl void Schedule(); 84151564Snjl 85151564Snjl /// AddPred - adds a predecessor edge to SUnit SU. 86151564Snjl /// This returns true if this is a new predecessor. 87151564Snjl void AddPred(SUnit *SU, const SDep &D) { 88151564Snjl SU->addPred(D); 89151564Snjl } 90151564Snjl 91151564Snjl /// RemovePred - removes a predecessor edge from SUnit SU. 92246128Ssbz /// This returns true if an edge was removed. 
93151564Snjl void RemovePred(SUnit *SU, const SDep &D) { 94151564Snjl SU->removePred(D); 95151564Snjl } 96151564Snjl 97151564Snjlprivate: 98151564Snjl void ReleasePred(SUnit *SU, SDep *PredEdge); 99151564Snjl void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 100151564Snjl void ScheduleNodeBottomUp(SUnit*, unsigned); 101151564Snjl SUnit *CopyAndMoveSuccessors(SUnit*); 102151564Snjl void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 103151564Snjl const TargetRegisterClass*, 104151564Snjl const TargetRegisterClass*, 105151564Snjl SmallVector<SUnit*, 2>&); 106151564Snjl bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 107151564Snjl void ListScheduleBottomUp(); 108151564Snjl 109151564Snjl /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. 110151564Snjl bool forceUnitLatencies() const { return true; } 111151564Snjl}; 112151564Snjl} // end anonymous namespace 113151564Snjl 114151564Snjl 115151564Snjl/// Schedule - Schedule the DAG using list scheduling. 116151564Snjlvoid ScheduleDAGFast::Schedule() { 117151564Snjl DEBUG(dbgs() << "********** List Scheduling **********\n"); 118151564Snjl 119151564Snjl NumLiveRegs = 0; 120151564Snjl LiveRegDefs.resize(TRI->getNumRegs(), NULL); 121151564Snjl LiveRegCycles.resize(TRI->getNumRegs(), 0); 122151564Snjl 123151564Snjl // Build the scheduling graph. 124151564Snjl BuildSchedGraph(NULL); 125151564Snjl 126151564Snjl DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 127151564Snjl SUnits[su].dumpAll(this)); 128151564Snjl 129151564Snjl // Execute the actual scheduling loop. 130151564Snjl ListScheduleBottomUp(); 131151564Snjl} 132151564Snjl 133151564Snjl//===----------------------------------------------------------------------===// 134151564Snjl// Bottom-Up Scheduling 135151564Snjl//===----------------------------------------------------------------------===// 136151564Snjl 137151564Snjl/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. 
Add it to 138151564Snjl/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 139151564Snjlvoid ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { 140151564Snjl SUnit *PredSU = PredEdge->getSUnit(); 141151564Snjl 142152677Sume#ifndef NDEBUG 143152677Sume if (PredSU->NumSuccsLeft == 0) { 144152677Sume dbgs() << "*** Scheduling failed! ***\n"; 145151564Snjl PredSU->dump(this); 146151564Snjl dbgs() << " has been released too many times!\n"; 147151564Snjl llvm_unreachable(0); 148151564Snjl } 149151564Snjl#endif 150151564Snjl --PredSU->NumSuccsLeft; 151151564Snjl 152151564Snjl // If all the node's successors are scheduled, this node is ready 153151564Snjl // to be scheduled. Ignore the special EntrySU node. 154151564Snjl if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 155151564Snjl PredSU->isAvailable = true; 156151564Snjl AvailableQueue.push(PredSU); 157151564Snjl } 158151564Snjl} 159151564Snjl 160151564Snjlvoid ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 161152677Sume // Bottom up: release predecessors 162152677Sume for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 163152677Sume I != E; ++I) { 164152677Sume ReleasePred(SU, &*I); 165152677Sume if (I->isAssignedRegDep()) { 166152677Sume // This is a physical register dependency and it's impossible or 167152677Sume // expensive to copy the register. Make sure nothing that can 168152677Sume // clobber the register is scheduled between the predecessor and 169152677Sume // this node. 170152677Sume if (!LiveRegDefs[I->getReg()]) { 171152677Sume ++NumLiveRegs; 172152677Sume LiveRegDefs[I->getReg()] = I->getSUnit(); 173152677Sume LiveRegCycles[I->getReg()] = CurCycle; 174152677Sume } 175152677Sume } 176152677Sume } 177152677Sume} 178152677Sume 179152677Sume/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 180152677Sume/// count of its predecessors. 
If a predecessor pending count is zero, add it to 181152677Sume/// the Available queue. 182152677Sumevoid ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 183152677Sume DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 184152677Sume DEBUG(SU->dump(this)); 185152677Sume 186152677Sume assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 187152677Sume SU->setHeightToAtLeast(CurCycle); 188152677Sume Sequence.push_back(SU); 189151564Snjl 190151564Snjl ReleasePredecessors(SU, CurCycle); 191151564Snjl 192151564Snjl // Release all the implicit physical register defs that are live. 193202771Sjkim for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 194151564Snjl I != E; ++I) { 195151564Snjl if (I->isAssignedRegDep()) { 196151564Snjl if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 197186026Ssilby assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 198186026Ssilby assert(LiveRegDefs[I->getReg()] == SU && 199186026Ssilby "Physical register dependency violated?"); 200151564Snjl --NumLiveRegs; 201151564Snjl LiveRegDefs[I->getReg()] = NULL; 202151564Snjl LiveRegCycles[I->getReg()] = 0; 203151564Snjl } 204151564Snjl } 205151564Snjl } 206151564Snjl 207151564Snjl SU->isScheduled = true; 208151564Snjl} 209151564Snjl 210151564Snjl/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 211151564Snjl/// successors to the newly created node. 
212151564SnjlSUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { 213151564Snjl if (SU->getNode()->getGluedNode()) 214151564Snjl return NULL; 215151564Snjl 216151564Snjl SDNode *N = SU->getNode(); 217151564Snjl if (!N) 218186026Ssilby return NULL; 219186026Ssilby 220186026Ssilby SUnit *NewSU; 221151564Snjl bool TryUnfold = false; 222151564Snjl for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 223151564Snjl EVT VT = N->getValueType(i); 224151564Snjl if (VT == MVT::Glue) 225151564Snjl return NULL; 226151564Snjl else if (VT == MVT::Other) 227151564Snjl TryUnfold = true; 228151564Snjl } 229151564Snjl for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 230151564Snjl const SDValue &Op = N->getOperand(i); 231151564Snjl EVT VT = Op.getNode()->getValueType(Op.getResNo()); 232151564Snjl if (VT == MVT::Glue) 233151564Snjl return NULL; 234151564Snjl } 235151564Snjl 236151564Snjl if (TryUnfold) { 237151564Snjl SmallVector<SDNode*, 2> NewNodes; 238151564Snjl if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 239151564Snjl return NULL; 240151564Snjl 241151564Snjl DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 242151564Snjl assert(NewNodes.size() == 2 && "Expected a load folding node!"); 243151564Snjl 244151564Snjl N = NewNodes[1]; 245151564Snjl SDNode *LoadNode = NewNodes[0]; 246151564Snjl unsigned NumVals = N->getNumValues(); 247151564Snjl unsigned OldNumVals = SU->getNode()->getNumValues(); 248151564Snjl for (unsigned i = 0; i != NumVals; ++i) 249151564Snjl DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 250151564Snjl DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 251151564Snjl SDValue(LoadNode, 1)); 252151564Snjl 253151564Snjl SUnit *NewSU = newSUnit(N); 254151564Snjl assert(N->getNodeId() == -1 && "Node already inserted!"); 255151564Snjl N->setNodeId(NewSU->NodeNum); 256151564Snjl 257151564Snjl const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 258151564Snjl for (unsigned i = 0; i != 
MCID.getNumOperands(); ++i) { 259151564Snjl if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 260202771Sjkim NewSU->isTwoAddress = true; 261151564Snjl break; 262151564Snjl } 263151564Snjl } 264151564Snjl if (MCID.isCommutable()) 265151564Snjl NewSU->isCommutable = true; 266186026Ssilby 267186026Ssilby // LoadNode may already exist. This can happen when there is another 268186026Ssilby // load from the same location and producing the same type of value 269151564Snjl // but it has different alignment or volatileness. 270151564Snjl bool isNewLoad = true; 271151564Snjl SUnit *LoadSU; 272151564Snjl if (LoadNode->getNodeId() != -1) { 273151564Snjl LoadSU = &SUnits[LoadNode->getNodeId()]; 274151564Snjl isNewLoad = false; 275151564Snjl } else { 276151564Snjl LoadSU = newSUnit(LoadNode); 277151564Snjl LoadNode->setNodeId(LoadSU->NodeNum); 278151564Snjl } 279151564Snjl 280151564Snjl SDep ChainPred; 281151564Snjl SmallVector<SDep, 4> ChainSuccs; 282151564Snjl SmallVector<SDep, 4> LoadPreds; 283151564Snjl SmallVector<SDep, 4> NodePreds; 284151564Snjl SmallVector<SDep, 4> NodeSuccs; 285151564Snjl for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 286151564Snjl I != E; ++I) { 287186026Ssilby if (I->isCtrl()) 288186026Ssilby ChainPred = *I; 289186026Ssilby else if (I->getSUnit()->getNode() && 290151564Snjl I->getSUnit()->getNode()->isOperandOf(LoadNode)) 291151564Snjl LoadPreds.push_back(*I); 292151564Snjl else 293151564Snjl NodePreds.push_back(*I); 294151564Snjl } 295151564Snjl for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 296151564Snjl I != E; ++I) { 297151564Snjl if (I->isCtrl()) 298151564Snjl ChainSuccs.push_back(*I); 299151564Snjl else 300151564Snjl NodeSuccs.push_back(*I); 301151564Snjl } 302151564Snjl 303151564Snjl if (ChainPred.getSUnit()) { 304151564Snjl RemovePred(SU, ChainPred); 305151564Snjl if (isNewLoad) 306151564Snjl AddPred(LoadSU, ChainPred); 307151564Snjl } 308151564Snjl for (unsigned i = 0, e = LoadPreds.size(); 
i != e; ++i) { 309151564Snjl const SDep &Pred = LoadPreds[i]; 310151564Snjl RemovePred(SU, Pred); 311151564Snjl if (isNewLoad) { 312151564Snjl AddPred(LoadSU, Pred); 313151564Snjl } 314151564Snjl } 315151564Snjl for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 316151564Snjl const SDep &Pred = NodePreds[i]; 317151564Snjl RemovePred(SU, Pred); 318151564Snjl AddPred(NewSU, Pred); 319151564Snjl } 320151564Snjl for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 321151564Snjl SDep D = NodeSuccs[i]; 322151564Snjl SUnit *SuccDep = D.getSUnit(); 323151564Snjl D.setSUnit(SU); 324151564Snjl RemovePred(SuccDep, D); 325186026Ssilby D.setSUnit(NewSU); 326186026Ssilby AddPred(SuccDep, D); 327186026Ssilby } 328151564Snjl for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 329151564Snjl SDep D = ChainSuccs[i]; 330151564Snjl SUnit *SuccDep = D.getSUnit(); 331151564Snjl D.setSUnit(SU); 332151564Snjl RemovePred(SuccDep, D); 333151564Snjl if (isNewLoad) { 334151564Snjl D.setSUnit(LoadSU); 335186026Ssilby AddPred(SuccDep, D); 336186031Ssilby } 337151564Snjl } 338151564Snjl if (isNewLoad) { 339151564Snjl SDep D(LoadSU, SDep::Barrier); 340151564Snjl D.setLatency(LoadSU->Latency); 341151564Snjl AddPred(NewSU, D); 342151564Snjl } 343151564Snjl 344151564Snjl ++NumUnfolds; 345151564Snjl 346151564Snjl if (NewSU->NumSuccsLeft == 0) { 347151564Snjl NewSU->isAvailable = true; 348151564Snjl return NewSU; 349151564Snjl } 350151564Snjl SU = NewSU; 351151564Snjl } 352151564Snjl 353151564Snjl DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 354151564Snjl NewSU = Clone(SU); 355151564Snjl 356151564Snjl // New SUnit has the exact same predecessors. 357151564Snjl for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 358152677Sume I != E; ++I) 359152677Sume if (!I->isArtificial()) 360152677Sume AddPred(NewSU, *I); 361152677Sume 362152677Sume // Only copy scheduled successors. Cut them from old node's successor 363151564Snjl // list and move them over. 
364151564Snjl SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 365151564Snjl for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 366151564Snjl I != E; ++I) { 367151564Snjl if (I->isArtificial()) 368151564Snjl continue; 369151564Snjl SUnit *SuccSU = I->getSUnit(); 370151564Snjl if (SuccSU->isScheduled) { 371151564Snjl SDep D = *I; 372151564Snjl D.setSUnit(NewSU); 373151564Snjl AddPred(SuccSU, D); 374151564Snjl D.setSUnit(SU); 375151564Snjl DelDeps.push_back(std::make_pair(SuccSU, D)); 376151564Snjl } 377154273Sbruno } 378154273Sbruno for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 379152677Sume RemovePred(DelDeps[i].first, DelDeps[i].second); 380151564Snjl 381151564Snjl ++NumDups; 382152677Sume return NewSU; 383151564Snjl} 384154273Sbruno 385154273Sbruno/// InsertCopiesAndMoveSuccs - Insert register copies and move all 386154273Sbruno/// scheduled successors of the given SUnit to the last copy. 387154273Sbrunovoid ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 388154273Sbruno const TargetRegisterClass *DestRC, 389154273Sbruno const TargetRegisterClass *SrcRC, 390154273Sbruno SmallVector<SUnit*, 2> &Copies) { 391154273Sbruno SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); 392154273Sbruno CopyFromSU->CopySrcRC = SrcRC; 393216503Savg CopyFromSU->CopyDstRC = DestRC; 394154273Sbruno 395154273Sbruno SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); 396154273Sbruno CopyToSU->CopySrcRC = DestRC; 397154273Sbruno CopyToSU->CopyDstRC = SrcRC; 398154273Sbruno 399154273Sbruno // Only copy scheduled successors. Cut them from old node's successor 400151564Snjl // list and move them over. 
401151564Snjl SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 402152677Sume for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 403151564Snjl I != E; ++I) { 404151564Snjl if (I->isArtificial()) 405151564Snjl continue; 406152677Sume SUnit *SuccSU = I->getSUnit(); 407152677Sume if (SuccSU->isScheduled) { 408152677Sume SDep D = *I; 409151564Snjl D.setSUnit(CopyToSU); 410151564Snjl AddPred(SuccSU, D); 411151564Snjl DelDeps.push_back(std::make_pair(SuccSU, *I)); 412152744Snjl } 413152744Snjl } 414151564Snjl for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 415151564Snjl RemovePred(DelDeps[i].first, DelDeps[i].second); 416151564Snjl } 417151564Snjl SDep FromDep(SU, SDep::Data, Reg); 418151564Snjl FromDep.setLatency(SU->Latency); 419151564Snjl AddPred(CopyFromSU, FromDep); 420151564Snjl SDep ToDep(CopyFromSU, SDep::Data, 0); 421151564Snjl ToDep.setLatency(CopyFromSU->Latency); 422151564Snjl AddPred(CopyToSU, ToDep); 423151564Snjl 424151564Snjl Copies.push_back(CopyFromSU); 425151564Snjl Copies.push_back(CopyToSU); 426151564Snjl 427151564Snjl ++NumPRCopies; 428151564Snjl} 429151564Snjl 430151564Snjl/// getPhysicalRegisterVT - Returns the ValueType of the physical register 431151564Snjl/// definition of the specified node. 432151564Snjl/// FIXME: Move to SelectionDAG? 
433152677Sumestatic EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 434152677Sume const TargetInstrInfo *TII) { 435152677Sume const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 436152677Sume assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 437152677Sume unsigned NumRes = MCID.getNumDefs(); 438151564Snjl for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { 439151564Snjl if (Reg == *ImpDef) 440151564Snjl break; 441151564Snjl ++NumRes; 442152677Sume } 443151564Snjl return N->getValueType(NumRes); 444151564Snjl} 445152677Sume 446151564Snjl/// CheckForLiveRegDef - Return true and update live register vector if the 447151564Snjl/// specified register def of the specified SUnit clobbers any "live" registers. 448151564Snjlstatic bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 449151564Snjl std::vector<SUnit*> &LiveRegDefs, 450152677Sume SmallSet<unsigned, 4> &RegAdded, 451151564Snjl SmallVector<unsigned, 4> &LRegs, 452151564Snjl const TargetRegisterInfo *TRI) { 453151564Snjl bool Added = false; 454152677Sume for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { 455152677Sume if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { 456151564Snjl if (RegAdded.insert(*AI)) { 457151564Snjl LRegs.push_back(*AI); 458151564Snjl Added = true; 459152677Sume } 460151564Snjl } 461152677Sume } 462152677Sume return Added; 463151564Snjl} 464152677Sume 465152677Sume/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 466151564Snjl/// scheduling of the given node to satisfy live physical register dependencies. 467151564Snjl/// If the specific node is the last one that's available to schedule, do 468152677Sume/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
469151564Snjlbool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, 470151564Snjl SmallVector<unsigned, 4> &LRegs){ 471151564Snjl if (NumLiveRegs == 0) 472151564Snjl return false; 473152677Sume 474151564Snjl SmallSet<unsigned, 4> RegAdded; 475151564Snjl // If this node would clobber any "live" register, then it's not ready. 476152677Sume for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 477151564Snjl I != E; ++I) { 478151564Snjl if (I->isAssignedRegDep()) { 479151564Snjl CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 480152677Sume RegAdded, LRegs, TRI); 481151564Snjl } 482151564Snjl } 483152744Snjl 484152744Snjl for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 485152677Sume if (Node->getOpcode() == ISD::INLINEASM) { 486151564Snjl // Inline asm can clobber physical defs. 487151564Snjl unsigned NumOps = Node->getNumOperands(); 488151564Snjl if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 489152744Snjl --NumOps; // Ignore the glue operand. 490152744Snjl 491151564Snjl for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 492151564Snjl unsigned Flags = 493151564Snjl cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 494 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 495 496 ++i; // Skip the ID value. 497 if (InlineAsm::isRegDefKind(Flags) || 498 InlineAsm::isRegDefEarlyClobberKind(Flags) || 499 InlineAsm::isClobberKind(Flags)) { 500 // Check for def of register or earlyclobber register. 
501 for (; NumVals; --NumVals, ++i) { 502 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 503 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 504 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 505 } 506 } else 507 i += NumVals; 508 } 509 continue; 510 } 511 if (!Node->isMachineOpcode()) 512 continue; 513 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 514 if (!MCID.ImplicitDefs) 515 continue; 516 for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { 517 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 518 } 519 } 520 return !LRegs.empty(); 521} 522 523 524/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 525/// schedulers. 526void ScheduleDAGFast::ListScheduleBottomUp() { 527 unsigned CurCycle = 0; 528 529 // Release any predecessors of the special Exit node. 530 ReleasePredecessors(&ExitSU, CurCycle); 531 532 // Add root to Available queue. 533 if (!SUnits.empty()) { 534 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 535 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 536 RootSU->isAvailable = true; 537 AvailableQueue.push(RootSU); 538 } 539 540 // While Available queue is not empty, grab the node with the highest 541 // priority. If it is not ready put it back. Schedule the node. 542 SmallVector<SUnit*, 4> NotReady; 543 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 544 Sequence.reserve(SUnits.size()); 545 while (!AvailableQueue.empty()) { 546 bool Delayed = false; 547 LRegsMap.clear(); 548 SUnit *CurSU = AvailableQueue.pop(); 549 while (CurSU) { 550 SmallVector<unsigned, 4> LRegs; 551 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 552 break; 553 Delayed = true; 554 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 555 556 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 
557 NotReady.push_back(CurSU); 558 CurSU = AvailableQueue.pop(); 559 } 560 561 // All candidates are delayed due to live physical reg dependencies. 562 // Try code duplication or inserting cross class copies 563 // to resolve it. 564 if (Delayed && !CurSU) { 565 if (!CurSU) { 566 // Try duplicating the nodes that produces these 567 // "expensive to copy" values to break the dependency. In case even 568 // that doesn't work, insert cross class copies. 569 SUnit *TrySU = NotReady[0]; 570 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 571 assert(LRegs.size() == 1 && "Can't handle this yet!"); 572 unsigned Reg = LRegs[0]; 573 SUnit *LRDef = LiveRegDefs[Reg]; 574 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 575 const TargetRegisterClass *RC = 576 TRI->getMinimalPhysRegClass(Reg, VT); 577 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 578 579 // If cross copy register class is the same as RC, then it must be 580 // possible copy the value directly. Do not try duplicate the def. 581 // If cross copy register class is not the same as RC, then it's 582 // possible to copy the value but it require cross register class copies 583 // and it is expensive. 584 // If cross copy register class is null, then it's not possible to copy 585 // the value at all. 586 SUnit *NewDef = 0; 587 if (DestRC != RC) { 588 NewDef = CopyAndMoveSuccessors(LRDef); 589 if (!DestRC && !NewDef) 590 report_fatal_error("Can't handle live physical " 591 "register dependency!"); 592 } 593 if (!NewDef) { 594 // Issue copies, these can be expensive cross register class copies. 
595 SmallVector<SUnit*, 2> Copies; 596 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 597 DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum 598 << " to SU #" << Copies.front()->NodeNum << "\n"); 599 AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); 600 NewDef = Copies.back(); 601 } 602 603 DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum 604 << " to SU #" << TrySU->NodeNum << "\n"); 605 LiveRegDefs[Reg] = NewDef; 606 AddPred(NewDef, SDep(TrySU, SDep::Artificial)); 607 TrySU->isAvailable = false; 608 CurSU = NewDef; 609 } 610 611 if (!CurSU) { 612 llvm_unreachable("Unable to resolve live physical register dependencies!"); 613 } 614 } 615 616 // Add the nodes that aren't ready back onto the available list. 617 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 618 NotReady[i]->isPending = false; 619 // May no longer be available due to backtracking. 620 if (NotReady[i]->isAvailable) 621 AvailableQueue.push(NotReady[i]); 622 } 623 NotReady.clear(); 624 625 if (CurSU) 626 ScheduleNodeBottomUp(CurSU, CurCycle); 627 ++CurCycle; 628 } 629 630 // Reverse the order since it is bottom up. 631 std::reverse(Sequence.begin(), Sequence.end()); 632 633#ifndef NDEBUG 634 VerifyScheduledSequence(/*isBottomUp=*/true); 635#endif 636} 637 638 639namespace { 640//===----------------------------------------------------------------------===// 641// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the 642// DAG in topological order. 643// IMPORTANT: this may not work for targets with phyreg dependency. 
644// 645class ScheduleDAGLinearize : public ScheduleDAGSDNodes { 646public: 647 ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} 648 649 void Schedule(); 650 651 MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); 652 653private: 654 std::vector<SDNode*> Sequence; 655 DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user 656 657 void ScheduleNode(SDNode *N); 658}; 659} // end anonymous namespace 660 661void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { 662 if (N->getNodeId() != 0) 663 llvm_unreachable(0); 664 665 if (!N->isMachineOpcode() && 666 (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) 667 // These nodes do not need to be translated into MIs. 668 return; 669 670 DEBUG(dbgs() << "\n*** Scheduling: "); 671 DEBUG(N->dump(DAG)); 672 Sequence.push_back(N); 673 674 unsigned NumOps = N->getNumOperands(); 675 if (unsigned NumLeft = NumOps) { 676 SDNode *GluedOpN = 0; 677 do { 678 const SDValue &Op = N->getOperand(NumLeft-1); 679 SDNode *OpN = Op.getNode(); 680 681 if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { 682 // Schedule glue operand right above N. 683 GluedOpN = OpN; 684 assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); 685 OpN->setNodeId(0); 686 ScheduleNode(OpN); 687 continue; 688 } 689 690 if (OpN == GluedOpN) 691 // Glue operand is already scheduled. 692 continue; 693 694 DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); 695 if (DI != GluedMap.end() && DI->second != N) 696 // Users of glues are counted against the glued users. 697 OpN = DI->second; 698 699 unsigned Degree = OpN->getNodeId(); 700 assert(Degree > 0 && "Predecessor over-released!"); 701 OpN->setNodeId(--Degree); 702 if (Degree == 0) 703 ScheduleNode(OpN); 704 } while (--NumLeft); 705 } 706} 707 708/// findGluedUser - Find the representative use of a glue value by walking 709/// the use chain. 
710static SDNode *findGluedUser(SDNode *N) { 711 while (SDNode *Glued = N->getGluedUser()) 712 N = Glued; 713 return N; 714} 715 716void ScheduleDAGLinearize::Schedule() { 717 DEBUG(dbgs() << "********** DAG Linearization **********\n"); 718 719 SmallVector<SDNode*, 8> Glues; 720 unsigned DAGSize = 0; 721 for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), 722 E = DAG->allnodes_end(); I != E; ++I) { 723 SDNode *N = I; 724 725 // Use node id to record degree. 726 unsigned Degree = N->use_size(); 727 N->setNodeId(Degree); 728 unsigned NumVals = N->getNumValues(); 729 if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && 730 N->hasAnyUseOfValue(NumVals-1)) { 731 SDNode *User = findGluedUser(N); 732 if (User) { 733 Glues.push_back(N); 734 GluedMap.insert(std::make_pair(N, User)); 735 } 736 } 737 738 if (N->isMachineOpcode() || 739 (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) 740 ++DAGSize; 741 } 742 743 for (unsigned i = 0, e = Glues.size(); i != e; ++i) { 744 SDNode *Glue = Glues[i]; 745 SDNode *GUser = GluedMap[Glue]; 746 unsigned Degree = Glue->getNodeId(); 747 unsigned UDegree = GUser->getNodeId(); 748 749 // Glue user must be scheduled together with the glue operand. So other 750 // users of the glue operand must be treated as its users. 751 SDNode *ImmGUser = Glue->getGluedUser(); 752 for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); 753 ui != ue; ++ui) 754 if (*ui == ImmGUser) 755 --Degree; 756 GUser->setNodeId(UDegree + Degree); 757 Glue->setNodeId(1); 758 } 759 760 Sequence.reserve(DAGSize); 761 ScheduleNode(DAG->getRoot().getNode()); 762} 763 764MachineBasicBlock* 765ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { 766 InstrEmitter Emitter(BB, InsertPos); 767 DenseMap<SDValue, unsigned> VRBaseMap; 768 769 DEBUG({ 770 dbgs() << "\n*** Final schedule ***\n"; 771 }); 772 773 // FIXME: Handle dbg_values. 
774 unsigned NumNodes = Sequence.size(); 775 for (unsigned i = 0; i != NumNodes; ++i) { 776 SDNode *N = Sequence[NumNodes-i-1]; 777 DEBUG(N->dump(DAG)); 778 Emitter.EmitNode(N, false, false, VRBaseMap); 779 } 780 781 DEBUG(dbgs() << '\n'); 782 783 InsertPos = Emitter.getInsertPos(); 784 return Emitter.getBlock(); 785} 786 787//===----------------------------------------------------------------------===// 788// Public Constructor Functions 789//===----------------------------------------------------------------------===// 790 791llvm::ScheduleDAGSDNodes * 792llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 793 return new ScheduleDAGFast(*IS->MF); 794} 795 796llvm::ScheduleDAGSDNodes * 797llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { 798 return new ScheduleDAGLinearize(*IS->MF); 799} 800