1321369Sdim//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9321369Sdim/// \file Implements the ScheduleDAG class, which is a base class used by 10321369Sdim/// scheduling implementation classes. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "llvm/CodeGen/ScheduleDAG.h" 15321369Sdim#include "llvm/ADT/STLExtras.h" 16321369Sdim#include "llvm/ADT/SmallVector.h" 17353358Sdim#include "llvm/ADT/Statistic.h" 18321369Sdim#include "llvm/ADT/iterator_range.h" 19321369Sdim#include "llvm/CodeGen/MachineFunction.h" 20193323Sed#include "llvm/CodeGen/ScheduleHazardRecognizer.h" 21218893Sdim#include "llvm/CodeGen/SelectionDAGNodes.h" 22327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 23327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 24327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 25341825Sdim#include "llvm/Config/llvm-config.h" 26224145Sdim#include "llvm/Support/CommandLine.h" 27321369Sdim#include "llvm/Support/Compiler.h" 28193323Sed#include "llvm/Support/Debug.h" 29198090Srdivacky#include "llvm/Support/raw_ostream.h" 30321369Sdim#include <algorithm> 31321369Sdim#include <cassert> 32321369Sdim#include <iterator> 33321369Sdim#include <limits> 34321369Sdim#include <utility> 35321369Sdim#include <vector> 36321369Sdim 37193323Sedusing namespace llvm; 38193323Sed 39276479Sdim#define DEBUG_TYPE "pre-RA-sched" 40276479Sdim 41353358SdimSTATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added"); 42353358SdimSTATISTIC(NumTopoInits, 43353358Sdim "Number of times the topological order has been recomputed"); 44353358Sdim 45224145Sdim#ifndef NDEBUG 46226633Sdimstatic cl::opt<bool> StressSchedOpt( 47224145Sdim "stress-sched", cl::Hidden, cl::init(false), 48224145Sdim cl::desc("Stress test instruction scheduling")); 49224145Sdim#endif 50224145Sdim 51321369Sdimvoid SchedulingPriorityQueue::anchor() {} 52234353Sdim 53193323SedScheduleDAG::ScheduleDAG(MachineFunction &mf) 54288943Sdim : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), 55288943Sdim TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), 56321369Sdim MRI(mf.getRegInfo()) { 57224145Sdim#ifndef NDEBUG 58224145Sdim StressSched = StressSchedOpt; 59224145Sdim#endif 60193323Sed} 61193323Sed 62321369SdimScheduleDAG::~ScheduleDAG() = default; 63193323Sed 64234353Sdimvoid ScheduleDAG::clearDAG() { 65234353Sdim SUnits.clear(); 66234353Sdim EntrySU = SUnit(); 67234353Sdim ExitSU = SUnit(); 68234353Sdim} 69234353Sdim 70224145Sdimconst MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { 71276479Sdim if (!Node || !Node->isMachineOpcode()) return nullptr; 72218893Sdim return &TII->get(Node->getMachineOpcode()); 73218893Sdim} 74218893Sdim 75344779SdimLLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const { 76321369Sdim switch (getKind()) { 77344779Sdim case Data: dbgs() << "Data"; break; 78344779Sdim case Anti: dbgs() << "Anti"; break; 79344779Sdim case Output: dbgs() << "Out "; break; 80344779Sdim case Order: dbgs() << "Ord "; break; 81321369Sdim } 82321369Sdim 83321369Sdim switch (getKind()) { 84321369Sdim case Data: 85344779Sdim dbgs() << " Latency=" << getLatency(); 86321369Sdim if (TRI && isAssignedRegDep()) 87344779Sdim dbgs() << " Reg=" << printReg(getReg(), TRI); 88321369Sdim break; 89321369Sdim case Anti: 90321369Sdim case Output: 91344779Sdim dbgs() << " Latency=" << getLatency(); 92321369Sdim break; 93321369Sdim case Order: 94344779Sdim dbgs() << " Latency=" << getLatency(); 95321369Sdim switch(Contents.OrdKind) { 96344779Sdim case Barrier: dbgs() << " Barrier"; break; 97321369Sdim case MayAliasMem: 98344779Sdim case MustAliasMem: dbgs() << " Memory"; break; 99344779Sdim case Artificial: dbgs() << " Artificial"; break; 100344779Sdim case Weak: dbgs() << " Weak"; break; 101344779Sdim case Cluster: dbgs() << " Cluster"; break; 102321369Sdim } 103321369Sdim break; 104321369Sdim } 105321369Sdim} 106321369Sdim 107249423Sdimbool SUnit::addPred(const SDep &D, bool Required) { 108276479Sdim // If this node already has this dependence, don't add a redundant one. 109321369Sdim for (SDep &PredDep : Preds) { 110249423Sdim // Zero-latency weak edges may be added purely for heuristic ordering. Don't 111249423Sdim // add them if another kind of edge already exists. 112321369Sdim if (!Required && PredDep.getSUnit() == D.getSUnit()) 113249423Sdim return false; 114321369Sdim if (PredDep.overlaps(D)) { 115321369Sdim // Extend the latency if needed. Equivalent to 116321369Sdim // removePred(PredDep) + addPred(D). 117321369Sdim if (PredDep.getLatency() < D.getLatency()) { 118321369Sdim SUnit *PredSU = PredDep.getSUnit(); 119239462Sdim // Find the corresponding successor in N. 120321369Sdim SDep ForwardD = PredDep; 121239462Sdim ForwardD.setSUnit(this); 122321369Sdim for (SDep &SuccDep : PredSU->Succs) { 123321369Sdim if (SuccDep == ForwardD) { 124321369Sdim SuccDep.setLatency(D.getLatency()); 125239462Sdim break; 126239462Sdim } 127239462Sdim } 128321369Sdim PredDep.setLatency(D.getLatency()); 129239462Sdim } 130218893Sdim return false; 131239462Sdim } 132239462Sdim } 133193323Sed // Now add a corresponding succ to N. 134193323Sed SDep P = D; 135193323Sed P.setSUnit(this); 136193323Sed SUnit *N = D.getSUnit(); 137193323Sed // Update the bookkeeping. 138193323Sed if (D.getKind() == SDep::Data) { 139321369Sdim assert(NumPreds < std::numeric_limits<unsigned>::max() && 140321369Sdim "NumPreds will overflow!"); 141321369Sdim assert(N->NumSuccs < std::numeric_limits<unsigned>::max() && 142321369Sdim "NumSuccs will overflow!"); 143193323Sed ++NumPreds; 144193323Sed ++N->NumSuccs; 145193323Sed } 146198090Srdivacky if (!N->isScheduled) { 147249423Sdim if (D.isWeak()) { 148249423Sdim ++WeakPredsLeft; 149249423Sdim } 150249423Sdim else { 151321369Sdim assert(NumPredsLeft < std::numeric_limits<unsigned>::max() && 152321369Sdim "NumPredsLeft will overflow!"); 153249423Sdim ++NumPredsLeft; 154249423Sdim } 155198090Srdivacky } 156198090Srdivacky if (!isScheduled) { 157249423Sdim if (D.isWeak()) { 158249423Sdim ++N->WeakSuccsLeft; 159249423Sdim } 160249423Sdim else { 161321369Sdim assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() && 162321369Sdim "NumSuccsLeft will overflow!"); 163249423Sdim ++N->NumSuccsLeft; 164249423Sdim } 165198090Srdivacky } 166193323Sed Preds.push_back(D); 167193323Sed N->Succs.push_back(P); 168193323Sed if (P.getLatency() != 0) { 169193323Sed this->setDepthDirty(); 170193323Sed N->setHeightDirty(); 171193323Sed } 172218893Sdim return true; 173193323Sed} 174193323Sed 175193323Sedvoid SUnit::removePred(const SDep &D) { 176193323Sed // Find the matching predecessor. 177321369Sdim SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D); 178321369Sdim if (I == Preds.end()) 179321369Sdim return; 180321369Sdim // Find the corresponding successor in N. 181321369Sdim SDep P = D; 182321369Sdim P.setSUnit(this); 183321369Sdim SUnit *N = D.getSUnit(); 184321369Sdim SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P); 185321369Sdim assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); 186321369Sdim N->Succs.erase(Succ); 187321369Sdim Preds.erase(I); 188321369Sdim // Update the bookkeeping. 189321369Sdim if (P.getKind() == SDep::Data) { 190321369Sdim assert(NumPreds > 0 && "NumPreds will underflow!"); 191321369Sdim assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); 192321369Sdim --NumPreds; 193321369Sdim --N->NumSuccs; 194321369Sdim } 195321369Sdim if (!N->isScheduled) { 196321369Sdim if (D.isWeak()) 197321369Sdim --WeakPredsLeft; 198321369Sdim else { 199321369Sdim assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); 200321369Sdim --NumPredsLeft; 201193323Sed } 202321369Sdim } 203321369Sdim if (!isScheduled) { 204321369Sdim if (D.isWeak()) 205321369Sdim --N->WeakSuccsLeft; 206321369Sdim else { 207321369Sdim assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); 208321369Sdim --N->NumSuccsLeft; 209321369Sdim } 210321369Sdim } 211321369Sdim if (P.getLatency() != 0) { 212321369Sdim this->setDepthDirty(); 213321369Sdim N->setHeightDirty(); 214321369Sdim } 215193323Sed} 216193323Sed 217193323Sedvoid SUnit::setDepthDirty() { 218193323Sed if (!isDepthCurrent) return; 219193323Sed SmallVector<SUnit*, 8> WorkList; 220193323Sed WorkList.push_back(this); 221193323Sed do { 222193323Sed SUnit *SU = WorkList.pop_back_val(); 223193323Sed SU->isDepthCurrent = false; 224321369Sdim for (SDep &SuccDep : SU->Succs) { 225321369Sdim SUnit *SuccSU = SuccDep.getSUnit(); 226193323Sed if (SuccSU->isDepthCurrent) 227193323Sed WorkList.push_back(SuccSU); 228193323Sed } 229193323Sed } while (!WorkList.empty()); 230193323Sed} 231193323Sed 232193323Sedvoid SUnit::setHeightDirty() { 233193323Sed if (!isHeightCurrent) return; 234193323Sed SmallVector<SUnit*, 8> WorkList; 235193323Sed WorkList.push_back(this); 236193323Sed do { 237193323Sed SUnit *SU = WorkList.pop_back_val(); 238193323Sed SU->isHeightCurrent = false; 239321369Sdim for (SDep &PredDep : SU->Preds) { 240321369Sdim SUnit *PredSU = PredDep.getSUnit(); 241193323Sed if (PredSU->isHeightCurrent) 242193323Sed WorkList.push_back(PredSU); 243193323Sed } 244193323Sed } while (!WorkList.empty()); 245193323Sed} 246193323Sed 247199989Srdivackyvoid SUnit::setDepthToAtLeast(unsigned NewDepth) { 248199989Srdivacky if (NewDepth <= getDepth()) 249193323Sed return; 250193323Sed setDepthDirty(); 251193323Sed Depth = NewDepth; 252193323Sed isDepthCurrent = true; 253193323Sed} 254193323Sed 255199989Srdivackyvoid SUnit::setHeightToAtLeast(unsigned NewHeight) { 256199989Srdivacky if (NewHeight <= getHeight()) 257193323Sed return; 258193323Sed setHeightDirty(); 259193323Sed Height = NewHeight; 260193323Sed isHeightCurrent = true; 261193323Sed} 262193323Sed 263321369Sdim/// Calculates the maximal path from the node to the exit. 264199989Srdivackyvoid SUnit::ComputeDepth() { 265193323Sed SmallVector<SUnit*, 8> WorkList; 266193323Sed WorkList.push_back(this); 267193323Sed do { 268193323Sed SUnit *Cur = WorkList.back(); 269193323Sed 270193323Sed bool Done = true; 271193323Sed unsigned MaxPredDepth = 0; 272321369Sdim for (const SDep &PredDep : Cur->Preds) { 273321369Sdim SUnit *PredSU = PredDep.getSUnit(); 274193323Sed if (PredSU->isDepthCurrent) 275193323Sed MaxPredDepth = std::max(MaxPredDepth, 276321369Sdim PredSU->Depth + PredDep.getLatency()); 277193323Sed else { 278193323Sed Done = false; 279193323Sed WorkList.push_back(PredSU); 280193323Sed } 281193323Sed } 282193323Sed 283193323Sed if (Done) { 284193323Sed WorkList.pop_back(); 285193323Sed if (MaxPredDepth != Cur->Depth) { 286193323Sed Cur->setDepthDirty(); 287193323Sed Cur->Depth = MaxPredDepth; 288193323Sed } 289193323Sed Cur->isDepthCurrent = true; 290193323Sed } 291193323Sed } while (!WorkList.empty()); 292193323Sed} 293193323Sed 294321369Sdim/// Calculates the maximal path from the node to the entry. 295199989Srdivackyvoid SUnit::ComputeHeight() { 296193323Sed SmallVector<SUnit*, 8> WorkList; 297193323Sed WorkList.push_back(this); 298193323Sed do { 299193323Sed SUnit *Cur = WorkList.back(); 300193323Sed 301193323Sed bool Done = true; 302193323Sed unsigned MaxSuccHeight = 0; 303321369Sdim for (const SDep &SuccDep : Cur->Succs) { 304321369Sdim SUnit *SuccSU = SuccDep.getSUnit(); 305193323Sed if (SuccSU->isHeightCurrent) 306193323Sed MaxSuccHeight = std::max(MaxSuccHeight, 307321369Sdim SuccSU->Height + SuccDep.getLatency()); 308193323Sed else { 309193323Sed Done = false; 310193323Sed WorkList.push_back(SuccSU); 311193323Sed } 312193323Sed } 313193323Sed 314193323Sed if (Done) { 315193323Sed WorkList.pop_back(); 316193323Sed if (MaxSuccHeight != Cur->Height) { 317193323Sed Cur->setHeightDirty(); 318193323Sed Cur->Height = MaxSuccHeight; 319193323Sed } 320193323Sed Cur->isHeightCurrent = true; 321193323Sed } 322193323Sed } while (!WorkList.empty()); 323193323Sed} 324193323Sed 325249423Sdimvoid SUnit::biasCriticalPath() { 326249423Sdim if (NumPreds < 2) 327249423Sdim return; 328249423Sdim 329249423Sdim SUnit::pred_iterator BestI = Preds.begin(); 330249423Sdim unsigned MaxDepth = BestI->getSUnit()->getDepth(); 331276479Sdim for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; 332276479Sdim ++I) { 333249423Sdim if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) 334249423Sdim BestI = I; 335249423Sdim } 336249423Sdim if (BestI != Preds.begin()) 337249423Sdim std::swap(*Preds.begin(), *BestI); 338249423Sdim} 339249423Sdim 340243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 341344779SdimLLVM_DUMP_METHOD void SUnit::dumpAttributes() const { 342202375Srdivacky dbgs() << " # preds left : " << NumPredsLeft << "\n"; 343202375Srdivacky dbgs() << " # succs left : " << NumSuccsLeft << "\n"; 344249423Sdim if (WeakPredsLeft) 345249423Sdim dbgs() << " # weak preds left : " << WeakPredsLeft << "\n"; 346249423Sdim if (WeakSuccsLeft) 347249423Sdim dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; 348218893Sdim dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; 349202375Srdivacky dbgs() << " Latency : " << Latency << "\n"; 350249423Sdim dbgs() << " Depth : " << getDepth() << "\n"; 351249423Sdim dbgs() << " Height : " << getHeight() << "\n"; 352344779Sdim} 353193323Sed 354344779SdimLLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const { 355344779Sdim if (&SU == &EntrySU) 356344779Sdim dbgs() << "EntrySU"; 357344779Sdim else if (&SU == &ExitSU) 358344779Sdim dbgs() << "ExitSU"; 359344779Sdim else 360344779Sdim dbgs() << "SU(" << SU.NodeNum << ")"; 361344779Sdim} 362344779Sdim 363344779SdimLLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const { 364344779Sdim dumpNode(SU); 365344779Sdim SU.dumpAttributes(); 366344779Sdim if (SU.Preds.size() > 0) { 367202375Srdivacky dbgs() << " Predecessors:\n"; 368344779Sdim for (const SDep &Dep : SU.Preds) { 369321369Sdim dbgs() << " "; 370344779Sdim dumpNodeName(*Dep.getSUnit()); 371344779Sdim dbgs() << ": "; 372344779Sdim Dep.dump(TRI); 373344779Sdim dbgs() << '\n'; 374193323Sed } 375193323Sed } 376344779Sdim if (SU.Succs.size() > 0) { 377202375Srdivacky dbgs() << " Successors:\n"; 378344779Sdim for (const SDep &Dep : SU.Succs) { 379321369Sdim dbgs() << " "; 380344779Sdim dumpNodeName(*Dep.getSUnit()); 381344779Sdim dbgs() << ": "; 382344779Sdim Dep.dump(TRI); 383344779Sdim dbgs() << '\n'; 384193323Sed } 385193323Sed } 386193323Sed} 387243830Sdim#endif 388193323Sed 389193323Sed#ifndef NDEBUG 390234353Sdimunsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { 391193323Sed bool AnyNotSched = false; 392193323Sed unsigned DeadNodes = 0; 393321369Sdim for (const SUnit &SUnit : SUnits) { 394321369Sdim if (!SUnit.isScheduled) { 395321369Sdim if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) { 396193323Sed ++DeadNodes; 397193323Sed continue; 398193323Sed } 399193323Sed if (!AnyNotSched) 400202375Srdivacky dbgs() << "*** Scheduling failed! ***\n"; 401344779Sdim dumpNode(SUnit); 402202375Srdivacky dbgs() << "has not been scheduled!\n"; 403193323Sed AnyNotSched = true; 404193323Sed } 405321369Sdim if (SUnit.isScheduled && 406321369Sdim (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) > 407321369Sdim unsigned(std::numeric_limits<int>::max())) { 408193323Sed if (!AnyNotSched) 409202375Srdivacky dbgs() << "*** Scheduling failed! ***\n"; 410344779Sdim dumpNode(SUnit); 411202375Srdivacky dbgs() << "has an unexpected " 412193323Sed << (isBottomUp ? "Height" : "Depth") << " value!\n"; 413193323Sed AnyNotSched = true; 414193323Sed } 415193323Sed if (isBottomUp) { 416321369Sdim if (SUnit.NumSuccsLeft != 0) { 417193323Sed if (!AnyNotSched) 418202375Srdivacky dbgs() << "*** Scheduling failed! ***\n"; 419344779Sdim dumpNode(SUnit); 420202375Srdivacky dbgs() << "has successors left!\n"; 421193323Sed AnyNotSched = true; 422193323Sed } 423193323Sed } else { 424321369Sdim if (SUnit.NumPredsLeft != 0) { 425193323Sed if (!AnyNotSched) 426202375Srdivacky dbgs() << "*** Scheduling failed! ***\n"; 427344779Sdim dumpNode(SUnit); 428202375Srdivacky dbgs() << "has predecessors left!\n"; 429193323Sed AnyNotSched = true; 430193323Sed } 431193323Sed } 432193323Sed } 433193323Sed assert(!AnyNotSched); 434234353Sdim return SUnits.size() - DeadNodes; 435193323Sed} 436193323Sed#endif 437193323Sed 438193323Sedvoid ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { 439321369Sdim // The idea of the algorithm is taken from 440321369Sdim // "Online algorithms for managing the topological order of 441321369Sdim // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly 442321369Sdim // This is the MNR algorithm, which was first introduced by 443321369Sdim // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in 444321369Sdim // "Maintaining a topological order under edge insertions". 445321369Sdim // 446321369Sdim // Short description of the algorithm: 447321369Sdim // 448321369Sdim // Topological ordering, ord, of a DAG maps each node to a topological 449321369Sdim // index so that for all edges X->Y it is the case that ord(X) < ord(Y). 450321369Sdim // 451321369Sdim // This means that if there is a path from the node X to the node Z, 452321369Sdim // then ord(X) < ord(Z). 453321369Sdim // 454321369Sdim // This property can be used to check for reachability of nodes: 455321369Sdim // if Z is reachable from X, then an insertion of the edge Z->X would 456321369Sdim // create a cycle. 457321369Sdim // 458321369Sdim // The algorithm first computes a topological ordering for the DAG by 459321369Sdim // initializing the Index2Node and Node2Index arrays and then tries to keep 460321369Sdim // the ordering up-to-date after edge insertions by reordering the DAG. 461321369Sdim // 462321369Sdim // On insertion of the edge X->Y, the algorithm first marks by calling DFS 463321369Sdim // the nodes reachable from Y, and then shifts them using Shift to lie 464321369Sdim // immediately after X in Index2Node. 465353358Sdim 466353358Sdim // Cancel pending updates, mark as valid. 467353358Sdim Dirty = false; 468353358Sdim Updates.clear(); 469353358Sdim 470193323Sed unsigned DAGSize = SUnits.size(); 471193323Sed std::vector<SUnit*> WorkList; 472193323Sed WorkList.reserve(DAGSize); 473193323Sed 474193323Sed Index2Node.resize(DAGSize); 475193323Sed Node2Index.resize(DAGSize); 476193323Sed 477193323Sed // Initialize the data structures. 478249423Sdim if (ExitSU) 479249423Sdim WorkList.push_back(ExitSU); 480321369Sdim for (SUnit &SU : SUnits) { 481321369Sdim int NodeNum = SU.NodeNum; 482321369Sdim unsigned Degree = SU.Succs.size(); 483193323Sed // Temporarily use the Node2Index array as scratch space for degree counts. 484193323Sed Node2Index[NodeNum] = Degree; 485193323Sed 486193323Sed // Is it a node without dependencies? 487193323Sed if (Degree == 0) { 488321369Sdim assert(SU.Succs.empty() && "SUnit should have no successors"); 489193323Sed // Collect leaf nodes. 490321369Sdim WorkList.push_back(&SU); 491193323Sed } 492210299Sed } 493193323Sed 494193323Sed int Id = DAGSize; 495193323Sed while (!WorkList.empty()) { 496193323Sed SUnit *SU = WorkList.back(); 497193323Sed WorkList.pop_back(); 498249423Sdim if (SU->NodeNum < DAGSize) 499249423Sdim Allocate(SU->NodeNum, --Id); 500321369Sdim for (const SDep &PredDep : SU->Preds) { 501321369Sdim SUnit *SU = PredDep.getSUnit(); 502249423Sdim if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum]) 503193323Sed // If all dependencies of the node are processed already, 504193323Sed // then the node can be computed now. 505193323Sed WorkList.push_back(SU); 506193323Sed } 507193323Sed } 508193323Sed 509193323Sed Visited.resize(DAGSize); 510353358Sdim NumTopoInits++; 511193323Sed 512193323Sed#ifndef NDEBUG 513193323Sed // Check correctness of the ordering 514321369Sdim for (SUnit &SU : SUnits) { 515321369Sdim for (const SDep &PD : SU.Preds) { 516321369Sdim assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] && 517193323Sed "Wrong topological sorting"); 518193323Sed } 519193323Sed } 520193323Sed#endif 521193323Sed} 522193323Sed 523353358Sdimvoid ScheduleDAGTopologicalSort::FixOrder() { 524353358Sdim // Recompute from scratch after new nodes have been added. 525353358Sdim if (Dirty) { 526353358Sdim InitDAGTopologicalSorting(); 527353358Sdim return; 528353358Sdim } 529353358Sdim 530353358Sdim // Otherwise apply updates one-by-one. 531353358Sdim for (auto &U : Updates) 532353358Sdim AddPred(U.first, U.second); 533353358Sdim Updates.clear(); 534353358Sdim} 535353358Sdim 536353358Sdimvoid ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) { 537353358Sdim // Recomputing the order from scratch is likely more efficient than applying 538353358Sdim // updates one-by-one for too many updates. The current cut-off is arbitrarily 539353358Sdim // chosen. 540353358Sdim Dirty = Dirty || Updates.size() > 10; 541353358Sdim 542353358Sdim if (Dirty) 543353358Sdim return; 544353358Sdim 545353358Sdim Updates.emplace_back(Y, X); 546353358Sdim} 547353358Sdim 548193323Sedvoid ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { 549193323Sed int UpperBound, LowerBound; 550193323Sed LowerBound = Node2Index[Y->NodeNum]; 551193323Sed UpperBound = Node2Index[X->NodeNum]; 552193323Sed bool HasLoop = false; 553193323Sed // Is Ord(X) < Ord(Y) ? 554193323Sed if (LowerBound < UpperBound) { 555193323Sed // Update the topological order. 556193323Sed Visited.reset(); 557193323Sed DFS(Y, UpperBound, HasLoop); 558193323Sed assert(!HasLoop && "Inserted edge creates a loop!"); 559193323Sed // Recompute topological indexes. 560193323Sed Shift(Visited, LowerBound, UpperBound); 561193323Sed } 562353358Sdim 563353358Sdim NumNewPredsAdded++; 564193323Sed} 565193323Sed 566193323Sedvoid ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { 567193323Sed // InitDAGTopologicalSorting(); 568193323Sed} 569193323Sed 570193323Sedvoid ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, 571218893Sdim bool &HasLoop) { 572193323Sed std::vector<const SUnit*> WorkList; 573210299Sed WorkList.reserve(SUnits.size()); 574193323Sed 575193323Sed WorkList.push_back(SU); 576193323Sed do { 577193323Sed SU = WorkList.back(); 578193323Sed WorkList.pop_back(); 579193323Sed Visited.set(SU->NodeNum); 580321369Sdim for (const SDep &SuccDep 581321369Sdim : make_range(SU->Succs.rbegin(), SU->Succs.rend())) { 582321369Sdim unsigned s = SuccDep.getSUnit()->NodeNum; 583249423Sdim // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). 584249423Sdim if (s >= Node2Index.size()) 585249423Sdim continue; 586193323Sed if (Node2Index[s] == UpperBound) { 587210299Sed HasLoop = true; 588193323Sed return; 589193323Sed } 590193323Sed // Visit successors if not already and in affected region. 591193323Sed if (!Visited.test(s) && Node2Index[s] < UpperBound) { 592321369Sdim WorkList.push_back(SuccDep.getSUnit()); 593210299Sed } 594210299Sed } 595193323Sed } while (!WorkList.empty()); 596193323Sed} 597193323Sed 598321369Sdimstd::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU, 599321369Sdim const SUnit &TargetSU, 600321369Sdim bool &Success) { 601321369Sdim std::vector<const SUnit*> WorkList; 602321369Sdim int LowerBound = Node2Index[StartSU.NodeNum]; 603321369Sdim int UpperBound = Node2Index[TargetSU.NodeNum]; 604321369Sdim bool Found = false; 605321369Sdim BitVector VisitedBack; 606321369Sdim std::vector<int> Nodes; 607321369Sdim 608321369Sdim if (LowerBound > UpperBound) { 609321369Sdim Success = false; 610321369Sdim return Nodes; 611321369Sdim } 612321369Sdim 613321369Sdim WorkList.reserve(SUnits.size()); 614321369Sdim Visited.reset(); 615321369Sdim 616321369Sdim // Starting from StartSU, visit all successors up 617321369Sdim // to UpperBound. 618321369Sdim WorkList.push_back(&StartSU); 619321369Sdim do { 620321369Sdim const SUnit *SU = WorkList.back(); 621321369Sdim WorkList.pop_back(); 622321369Sdim for (int I = SU->Succs.size()-1; I >= 0; --I) { 623321369Sdim const SUnit *Succ = SU->Succs[I].getSUnit(); 624321369Sdim unsigned s = Succ->NodeNum; 625321369Sdim // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). 626321369Sdim if (Succ->isBoundaryNode()) 627321369Sdim continue; 628321369Sdim if (Node2Index[s] == UpperBound) { 629321369Sdim Found = true; 630321369Sdim continue; 631321369Sdim } 632321369Sdim // Visit successors if not already and in affected region. 633321369Sdim if (!Visited.test(s) && Node2Index[s] < UpperBound) { 634321369Sdim Visited.set(s); 635321369Sdim WorkList.push_back(Succ); 636321369Sdim } 637321369Sdim } 638321369Sdim } while (!WorkList.empty()); 639321369Sdim 640321369Sdim if (!Found) { 641321369Sdim Success = false; 642321369Sdim return Nodes; 643321369Sdim } 644321369Sdim 645321369Sdim WorkList.clear(); 646321369Sdim VisitedBack.resize(SUnits.size()); 647321369Sdim Found = false; 648321369Sdim 649321369Sdim // Starting from TargetSU, visit all predecessors up 650321369Sdim // to LowerBound. SUs that are visited by the two 651321369Sdim // passes are added to Nodes. 652321369Sdim WorkList.push_back(&TargetSU); 653321369Sdim do { 654321369Sdim const SUnit *SU = WorkList.back(); 655321369Sdim WorkList.pop_back(); 656321369Sdim for (int I = SU->Preds.size()-1; I >= 0; --I) { 657321369Sdim const SUnit *Pred = SU->Preds[I].getSUnit(); 658321369Sdim unsigned s = Pred->NodeNum; 659321369Sdim // Edges to non-SUnits are allowed but ignored (e.g. EntrySU). 660321369Sdim if (Pred->isBoundaryNode()) 661321369Sdim continue; 662321369Sdim if (Node2Index[s] == LowerBound) { 663321369Sdim Found = true; 664321369Sdim continue; 665321369Sdim } 666321369Sdim if (!VisitedBack.test(s) && Visited.test(s)) { 667321369Sdim VisitedBack.set(s); 668321369Sdim WorkList.push_back(Pred); 669321369Sdim Nodes.push_back(s); 670321369Sdim } 671321369Sdim } 672321369Sdim } while (!WorkList.empty()); 673321369Sdim 674321369Sdim assert(Found && "Error in SUnit Graph!"); 675321369Sdim Success = true; 676321369Sdim return Nodes; 677321369Sdim} 678321369Sdim 679210299Sedvoid ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, 680193323Sed int UpperBound) { 681193323Sed std::vector<int> L; 682193323Sed int shift = 0; 683193323Sed int i; 684193323Sed 685193323Sed for (i = LowerBound; i <= UpperBound; ++i) { 686193323Sed // w is node at topological index i. 687193323Sed int w = Index2Node[i]; 688193323Sed if (Visited.test(w)) { 689193323Sed // Unmark. 690193323Sed Visited.reset(w); 691193323Sed L.push_back(w); 692193323Sed shift = shift + 1; 693193323Sed } else { 694193323Sed Allocate(w, i - shift); 695193323Sed } 696193323Sed } 697193323Sed 698321369Sdim for (unsigned LI : L) { 699321369Sdim Allocate(LI, i - shift); 700193323Sed i = i + 1; 701193323Sed } 702193323Sed} 703193323Sed 704249423Sdimbool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { 705353358Sdim FixOrder(); 706249423Sdim // Is SU reachable from TargetSU via successor edges? 707249423Sdim if (IsReachable(SU, TargetSU)) 708193323Sed return true; 709321369Sdim for (const SDep &PredDep : TargetSU->Preds) 710321369Sdim if (PredDep.isAssignedRegDep() && 711321369Sdim IsReachable(SU, PredDep.getSUnit())) 712193323Sed return true; 713193323Sed return false; 714193323Sed} 715193323Sed 716193323Sedbool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, 717193323Sed const SUnit *TargetSU) { 718353358Sdim FixOrder(); 719193323Sed // If insertion of the edge SU->TargetSU would create a cycle 720193323Sed // then there is a path from TargetSU to SU. 721193323Sed int UpperBound, LowerBound; 722193323Sed LowerBound = Node2Index[TargetSU->NodeNum]; 723193323Sed UpperBound = Node2Index[SU->NodeNum]; 724193323Sed bool HasLoop = false; 725193323Sed // Is Ord(TargetSU) < Ord(SU) ? 726193323Sed if (LowerBound < UpperBound) { 727193323Sed Visited.reset(); 728210299Sed // There may be a path from TargetSU to SU. Check for it. 729193323Sed DFS(TargetSU, UpperBound, HasLoop); 730193323Sed } 731193323Sed return HasLoop; 732193323Sed} 733193323Sed 734193323Sedvoid ScheduleDAGTopologicalSort::Allocate(int n, int index) { 735193323Sed Node2Index[n] = index; 736193323Sed Index2Node[index] = n; 737193323Sed} 738193323Sed 739210299SedScheduleDAGTopologicalSort:: 740249423SdimScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu) 741249423Sdim : SUnits(sunits), ExitSU(exitsu) {} 742193323Sed 743321369SdimScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default; 744