1//===---------------------------- GCNILPSched.cpp - -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/ScheduleDAG.h"
14
15using namespace llvm;
16
17#define DEBUG_TYPE "machine-scheduler"
18
19namespace {
20
21class GCNILPScheduler {
22  struct Candidate : ilist_node<Candidate> {
23    SUnit *SU;
24
25    Candidate(SUnit *SU_)
26      : SU(SU_) {}
27  };
28
29  SpecificBumpPtrAllocator<Candidate> Alloc;
30  typedef simple_ilist<Candidate> Queue;
31  Queue PendingQueue;
32  Queue AvailQueue;
33  unsigned CurQueueId = 0;
34
35  std::vector<unsigned> SUNumbers;
36
37  /// CurCycle - The current scheduler state corresponds to this cycle.
38  unsigned CurCycle = 0;
39
40  unsigned getNodePriority(const SUnit *SU) const;
41
42  const SUnit *pickBest(const SUnit *left, const SUnit *right);
43  Candidate* pickCandidate();
44
45  void releasePending();
46  void advanceToCycle(unsigned NextCycle);
47  void releasePredecessors(const SUnit* SU);
48
49public:
50  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
51                                     const ScheduleDAG &DAG);
52};
53} // namespace
54
55/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
56/// Smaller number is the higher priority.
57static unsigned
58CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
59  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
60  if (SethiUllmanNumber != 0)
61    return SethiUllmanNumber;
62
63  unsigned Extra = 0;
64  for (const SDep &Pred : SU->Preds) {
65    if (Pred.isCtrl()) continue;  // ignore chain preds
66    SUnit *PredSU = Pred.getSUnit();
67    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
68    if (PredSethiUllman > SethiUllmanNumber) {
69      SethiUllmanNumber = PredSethiUllman;
70      Extra = 0;
71    }
72    else if (PredSethiUllman == SethiUllmanNumber)
73      ++Extra;
74  }
75
76  SethiUllmanNumber += Extra;
77
78  if (SethiUllmanNumber == 0)
79    SethiUllmanNumber = 1;
80
81  return SethiUllmanNumber;
82}
83
84// Lower priority means schedule further down. For bottom-up scheduling, lower
85// priority SUs are scheduled before higher priority SUs.
86unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
87  assert(SU->NodeNum < SUNumbers.size());
88  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
89    // If SU does not have a register use, i.e. it doesn't produce a value
90    // that would be consumed (e.g. store), then it terminates a chain of
91    // computation.  Give it a large SethiUllman number so it will be
92    // scheduled right before its predecessors that it doesn't lengthen
93    // their live ranges.
94    return 0xffff;
95
96  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
97    // If SU does not have a register def, schedule it close to its uses
98    // because it does not lengthen any live ranges.
99    return 0;
100
101  return SUNumbers[SU->NodeNum];
102}
103
104/// closestSucc - Returns the scheduled cycle of the successor which is
105/// closest to the current cycle.
106static unsigned closestSucc(const SUnit *SU) {
107  unsigned MaxHeight = 0;
108  for (const SDep &Succ : SU->Succs) {
109    if (Succ.isCtrl()) continue;  // ignore chain succs
110    unsigned Height = Succ.getSUnit()->getHeight();
111    // If there are bunch of CopyToRegs stacked up, they should be considered
112    // to be at the same position.
113    if (Height > MaxHeight)
114      MaxHeight = Height;
115  }
116  return MaxHeight;
117}
118
119/// calcMaxScratches - Returns an cost estimate of the worse case requirement
120/// for scratch registers, i.e. number of data dependencies.
121static unsigned calcMaxScratches(const SUnit *SU) {
122  unsigned Scratches = 0;
123  for (const SDep &Pred : SU->Preds) {
124    if (Pred.isCtrl()) continue;  // ignore chain preds
125    Scratches++;
126  }
127  return Scratches;
128}
129
130// Return -1 if left has higher priority, 1 if right has higher priority.
131// Return 0 if latency-based priority is equivalent.
132static int BUCompareLatency(const SUnit *left, const SUnit *right) {
133  // Scheduling an instruction that uses a VReg whose postincrement has not yet
134  // been scheduled will induce a copy. Model this as an extra cycle of latency.
135  int LHeight = (int)left->getHeight();
136  int RHeight = (int)right->getHeight();
137
138  // If either node is scheduling for latency, sort them by height/depth
139  // and latency.
140
141  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
142  // is enabled, grouping instructions by cycle, then its height is already
143  // covered so only its depth matters. We also reach this point if both stall
144  // but have the same height.
145  if (LHeight != RHeight)
146    return LHeight > RHeight ? 1 : -1;
147
148  int LDepth = left->getDepth();
149  int RDepth = right->getDepth();
150  if (LDepth != RDepth) {
151    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
152                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
153                      << ") depth " << RDepth << "\n");
154    return LDepth < RDepth ? 1 : -1;
155  }
156  if (left->Latency != right->Latency)
157    return left->Latency > right->Latency ? 1 : -1;
158
159  return 0;
160}
161
162const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
163{
164  // TODO: add register pressure lowering checks
165
166  bool const DisableSchedCriticalPath = false;
167  int MaxReorderWindow = 6;
168  if (!DisableSchedCriticalPath) {
169    int spread = (int)left->getDepth() - (int)right->getDepth();
170    if (std::abs(spread) > MaxReorderWindow) {
171      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
172                        << left->getDepth() << " != SU(" << right->NodeNum
173                        << "): " << right->getDepth() << "\n");
174      return left->getDepth() < right->getDepth() ? right : left;
175    }
176  }
177
178  bool const DisableSchedHeight = false;
179  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
180    int spread = (int)left->getHeight() - (int)right->getHeight();
181    if (std::abs(spread) > MaxReorderWindow)
182      return left->getHeight() > right->getHeight() ? right : left;
183  }
184
185  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
186  unsigned LPriority = getNodePriority(left);
187  unsigned RPriority = getNodePriority(right);
188
189  if (LPriority != RPriority)
190    return LPriority > RPriority ? right : left;
191
192  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
193  // e.g.
194  // t1 = op t2, c1
195  // t3 = op t4, c2
196  //
197  // and the following instructions are both ready.
198  // t2 = op c3
199  // t4 = op c4
200  //
201  // Then schedule t2 = op first.
202  // i.e.
203  // t4 = op c4
204  // t2 = op c3
205  // t1 = op t2, c1
206  // t3 = op t4, c2
207  //
208  // This creates more short live intervals.
209  unsigned LDist = closestSucc(left);
210  unsigned RDist = closestSucc(right);
211  if (LDist != RDist)
212    return LDist < RDist ? right : left;
213
214  // How many registers becomes live when the node is scheduled.
215  unsigned LScratch = calcMaxScratches(left);
216  unsigned RScratch = calcMaxScratches(right);
217  if (LScratch != RScratch)
218    return LScratch > RScratch ? right : left;
219
220  bool const DisableSchedCycles = false;
221  if (!DisableSchedCycles) {
222    int result = BUCompareLatency(left, right);
223    if (result != 0)
224      return result > 0 ? right : left;
225    return left;
226  }
227  else {
228    if (left->getHeight() != right->getHeight())
229      return (left->getHeight() > right->getHeight()) ? right : left;
230
231    if (left->getDepth() != right->getDepth())
232      return (left->getDepth() < right->getDepth()) ? right : left;
233  }
234
235  assert(left->NodeQueueId && right->NodeQueueId &&
236        "NodeQueueId cannot be zero");
237  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
238}
239
240GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
241  if (AvailQueue.empty())
242    return nullptr;
243  auto Best = AvailQueue.begin();
244  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
245    auto NewBestSU = pickBest(Best->SU, I->SU);
246    if (NewBestSU != Best->SU) {
247      assert(NewBestSU == I->SU);
248      Best = I;
249    }
250  }
251  return &*Best;
252}
253
254void GCNILPScheduler::releasePending() {
255  // Check to see if any of the pending instructions are ready to issue.  If
256  // so, add them to the available queue.
257  for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
258    auto &C = *I++;
259    if (C.SU->getHeight() <= CurCycle) {
260      PendingQueue.remove(C);
261      AvailQueue.push_back(C);
262      C.SU->NodeQueueId = CurQueueId++;
263    }
264  }
265}
266
267/// Move the scheduler state forward by the specified number of Cycles.
268void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
269  if (NextCycle <= CurCycle)
270    return;
271  CurCycle = NextCycle;
272  releasePending();
273}
274
275void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
276  for (const auto &PredEdge : SU->Preds) {
277    auto PredSU = PredEdge.getSUnit();
278    if (PredEdge.isWeak())
279      continue;
280    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
281
282    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
283
284    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
285      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
286  }
287}
288
289std::vector<const SUnit*>
290GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
291                          const ScheduleDAG &DAG) {
292  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
293
294  std::vector<SUnit> SUSavedCopy;
295  SUSavedCopy.resize(SUnits.size());
296
297  // we cannot save only those fields we touch: some of them are private
298  // so save units verbatim: this assumes SUnit should have value semantics
299  for (const SUnit &SU : SUnits)
300    SUSavedCopy[SU.NodeNum] = SU;
301
302  SUNumbers.assign(SUnits.size(), 0);
303  for (const SUnit &SU : SUnits)
304    CalcNodeSethiUllmanNumber(&SU, SUNumbers);
305
306  for (const auto *SU : BotRoots) {
307    AvailQueue.push_back(
308      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
309  }
310  releasePredecessors(&DAG.ExitSU);
311
312  std::vector<const SUnit*> Schedule;
313  Schedule.reserve(SUnits.size());
314  while (true) {
315    if (AvailQueue.empty() && !PendingQueue.empty()) {
316      auto EarliestSU = std::min_element(
317        PendingQueue.begin(), PendingQueue.end(),
318        [=](const Candidate& C1, const Candidate& C2) {
319        return C1.SU->getHeight() < C2.SU->getHeight();
320      })->SU;
321      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
322    }
323    if (AvailQueue.empty())
324      break;
325
326    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
327                         "Ready queue:";
328               for (auto &C
329                    : AvailQueue) dbgs()
330               << ' ' << C.SU->NodeNum;
331               dbgs() << '\n';);
332
333    auto C = pickCandidate();
334    assert(C);
335    AvailQueue.remove(*C);
336    auto SU = C->SU;
337    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
338
339    advanceToCycle(SU->getHeight());
340
341    releasePredecessors(SU);
342    Schedule.push_back(SU);
343    SU->isScheduled = true;
344  }
345  assert(SUnits.size() == Schedule.size());
346
347  std::reverse(Schedule.begin(), Schedule.end());
348
349  // restore units
350  for (auto &SU : SUnits)
351    SU = SUSavedCopy[SU.NodeNum];
352
353  return Schedule;
354}
355
356namespace llvm {
357std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
358                                              const ScheduleDAG &DAG) {
359  GCNILPScheduler S;
360  return S.schedule(BotRoots, DAG);
361}
362}
363