//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ResourcePriorityQueue class, which is a
// SchedulingPriorityQueue that prioritizes instructions using DFA state to
// reduce the length of the critical path through the basic block
// on VLIW platforms.
// The scheduler is basically a top-down adaptable list scheduler with DFA
// resource tracking added to the cost function.
// The DFA is queried as a state machine to model "packets/bundles" during
// scheduling. Currently packets/bundles are discarded at the end of
// scheduling, affecting only the order of instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "scheduler"

static cl::opt<bool>
    DisableDFASched("disable-dfa-sched", cl::Hidden,
                    cl::desc("Disable use of DFA during scheduling"));

static cl::opt<int> RegPressureThreshold(
    "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
    cl::desc("Track reg pressure and switch priority to in-depth"));

ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
    : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  TRI = STI.getRegisterInfo();
  TLI = IS->TLI;
  TII = STI.getInstrInfo();
  ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
  // This hard requirement could be relaxed, but for now
  // do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  unsigned NumRC = TRI->getNumRegClasses();
  RegLimit.resize(NumRC);
  RegPressure.resize(NumRC);
  std::fill(RegLimit.begin(), RegLimit.end(), 0);
  std::fill(RegPressure.begin(), RegPressure.end(), 0);
  for (const TargetRegisterClass *RC : TRI->regclasses())
    RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);

  ParallelLiveRanges = 0;
  HorizontalVerticalBalance = 0;
}

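/// Count the data predecessors of SU that produce a value in register class
/// RCId. Predecessors that are CopyFromReg nodes are counted unconditionally,
/// since their values are likely live into the block from outside.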
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
  unsigned NumberDeps = 0;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;

    SUnit *PredSU = Pred.getSUnit();
    const SDNode *ScegN = PredSU->getNode();

    if (!ScegN)
      continue;

    // A value produced by CopyFromReg is likely live into the block
    // from outside.
    switch (ScegN->getOpcode()) {
      default:  break;
      case ISD::TokenFactor:    break;
      case ISD::CopyFromReg:    NumberDeps++;  break;
      case ISD::CopyToReg:      break;
      case ISD::INLINEASM:      break;
      case ISD::INLINEASM_BR:   break;
    }
    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);
      if (TLI->isTypeLegal(VT)
          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}

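/// Count the data successors of SU that consume a value in register class
/// RCId. Successors that are CopyToReg nodes are counted unconditionally,
/// since their values are likely live out of the block.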
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
                                                    unsigned RCId) {
  unsigned NumberDeps = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      continue;

    SUnit *SuccSU = Succ.getSUnit();
    const SDNode *ScegN = SuccSU->getNode();
    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
      default:  break;
      case ISD::TokenFactor:    break;
      case ISD::CopyFromReg:    break;
      case ISD::CopyToReg:      NumberDeps++;  break;
      case ISD::INLINEASM:      break;
      case ISD::INLINEASM_BR:   break;
    }
    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
      if (TLI->isTypeLegal(VT)
          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}

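/// Count the successors of SU reached through non-data (control) edges.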
static unsigned numberCtrlDepsInSU(SUnit *SU) {
  unsigned NumberDeps = 0;
  for (const SDep &Succ : SU->Succs)
    if (Succ.isCtrl())
      NumberDeps++;

  return NumberDeps;
}

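/// Count the predecessors of SU reached through non-data (control) edges.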
static unsigned numberCtrlPredInSU(SUnit *SU) {
  unsigned NumberDeps = 0;
  for (SDep &Pred : SU->Preds)
    if (Pred.isCtrl())
      NumberDeps++;

  return NumberDeps;
}

///
/// Initialize nodes.
///
void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  NumNodesSolelyBlocking.resize(SUnits->size(), 0);

  for (SUnit &SU : *SUnits) {
    initNumRegDefsLeft(&SU);
    SU.NodeQueueId = 0;
  }
}

/// This heuristic is used if DFA scheduling is not desired
/// for some VLIW platform.
bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
  // The isScheduleHigh flag allows nodes with wraparound dependencies that
  // cannot easily be modeled as edges with latencies to be scheduled as
  // soon as possible in a top-down schedule.
  if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
    return false;

  if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
    return true;

  unsigned LHSNum = LHS->NodeNum;
  unsigned RHSNum = RHS->NodeNum;

  // The most important heuristic is scheduling the critical path.
  unsigned LHSLatency = PQ->getLatency(LHSNum);
  unsigned RHSLatency = PQ->getLatency(RHSNum);
  if (LHSLatency < RHSLatency) return true;
  if (LHSLatency > RHSLatency) return false;

  // After that, if two nodes have identical latencies, look to see if one will
  // unblock more other nodes than the other.
  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
  if (LHSBlocked < RHSBlocked) return true;
  if (LHSBlocked > RHSBlocked) return false;

  // Finally, just to provide a stable ordering, use the node number as a
  // deciding factor.
  return LHSNum < RHSNum;
}

/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = nullptr;
  for (const SDep &Pred : SU->Preds) {
    SUnit &PredSU = *Pred.getSUnit();
    if (!PredSU.isScheduled) {
      // We found an available, but not scheduled, predecessor.  If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
        return nullptr;
      OnlyAvailablePred = &PredSU;
    }
  }
  return OnlyAvailablePred;
}

void ResourcePriorityQueue::push(SUnit *SU) {
  // Look at all of the successors of this node.  Count the number of nodes
  // for which this node is the sole unscheduled predecessor.
  unsigned NumNodesBlocking = 0;
  for (const SDep &Succ : SU->Succs)
    if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
      ++NumNodesBlocking;

  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
  Queue.push_back(SU);
}

/// Check if scheduling of this SU is possible
/// in the current packet.
bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
  if (!SU || !SU->getNode())
    return false;

  // If this is a compound instruction,
  // it is likely to be a call. Do not delay it.
  if (SU->getNode()->getGluedNode())
    return true;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  if (SU->getNode()->isMachineOpcode())
    switch (SU->getNode()->getMachineOpcode()) {
    default:
      if (!ResourcesModel->canReserveResources(&TII->get(
              SU->getNode()->getMachineOpcode())))
        return false;
      break;
    case TargetOpcode::EXTRACT_SUBREG:
    case TargetOpcode::INSERT_SUBREG:
    case TargetOpcode::SUBREG_TO_REG:
    case TargetOpcode::REG_SEQUENCE:
    case TargetOpcode::IMPLICIT_DEF:
      break;
    }

  // Now make sure SU does not depend on any instruction
  // already in the packet.
  for (const SUnit *S : Packet)
    for (const SDep &Succ : S->Succs) {
      // Since we do not add pseudos to packets, might as well
      // ignore order deps.
      if (Succ.isCtrl())
        continue;

      if (Succ.getSUnit() == SU)
        return false;
    }

  return true;
}

/// Keep track of available resources.
void ResourcePriorityQueue::reserveResources(SUnit *SU) {
  // If this SU does not fit in the packet,
  // start a new one.
  if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
    ResourcesModel->clearResources();
    Packet.clear();
  }

  if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
    switch (SU->getNode()->getMachineOpcode()) {
    default:
      ResourcesModel->reserveResources(&TII->get(
        SU->getNode()->getMachineOpcode()));
      break;
    case TargetOpcode::EXTRACT_SUBREG:
    case TargetOpcode::INSERT_SUBREG:
    case TargetOpcode::SUBREG_TO_REG:
    case TargetOpcode::REG_SEQUENCE:
    case TargetOpcode::IMPLICIT_DEF:
      break;
    }
    Packet.push_back(SU);
  }
  // Forcefully end the packet for pseudo ops.
  else {
    ResourcesModel->clearResources();
    Packet.clear();
  }

  // If the packet is now full, reset the state so that in the next cycle
  // we start fresh.
  if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
    ResourcesModel->clearResources();
    Packet.clear();
  }
}

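/// Raw def/use balance that scheduling SU contributes to register class RCId:
/// values it defines in RCId add to the balance, values it consumes from RCId
/// subtract from it. Register file limits are ignored here.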
int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
  int RegBalance = 0;

  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
    return RegBalance;

  // Estimate generated values.
  for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
    MVT VT = SU->getNode()->getSimpleValueType(i);
    if (TLI->isTypeLegal(VT)
        && TLI->getRegClassFor(VT)
        && TLI->getRegClassFor(VT)->getID() == RCId)
      RegBalance += numberRCValSuccInSU(SU, RCId);
  }
  // Estimate killed values.
  for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
    const SDValue &Op = SU->getNode()->getOperand(i);
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (isa<ConstantSDNode>(Op.getNode()))
      continue;

    if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
        && TLI->getRegClassFor(VT)->getID() == RCId)
      RegBalance -= numberRCValPredInSU(SU, RCId);
  }
  return RegBalance;
}

/// Estimates change in reg pressure from this SU.
/// It is achieved by trivial tracking of defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function ignore
/// existing reg file sizes, and report the raw def/use
/// balance.
int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
  int RegBalance = 0;

  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
    return RegBalance;

  if (RawPressure) {
    for (const TargetRegisterClass *RC : TRI->regclasses())
      RegBalance += rawRegPressureDelta(SU, RC->getID());
  } else {
    for (const TargetRegisterClass *RC : TRI->regclasses()) {
      if ((RegPressure[RC->getID()] +
           rawRegPressureDelta(SU, RC->getID()) > 0) &&
          (RegPressure[RC->getID()] +
           rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
        RegBalance += rawRegPressureDelta(SU, RC->getID());
    }
  }

  return RegBalance;
}

// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 15;
static const unsigned PriorityFour = 5;
static const unsigned ScaleOne = 20;
static const unsigned ScaleTwo = 10;
static const unsigned ScaleThree = 5;
static const unsigned FactorOne = 2;

/// Returns a single number reflecting the benefit of scheduling SU
/// in the current cycle.
int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Adaptable scheduling: a small but very parallel
  // region, where reg pressure is an issue.
  if (HorizontalVerticalBalance > RegPressureThreshold) {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    // Consider the change to reg pressure from scheduling
    // this SU.
    ResCount -= (regPressureDelta(SU, true) * ScaleOne);
  }
  // Default heuristic, greedy and
  // critical path driven.
  else {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // Now see how many instructions are blocked by this SU.
    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    ResCount -= (regPressureDelta(SU) * ScaleTwo);
  }

  // These are platform-specific heuristics. They will need to move into
  // the back end and be accessed from here via a hook.
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      if (TID.isCall())
        ResCount += (PriorityTwo + (ScaleThree * N->getNumValues()));
    } else
      switch (N->getOpcode()) {
      default:  break;
      case ISD::TokenFactor:
      case ISD::CopyFromReg:
      case ISD::CopyToReg:
        ResCount += PriorityFour;
        break;

      case ISD::INLINEASM:
      case ISD::INLINEASM_BR:
        ResCount += PriorityThree;
        break;
      }
  }
  return ResCount;
}

/// Main resource tracking point.
void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
  // Use NULL entry as an event marker to reset
  // the DFA state.
  if (!SU) {
    ResourcesModel->clearResources();
    Packet.clear();
    return;
  }

  const SDNode *ScegN = SU->getNode();
  // Update reg pressure tracking.
  // First update current node.
  if (ScegN->isMachineOpcode()) {
    // Estimate generated regs.
    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC)
          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
      }
    }
    // Estimate killed regs.
    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC) {
          if (RegPressure[RC->getID()] >
              numberRCValPredInSU(SU, RC->getID()))
            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
          else
            RegPressure[RC->getID()] = 0;
        }
      }
    }
    for (SDep &Pred : SU->Preds) {
      if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
        continue;
      --Pred.getSUnit()->NumRegDefsLeft;
    }
  }

  // Reserve resources for this SU.
  reserveResources(SU);

  // Adjust the number of parallel live ranges.
  // The heuristic is simple: a node with no data successors reduces
  // the number of live ranges; all others increase it.
  unsigned NumberNonControlDeps = 0;

  for (const SDep &Succ : SU->Succs) {
    adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
    if (!Succ.isCtrl())
      NumberNonControlDeps++;
  }

  if (!NumberNonControlDeps) {
    if (ParallelLiveRanges >= SU->NumPreds)
      ParallelLiveRanges -= SU->NumPreds;
    else
      ParallelLiveRanges = 0;
  } else
    ParallelLiveRanges += SU->NumRegDefsLeft;

  // Track parallel live chains.
  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
}

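/// Initialize SU->NumRegDefsLeft with an estimate of the number of register
/// values this node (and its glued nodes) defines. IMPLICIT_DEF is treated
/// as defining nothing.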
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
  unsigned NodeNumDefs = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      // No register need be allocated for this.
      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
        NodeNumDefs = 0;
        break;
      }
      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
    } else
      switch (N->getOpcode()) {
        default:     break;
        case ISD::CopyFromReg:
          NodeNumDefs++;
          break;
        case ISD::INLINEASM:
        case ISD::INLINEASM_BR:
          NodeNumDefs++;
          break;
      }

  SU->NumRegDefsLeft = NodeNumDefs;
}

/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
/// scheduled.  If SU is not itself available, then there is at least one
/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
/// unscheduled predecessor, we want to increase its priority: it getting
/// scheduled will make this node available, so it is better than some other
/// node of the same priority that will not make a node available.
void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
  if (SU->isAvailable) return;  // All preds scheduled.

  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
  if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
    return;

  // Okay, we found a single predecessor that is available, but not scheduled.
  // Since it is available, it must be in the priority queue.  First remove it.
  remove(OnlyAvailablePred);

  // Reinsert the node into the priority queue, which recomputes its
  // NumNodesSolelyBlocking value.
  push(OnlyAvailablePred);
}

/// Main access point - returns the next instruction
/// to be placed in the scheduling sequence.
SUnit *ResourcePriorityQueue::pop() {
  if (empty())
    return nullptr;

  std::vector<SUnit *>::iterator Best = Queue.begin();
  if (!DisableDFASched) {
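    // DFA-driven selection: pick the node with the highest scheduling cost
    // as computed by SUSchedulingCost.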
    int BestCost = SUSchedulingCost(*Best);
    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
      if (SUSchedulingCost(*I) > BestCost) {
        BestCost = SUSchedulingCost(*I);
        Best = I;
      }
    }
  }
  // Use the default top-down scheduling mechanism.
  else {
    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
      if (Picker(*Best, *I))
        Best = I;
  }

  SUnit *V = *Best;
  if (Best != std::prev(Queue.end()))
    std::swap(*Best, Queue.back());

  Queue.pop_back();

  return V;
}

void ResourcePriorityQueue::remove(SUnit *SU) {
  assert(!Queue.empty() && "Queue is empty!");
  std::vector<SUnit *>::iterator I = find(Queue, SU);
  if (I != std::prev(Queue.end()))
    std::swap(*I, Queue.back());

  Queue.pop_back();
}