1//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ResourcePriorityQueue class, which is a
10// SchedulingPriorityQueue that prioritizes instructions using DFA state to
11// reduce the length of the critical path through the basic block
12// on VLIW platforms.
13// The scheduler is basically a top-down adaptable list scheduler with DFA
14// resource tracking added to the cost function.
15// DFA is queried as a state machine to model "packets/bundles" during
16// schedule. Currently packets/bundles are discarded at the end of
17// scheduling, affecting only order of instructions.
18//
19//===----------------------------------------------------------------------===//
20
21#include "llvm/CodeGen/ResourcePriorityQueue.h"
22#include "llvm/CodeGen/DFAPacketizer.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/CodeGen/SelectionDAGNodes.h"
26#include "llvm/CodeGen/TargetInstrInfo.h"
27#include "llvm/CodeGen/TargetLowering.h"
28#include "llvm/CodeGen/TargetRegisterInfo.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33#include "llvm/Target/TargetMachine.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "scheduler"
38
39static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
40  cl::ZeroOrMore, cl::init(false),
41  cl::desc("Disable use of DFA during scheduling"));
42
43static cl::opt<int> RegPressureThreshold(
44  "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
45  cl::desc("Track reg pressure and switch priority to in-depth"));
46
47ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
48    : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
49  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
50  TRI = STI.getRegisterInfo();
51  TLI = IS->TLI;
52  TII = STI.getInstrInfo();
53  ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
54  // This hard requirement could be relaxed, but for now
55  // do not let it proceed.
56  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
57
58  unsigned NumRC = TRI->getNumRegClasses();
59  RegLimit.resize(NumRC);
60  RegPressure.resize(NumRC);
61  std::fill(RegLimit.begin(), RegLimit.end(), 0);
62  std::fill(RegPressure.begin(), RegPressure.end(), 0);
63  for (const TargetRegisterClass *RC : TRI->regclasses())
64    RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
65
66  ParallelLiveRanges = 0;
67  HorizontalVerticalBalance = 0;
68}
69
70unsigned
71ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
72  unsigned NumberDeps = 0;
73  for (SDep &Pred : SU->Preds) {
74    if (Pred.isCtrl())
75      continue;
76
77    SUnit *PredSU = Pred.getSUnit();
78    const SDNode *ScegN = PredSU->getNode();
79
80    if (!ScegN)
81      continue;
82
83    // If value is passed to CopyToReg, it is probably
84    // live outside BB.
85    switch (ScegN->getOpcode()) {
86      default:  break;
87      case ISD::TokenFactor:    break;
88      case ISD::CopyFromReg:    NumberDeps++;  break;
89      case ISD::CopyToReg:      break;
90      case ISD::INLINEASM:      break;
91      case ISD::INLINEASM_BR:   break;
92    }
93    if (!ScegN->isMachineOpcode())
94      continue;
95
96    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
97      MVT VT = ScegN->getSimpleValueType(i);
98      if (TLI->isTypeLegal(VT)
99          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
100        NumberDeps++;
101        break;
102      }
103    }
104  }
105  return NumberDeps;
106}
107
108unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
109                                                    unsigned RCId) {
110  unsigned NumberDeps = 0;
111  for (const SDep &Succ : SU->Succs) {
112    if (Succ.isCtrl())
113      continue;
114
115    SUnit *SuccSU = Succ.getSUnit();
116    const SDNode *ScegN = SuccSU->getNode();
117    if (!ScegN)
118      continue;
119
120    // If value is passed to CopyToReg, it is probably
121    // live outside BB.
122    switch (ScegN->getOpcode()) {
123      default:  break;
124      case ISD::TokenFactor:    break;
125      case ISD::CopyFromReg:    break;
126      case ISD::CopyToReg:      NumberDeps++;  break;
127      case ISD::INLINEASM:      break;
128      case ISD::INLINEASM_BR:   break;
129    }
130    if (!ScegN->isMachineOpcode())
131      continue;
132
133    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
134      const SDValue &Op = ScegN->getOperand(i);
135      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
136      if (TLI->isTypeLegal(VT)
137          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
138        NumberDeps++;
139        break;
140      }
141    }
142  }
143  return NumberDeps;
144}
145
146static unsigned numberCtrlDepsInSU(SUnit *SU) {
147  unsigned NumberDeps = 0;
148  for (const SDep &Succ : SU->Succs)
149    if (Succ.isCtrl())
150      NumberDeps++;
151
152  return NumberDeps;
153}
154
155static unsigned numberCtrlPredInSU(SUnit *SU) {
156  unsigned NumberDeps = 0;
157  for (SDep &Pred : SU->Preds)
158    if (Pred.isCtrl())
159      NumberDeps++;
160
161  return NumberDeps;
162}
163
164///
165/// Initialize nodes.
166///
167void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
168  SUnits = &sunits;
169  NumNodesSolelyBlocking.resize(SUnits->size(), 0);
170
171  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
172    SUnit *SU = &(*SUnits)[i];
173    initNumRegDefsLeft(SU);
174    SU->NodeQueueId = 0;
175  }
176}
177
178/// This heuristic is used if DFA scheduling is not desired
179/// for some VLIW platform.
180bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
181  // The isScheduleHigh flag allows nodes with wraparound dependencies that
182  // cannot easily be modeled as edges with latencies to be scheduled as
183  // soon as possible in a top-down schedule.
184  if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
185    return false;
186
187  if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
188    return true;
189
190  unsigned LHSNum = LHS->NodeNum;
191  unsigned RHSNum = RHS->NodeNum;
192
193  // The most important heuristic is scheduling the critical path.
194  unsigned LHSLatency = PQ->getLatency(LHSNum);
195  unsigned RHSLatency = PQ->getLatency(RHSNum);
196  if (LHSLatency < RHSLatency) return true;
197  if (LHSLatency > RHSLatency) return false;
198
199  // After that, if two nodes have identical latencies, look to see if one will
200  // unblock more other nodes than the other.
201  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
202  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
203  if (LHSBlocked < RHSBlocked) return true;
204  if (LHSBlocked > RHSBlocked) return false;
205
206  // Finally, just to provide a stable ordering, use the node number as a
207  // deciding factor.
208  return LHSNum < RHSNum;
209}
210
211
212/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
213/// of SU, return it, otherwise return null.
214SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
215  SUnit *OnlyAvailablePred = nullptr;
216  for (const SDep &Pred : SU->Preds) {
217    SUnit &PredSU = *Pred.getSUnit();
218    if (!PredSU.isScheduled) {
219      // We found an available, but not scheduled, predecessor.  If it's the
220      // only one we have found, keep track of it... otherwise give up.
221      if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
222        return nullptr;
223      OnlyAvailablePred = &PredSU;
224    }
225  }
226  return OnlyAvailablePred;
227}
228
229void ResourcePriorityQueue::push(SUnit *SU) {
230  // Look at all of the successors of this node.  Count the number of nodes that
231  // this node is the sole unscheduled node for.
232  unsigned NumNodesBlocking = 0;
233  for (const SDep &Succ : SU->Succs)
234    if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
235      ++NumNodesBlocking;
236
237  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
238  Queue.push_back(SU);
239}
240
241/// Check if scheduling of this SU is possible
242/// in the current packet.
243bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
244  if (!SU || !SU->getNode())
245    return false;
246
247  // If this is a compound instruction,
248  // it is likely to be a call. Do not delay it.
249  if (SU->getNode()->getGluedNode())
250    return true;
251
252  // First see if the pipeline could receive this instruction
253  // in the current cycle.
254  if (SU->getNode()->isMachineOpcode())
255    switch (SU->getNode()->getMachineOpcode()) {
256    default:
257      if (!ResourcesModel->canReserveResources(&TII->get(
258          SU->getNode()->getMachineOpcode())))
259           return false;
260      break;
261    case TargetOpcode::EXTRACT_SUBREG:
262    case TargetOpcode::INSERT_SUBREG:
263    case TargetOpcode::SUBREG_TO_REG:
264    case TargetOpcode::REG_SEQUENCE:
265    case TargetOpcode::IMPLICIT_DEF:
266        break;
267    }
268
269  // Now see if there are no other dependencies
270  // to instructions already in the packet.
271  for (unsigned i = 0, e = Packet.size(); i != e; ++i)
272    for (const SDep &Succ : Packet[i]->Succs) {
273      // Since we do not add pseudos to packets, might as well
274      // ignore order deps.
275      if (Succ.isCtrl())
276        continue;
277
278      if (Succ.getSUnit() == SU)
279        return false;
280    }
281
282  return true;
283}
284
285/// Keep track of available resources.
286void ResourcePriorityQueue::reserveResources(SUnit *SU) {
287  // If this SU does not fit in the packet
288  // start a new one.
289  if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
290    ResourcesModel->clearResources();
291    Packet.clear();
292  }
293
294  if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
295    switch (SU->getNode()->getMachineOpcode()) {
296    default:
297      ResourcesModel->reserveResources(&TII->get(
298        SU->getNode()->getMachineOpcode()));
299      break;
300    case TargetOpcode::EXTRACT_SUBREG:
301    case TargetOpcode::INSERT_SUBREG:
302    case TargetOpcode::SUBREG_TO_REG:
303    case TargetOpcode::REG_SEQUENCE:
304    case TargetOpcode::IMPLICIT_DEF:
305      break;
306    }
307    Packet.push_back(SU);
308  }
309  // Forcefully end packet for PseudoOps.
310  else {
311    ResourcesModel->clearResources();
312    Packet.clear();
313  }
314
315  // If packet is now full, reset the state so in the next cycle
316  // we start fresh.
317  if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
318    ResourcesModel->clearResources();
319    Packet.clear();
320  }
321}
322
323int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
324  int RegBalance = 0;
325
326  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
327    return RegBalance;
328
329  // Gen estimate.
330  for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
331      MVT VT = SU->getNode()->getSimpleValueType(i);
332      if (TLI->isTypeLegal(VT)
333          && TLI->getRegClassFor(VT)
334          && TLI->getRegClassFor(VT)->getID() == RCId)
335        RegBalance += numberRCValSuccInSU(SU, RCId);
336  }
337  // Kill estimate.
338  for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
339      const SDValue &Op = SU->getNode()->getOperand(i);
340      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
341      if (isa<ConstantSDNode>(Op.getNode()))
342        continue;
343
344      if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
345          && TLI->getRegClassFor(VT)->getID() == RCId)
346        RegBalance -= numberRCValPredInSU(SU, RCId);
347  }
348  return RegBalance;
349}
350
351/// Estimates change in reg pressure from this SU.
352/// It is achieved by trivial tracking of defined
353/// and used vregs in dependent instructions.
354/// The RawPressure flag makes this function to ignore
355/// existing reg file sizes, and report raw def/use
356/// balance.
357int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
358  int RegBalance = 0;
359
360  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
361    return RegBalance;
362
363  if (RawPressure) {
364    for (const TargetRegisterClass *RC : TRI->regclasses())
365      RegBalance += rawRegPressureDelta(SU, RC->getID());
366  }
367  else {
368    for (const TargetRegisterClass *RC : TRI->regclasses()) {
369      if ((RegPressure[RC->getID()] +
370           rawRegPressureDelta(SU, RC->getID()) > 0) &&
371          (RegPressure[RC->getID()] +
372           rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
373        RegBalance += rawRegPressureDelta(SU, RC->getID());
374    }
375  }
376
377  return RegBalance;
378}
379
380// Constants used to denote relative importance of
381// heuristic components for cost computation.
382static const unsigned PriorityOne = 200;
383static const unsigned PriorityTwo = 50;
384static const unsigned PriorityThree = 15;
385static const unsigned PriorityFour = 5;
386static const unsigned ScaleOne = 20;
387static const unsigned ScaleTwo = 10;
388static const unsigned ScaleThree = 5;
389static const unsigned FactorOne = 2;
390
391/// Returns single number reflecting benefit of scheduling SU
392/// in the current cycle.
393int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
394  // Initial trivial priority.
395  int ResCount = 1;
396
397  // Do not waste time on a node that is already scheduled.
398  if (SU->isScheduled)
399    return ResCount;
400
401  // Forced priority is high.
402  if (SU->isScheduleHigh)
403    ResCount += PriorityOne;
404
405  // Adaptable scheduling
406  // A small, but very parallel
407  // region, where reg pressure is an issue.
408  if (HorizontalVerticalBalance > RegPressureThreshold) {
409    // Critical path first
410    ResCount += (SU->getHeight() * ScaleTwo);
411    // If resources are available for it, multiply the
412    // chance of scheduling.
413    if (isResourceAvailable(SU))
414      ResCount <<= FactorOne;
415
416    // Consider change to reg pressure from scheduling
417    // this SU.
418    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
419  }
420  // Default heuristic, greeady and
421  // critical path driven.
422  else {
423    // Critical path first.
424    ResCount += (SU->getHeight() * ScaleTwo);
425    // Now see how many instructions is blocked by this SU.
426    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
427    // If resources are available for it, multiply the
428    // chance of scheduling.
429    if (isResourceAvailable(SU))
430      ResCount <<= FactorOne;
431
432    ResCount -= (regPressureDelta(SU) * ScaleTwo);
433  }
434
435  // These are platform-specific things.
436  // Will need to go into the back end
437  // and accessed from here via a hook.
438  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
439    if (N->isMachineOpcode()) {
440      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
441      if (TID.isCall())
442        ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
443    }
444    else
445      switch (N->getOpcode()) {
446      default:  break;
447      case ISD::TokenFactor:
448      case ISD::CopyFromReg:
449      case ISD::CopyToReg:
450        ResCount += PriorityFour;
451        break;
452
453      case ISD::INLINEASM:
454      case ISD::INLINEASM_BR:
455        ResCount += PriorityThree;
456        break;
457      }
458  }
459  return ResCount;
460}
461
462
463/// Main resource tracking point.
464void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
465  // Use NULL entry as an event marker to reset
466  // the DFA state.
467  if (!SU) {
468    ResourcesModel->clearResources();
469    Packet.clear();
470    return;
471  }
472
473  const SDNode *ScegN = SU->getNode();
474  // Update reg pressure tracking.
475  // First update current node.
476  if (ScegN->isMachineOpcode()) {
477    // Estimate generated regs.
478    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
479      MVT VT = ScegN->getSimpleValueType(i);
480
481      if (TLI->isTypeLegal(VT)) {
482        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
483        if (RC)
484          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
485      }
486    }
487    // Estimate killed regs.
488    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
489      const SDValue &Op = ScegN->getOperand(i);
490      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
491
492      if (TLI->isTypeLegal(VT)) {
493        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
494        if (RC) {
495          if (RegPressure[RC->getID()] >
496            (numberRCValPredInSU(SU, RC->getID())))
497            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
498          else RegPressure[RC->getID()] = 0;
499        }
500      }
501    }
502    for (SDep &Pred : SU->Preds) {
503      if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
504        continue;
505      --Pred.getSUnit()->NumRegDefsLeft;
506    }
507  }
508
509  // Reserve resources for this SU.
510  reserveResources(SU);
511
512  // Adjust number of parallel live ranges.
513  // Heuristic is simple - node with no data successors reduces
514  // number of live ranges. All others, increase it.
515  unsigned NumberNonControlDeps = 0;
516
517  for (const SDep &Succ : SU->Succs) {
518    adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
519    if (!Succ.isCtrl())
520      NumberNonControlDeps++;
521  }
522
523  if (!NumberNonControlDeps) {
524    if (ParallelLiveRanges >= SU->NumPreds)
525      ParallelLiveRanges -= SU->NumPreds;
526    else
527      ParallelLiveRanges = 0;
528
529  }
530  else
531    ParallelLiveRanges += SU->NumRegDefsLeft;
532
533  // Track parallel live chains.
534  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
535  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
536}
537
538void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
539  unsigned  NodeNumDefs = 0;
540  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
541    if (N->isMachineOpcode()) {
542      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
543      // No register need be allocated for this.
544      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
545        NodeNumDefs = 0;
546        break;
547      }
548      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
549    }
550    else
551      switch(N->getOpcode()) {
552        default:     break;
553        case ISD::CopyFromReg:
554          NodeNumDefs++;
555          break;
556        case ISD::INLINEASM:
557        case ISD::INLINEASM_BR:
558          NodeNumDefs++;
559          break;
560      }
561
562  SU->NumRegDefsLeft = NodeNumDefs;
563}
564
565/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
566/// scheduled.  If SU is not itself available, then there is at least one
567/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
568/// unscheduled predecessor, we want to increase its priority: it getting
569/// scheduled will make this node available, so it is better than some other
570/// node of the same priority that will not make a node available.
571void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
572  if (SU->isAvailable) return;  // All preds scheduled.
573
574  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
575  if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
576    return;
577
578  // Okay, we found a single predecessor that is available, but not scheduled.
579  // Since it is available, it must be in the priority queue.  First remove it.
580  remove(OnlyAvailablePred);
581
582  // Reinsert the node into the priority queue, which recomputes its
583  // NumNodesSolelyBlocking value.
584  push(OnlyAvailablePred);
585}
586
587
588/// Main access point - returns next instructions
589/// to be placed in scheduling sequence.
590SUnit *ResourcePriorityQueue::pop() {
591  if (empty())
592    return nullptr;
593
594  std::vector<SUnit *>::iterator Best = Queue.begin();
595  if (!DisableDFASched) {
596    int BestCost = SUSchedulingCost(*Best);
597    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
598
599      if (SUSchedulingCost(*I) > BestCost) {
600        BestCost = SUSchedulingCost(*I);
601        Best = I;
602      }
603    }
604  }
605  // Use default TD scheduling mechanism.
606  else {
607    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
608      if (Picker(*Best, *I))
609        Best = I;
610  }
611
612  SUnit *V = *Best;
613  if (Best != std::prev(Queue.end()))
614    std::swap(*Best, Queue.back());
615
616  Queue.pop_back();
617
618  return V;
619}
620
621
622void ResourcePriorityQueue::remove(SUnit *SU) {
623  assert(!Queue.empty() && "Queue is empty!");
624  std::vector<SUnit *>::iterator I = find(Queue, SU);
625  if (I != std::prev(Queue.end()))
626    std::swap(*I, Queue.back());
627
628  Queue.pop_back();
629}
630