1//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the ScheduleDAG class, which is a base class used by
10// scheduling implementation classes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ScheduleDAGSDNodes.h"
15#include "InstrEmitter.h"
16#include "SDNodeDbgValue.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/TargetInstrInfo.h"
26#include "llvm/CodeGen/TargetLowering.h"
27#include "llvm/CodeGen/TargetRegisterInfo.h"
28#include "llvm/CodeGen/TargetSubtargetInfo.h"
29#include "llvm/Config/llvm-config.h"
30#include "llvm/MC/MCInstrItineraries.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/Target/TargetMachine.h"
35using namespace llvm;
36
37#define DEBUG_TYPE "pre-RA-sched"
38
39STATISTIC(LoadsClustered, "Number of loads clustered together");
40
41// This allows the latency-based scheduler to notice high latency instructions
42// without a target itinerary. The choice of number here has more to do with
43// balancing scheduler heuristics than with the actual machine latency.
44static cl::opt<int> HighLatencyCycles(
45  "sched-high-latency-cycles", cl::Hidden, cl::init(10),
46  cl::desc("Roughly estimate the number of cycles that 'long latency'"
47           "instructions take for targets with no itinerary"));
48
49ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
50    : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
51      InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
52
53/// Run - perform scheduling.
54///
55void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
56  BB = bb;
57  DAG = dag;
58
59  // Clear the scheduler's SUnit DAG.
60  ScheduleDAG::clearDAG();
61  Sequence.clear();
62
63  // Invoke the target's selection of scheduler.
64  Schedule();
65}
66
67/// NewSUnit - Creates a new SUnit and return a ptr to it.
68///
69SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
70#ifndef NDEBUG
71  const SUnit *Addr = nullptr;
72  if (!SUnits.empty())
73    Addr = &SUnits[0];
74#endif
75  SUnits.emplace_back(N, (unsigned)SUnits.size());
76  assert((Addr == nullptr || Addr == &SUnits[0]) &&
77         "SUnits std::vector reallocated on the fly!");
78  SUnits.back().OrigNode = &SUnits.back();
79  SUnit *SU = &SUnits.back();
80  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
81  if (!N ||
82      (N->isMachineOpcode() &&
83       N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
84    SU->SchedulingPref = Sched::None;
85  else
86    SU->SchedulingPref = TLI.getSchedulingPreference(N);
87  return SU;
88}
89
90SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
91  SUnit *SU = newSUnit(Old->getNode());
92  SU->OrigNode = Old->OrigNode;
93  SU->Latency = Old->Latency;
94  SU->isVRegCycle = Old->isVRegCycle;
95  SU->isCall = Old->isCall;
96  SU->isCallOp = Old->isCallOp;
97  SU->isTwoAddress = Old->isTwoAddress;
98  SU->isCommutable = Old->isCommutable;
99  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
100  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
101  SU->isScheduleHigh = Old->isScheduleHigh;
102  SU->isScheduleLow = Old->isScheduleLow;
103  SU->SchedulingPref = Old->SchedulingPref;
104  Old->isCloned = true;
105  return SU;
106}
107
108/// CheckForPhysRegDependency - Check if the dependency between def and use of
109/// a specified operand is a physical register dependency. If so, returns the
110/// register and the cost of copying the register.
111static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
112                                      const TargetRegisterInfo *TRI,
113                                      const TargetInstrInfo *TII,
114                                      unsigned &PhysReg, int &Cost) {
115  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
116    return;
117
118  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
119  if (Register::isVirtualRegister(Reg))
120    return;
121
122  unsigned ResNo = User->getOperand(2).getResNo();
123  if (Def->getOpcode() == ISD::CopyFromReg &&
124      cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
125    PhysReg = Reg;
126  } else if (Def->isMachineOpcode()) {
127    const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
128    if (ResNo >= II.getNumDefs() &&
129        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
130      PhysReg = Reg;
131  }
132
133  if (PhysReg != 0) {
134    const TargetRegisterClass *RC =
135        TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
136    Cost = RC->getCopyCost();
137  }
138}
139
140// Helper for AddGlue to clone node operands.
141static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
142                                SDValue ExtraOper = SDValue()) {
143  SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
144  if (ExtraOper.getNode())
145    Ops.push_back(ExtraOper);
146
147  SDVTList VTList = DAG->getVTList(VTs);
148  MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
149
150  // Store memory references.
151  SmallVector<MachineMemOperand *, 2> MMOs;
152  if (MN)
153    MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
154
155  DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
156
157  // Reset the memory references
158  if (MN)
159    DAG->setNodeMemRefs(MN, MMOs);
160}
161
162static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
163  SDNode *GlueDestNode = Glue.getNode();
164
165  // Don't add glue from a node to itself.
166  if (GlueDestNode == N) return false;
167
168  // Don't add a glue operand to something that already uses glue.
169  if (GlueDestNode &&
170      N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
171    return false;
172  }
173  // Don't add glue to something that already has a glue value.
174  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
175
176  SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
177  if (AddGlue)
178    VTs.push_back(MVT::Glue);
179
180  CloneNodeWithValues(N, DAG, VTs, Glue);
181
182  return true;
183}
184
185// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
186// node even though simply shrinking the value list is sufficient.
187static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
188  assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
189          !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
190         "expected an unused glue value");
191
192  CloneNodeWithValues(N, DAG,
193                      makeArrayRef(N->value_begin(), N->getNumValues() - 1));
194}
195
196/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
197/// This function finds loads of the same base and different offsets. If the
198/// offsets are not far apart (target specific), it add MVT::Glue inputs and
199/// outputs to ensure they are scheduled together and in order. This
200/// optimization may benefit some targets by improving cache locality.
201void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
202  SDValue Chain;
203  unsigned NumOps = Node->getNumOperands();
204  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
205    Chain = Node->getOperand(NumOps-1);
206  if (!Chain)
207    return;
208
209  // Skip any load instruction that has a tied input. There may be an additional
210  // dependency requiring a different order than by increasing offsets, and the
211  // added glue may introduce a cycle.
212  auto hasTiedInput = [this](const SDNode *N) {
213    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
214    for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
215      if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
216        return true;
217    }
218
219    return false;
220  };
221
222  // Look for other loads of the same chain. Find loads that are loading from
223  // the same base pointer and different offsets.
224  SmallPtrSet<SDNode*, 16> Visited;
225  SmallVector<int64_t, 4> Offsets;
226  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
227  bool Cluster = false;
228  SDNode *Base = Node;
229
230  if (hasTiedInput(Base))
231    return;
232
233  // This algorithm requires a reasonably low use count before finding a match
234  // to avoid uselessly blowing up compile time in large blocks.
235  unsigned UseCount = 0;
236  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
237       I != E && UseCount < 100; ++I, ++UseCount) {
238    if (I.getUse().getResNo() != Chain.getResNo())
239      continue;
240
241    SDNode *User = *I;
242    if (User == Node || !Visited.insert(User).second)
243      continue;
244    int64_t Offset1, Offset2;
245    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
246        Offset1 == Offset2 ||
247        hasTiedInput(User)) {
248      // FIXME: Should be ok if they addresses are identical. But earlier
249      // optimizations really should have eliminated one of the loads.
250      continue;
251    }
252    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
253      Offsets.push_back(Offset1);
254    O2SMap.insert(std::make_pair(Offset2, User));
255    Offsets.push_back(Offset2);
256    if (Offset2 < Offset1)
257      Base = User;
258    Cluster = true;
259    // Reset UseCount to allow more matches.
260    UseCount = 0;
261  }
262
263  if (!Cluster)
264    return;
265
266  // Sort them in increasing order.
267  llvm::sort(Offsets);
268
269  // Check if the loads are close enough.
270  SmallVector<SDNode*, 4> Loads;
271  unsigned NumLoads = 0;
272  int64_t BaseOff = Offsets[0];
273  SDNode *BaseLoad = O2SMap[BaseOff];
274  Loads.push_back(BaseLoad);
275  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
276    int64_t Offset = Offsets[i];
277    SDNode *Load = O2SMap[Offset];
278    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
279      break; // Stop right here. Ignore loads that are further away.
280    Loads.push_back(Load);
281    ++NumLoads;
282  }
283
284  if (NumLoads == 0)
285    return;
286
287  // Cluster loads by adding MVT::Glue outputs and inputs. This also
288  // ensure they are scheduled in order of increasing addresses.
289  SDNode *Lead = Loads[0];
290  SDValue InGlue = SDValue(nullptr, 0);
291  if (AddGlue(Lead, InGlue, true, DAG))
292    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
293  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
294    bool OutGlue = I < E - 1;
295    SDNode *Load = Loads[I];
296
297    // If AddGlue fails, we could leave an unsused glue value. This should not
298    // cause any
299    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
300      if (OutGlue)
301        InGlue = SDValue(Load, Load->getNumValues() - 1);
302
303      ++LoadsClustered;
304    }
305    else if (!OutGlue && InGlue.getNode())
306      RemoveUnusedGlue(InGlue.getNode(), DAG);
307  }
308}
309
310/// ClusterNodes - Cluster certain nodes which should be scheduled together.
311///
312void ScheduleDAGSDNodes::ClusterNodes() {
313  for (SDNode &NI : DAG->allnodes()) {
314    SDNode *Node = &NI;
315    if (!Node || !Node->isMachineOpcode())
316      continue;
317
318    unsigned Opc = Node->getMachineOpcode();
319    const MCInstrDesc &MCID = TII->get(Opc);
320    if (MCID.mayLoad())
321      // Cluster loads from "near" addresses into combined SUnits.
322      ClusterNeighboringLoads(Node);
323  }
324}
325
326void ScheduleDAGSDNodes::BuildSchedUnits() {
327  // During scheduling, the NodeId field of SDNode is used to map SDNodes
328  // to their associated SUnits by holding SUnits table indices. A value
329  // of -1 means the SDNode does not yet have an associated SUnit.
330  unsigned NumNodes = 0;
331  for (SDNode &NI : DAG->allnodes()) {
332    NI.setNodeId(-1);
333    ++NumNodes;
334  }
335
336  // Reserve entries in the vector for each of the SUnits we are creating.  This
337  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
338  // invalidated.
339  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
340  // This is a temporary workaround.
341  SUnits.reserve(NumNodes * 2);
342
343  // Add all nodes in depth first order.
344  SmallVector<SDNode*, 64> Worklist;
345  SmallPtrSet<SDNode*, 32> Visited;
346  Worklist.push_back(DAG->getRoot().getNode());
347  Visited.insert(DAG->getRoot().getNode());
348
349  SmallVector<SUnit*, 8> CallSUnits;
350  while (!Worklist.empty()) {
351    SDNode *NI = Worklist.pop_back_val();
352
353    // Add all operands to the worklist unless they've already been added.
354    for (const SDValue &Op : NI->op_values())
355      if (Visited.insert(Op.getNode()).second)
356        Worklist.push_back(Op.getNode());
357
358    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
359      continue;
360
361    // If this node has already been processed, stop now.
362    if (NI->getNodeId() != -1) continue;
363
364    SUnit *NodeSUnit = newSUnit(NI);
365
366    // See if anything is glued to this node, if so, add them to glued
367    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
368    // is required to be the last operand and result of a node.
369
370    // Scan up to find glued preds.
371    SDNode *N = NI;
372    while (N->getNumOperands() &&
373           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
374      N = N->getOperand(N->getNumOperands()-1).getNode();
375      assert(N->getNodeId() == -1 && "Node already inserted!");
376      N->setNodeId(NodeSUnit->NodeNum);
377      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
378        NodeSUnit->isCall = true;
379    }
380
381    // Scan down to find any glued succs.
382    N = NI;
383    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
384      SDValue GlueVal(N, N->getNumValues()-1);
385
386      // There are either zero or one users of the Glue result.
387      bool HasGlueUse = false;
388      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
389           UI != E; ++UI)
390        if (GlueVal.isOperandOf(*UI)) {
391          HasGlueUse = true;
392          assert(N->getNodeId() == -1 && "Node already inserted!");
393          N->setNodeId(NodeSUnit->NodeNum);
394          N = *UI;
395          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
396            NodeSUnit->isCall = true;
397          break;
398        }
399      if (!HasGlueUse) break;
400    }
401
402    if (NodeSUnit->isCall)
403      CallSUnits.push_back(NodeSUnit);
404
405    // Schedule zero-latency TokenFactor below any nodes that may increase the
406    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
407    // have false stalls.
408    if (NI->getOpcode() == ISD::TokenFactor)
409      NodeSUnit->isScheduleLow = true;
410
411    // If there are glue operands involved, N is now the bottom-most node
412    // of the sequence of nodes that are glued together.
413    // Update the SUnit.
414    NodeSUnit->setNode(N);
415    assert(N->getNodeId() == -1 && "Node already inserted!");
416    N->setNodeId(NodeSUnit->NodeNum);
417
418    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
419    InitNumRegDefsLeft(NodeSUnit);
420
421    // Assign the Latency field of NodeSUnit using target-provided information.
422    computeLatency(NodeSUnit);
423  }
424
425  // Find all call operands.
426  while (!CallSUnits.empty()) {
427    SUnit *SU = CallSUnits.pop_back_val();
428    for (const SDNode *SUNode = SU->getNode(); SUNode;
429         SUNode = SUNode->getGluedNode()) {
430      if (SUNode->getOpcode() != ISD::CopyToReg)
431        continue;
432      SDNode *SrcN = SUNode->getOperand(2).getNode();
433      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
434      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
435      SrcSU->isCallOp = true;
436    }
437  }
438}
439
440void ScheduleDAGSDNodes::AddSchedEdges() {
441  const TargetSubtargetInfo &ST = MF.getSubtarget();
442
443  // Check to see if the scheduler cares about latencies.
444  bool UnitLatencies = forceUnitLatencies();
445
446  // Pass 2: add the preds, succs, etc.
447  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
448    SUnit *SU = &SUnits[su];
449    SDNode *MainNode = SU->getNode();
450
451    if (MainNode->isMachineOpcode()) {
452      unsigned Opc = MainNode->getMachineOpcode();
453      const MCInstrDesc &MCID = TII->get(Opc);
454      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
455        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
456          SU->isTwoAddress = true;
457          break;
458        }
459      }
460      if (MCID.isCommutable())
461        SU->isCommutable = true;
462    }
463
464    // Find all predecessors and successors of the group.
465    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
466      if (N->isMachineOpcode() &&
467          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
468        SU->hasPhysRegClobbers = true;
469        unsigned NumUsed = InstrEmitter::CountResults(N);
470        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
471          --NumUsed;    // Skip over unused values at the end.
472        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
473          SU->hasPhysRegDefs = true;
474      }
475
476      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
477        SDNode *OpN = N->getOperand(i).getNode();
478        unsigned DefIdx = N->getOperand(i).getResNo();
479        if (isPassiveNode(OpN)) continue;   // Not scheduled.
480        SUnit *OpSU = &SUnits[OpN->getNodeId()];
481        assert(OpSU && "Node has no SUnit!");
482        if (OpSU == SU) continue;           // In the same group.
483
484        EVT OpVT = N->getOperand(i).getValueType();
485        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
486        bool isChain = OpVT == MVT::Other;
487
488        unsigned PhysReg = 0;
489        int Cost = 1;
490        // Determine if this is a physical register dependency.
491        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
492        assert((PhysReg == 0 || !isChain) &&
493               "Chain dependence via physreg data?");
494        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
495        // emits a copy from the physical register to a virtual register unless
496        // it requires a cross class copy (cost < 0). That means we are only
497        // treating "expensive to copy" register dependency as physical register
498        // dependency. This may change in the future though.
499        if (Cost >= 0 && !StressSched)
500          PhysReg = 0;
501
502        // If this is a ctrl dep, latency is 1.
503        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
504        // Special-case TokenFactor chains as zero-latency.
505        if(isChain && OpN->getOpcode() == ISD::TokenFactor)
506          OpLatency = 0;
507
508        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
509          : SDep(OpSU, SDep::Data, PhysReg);
510        Dep.setLatency(OpLatency);
511        if (!isChain && !UnitLatencies) {
512          computeOperandLatency(OpN, N, i, Dep);
513          ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep);
514        }
515
516        if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
517          // Multiple register uses are combined in the same SUnit. For example,
518          // we could have a set of glued nodes with all their defs consumed by
519          // another set of glued nodes. Register pressure tracking sees this as
520          // a single use, so to keep pressure balanced we reduce the defs.
521          //
522          // We can't tell (without more book-keeping) if this results from
523          // glued nodes or duplicate operands. As long as we don't reduce
524          // NumRegDefsLeft to zero, we handle the common cases well.
525          --OpSU->NumRegDefsLeft;
526        }
527      }
528    }
529  }
530}
531
532/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
533/// are input.  This SUnit graph is similar to the SelectionDAG, but
534/// excludes nodes that aren't interesting to scheduling, and represents
535/// glued together nodes with a single SUnit.
536void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
537  // Cluster certain nodes which should be scheduled together.
538  ClusterNodes();
539  // Populate the SUnits array.
540  BuildSchedUnits();
541  // Compute all the scheduling dependencies between nodes.
542  AddSchedEdges();
543}
544
545// Initialize NumNodeDefs for the current Node's opcode.
546void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
547  // Check for phys reg copy.
548  if (!Node)
549    return;
550
551  if (!Node->isMachineOpcode()) {
552    if (Node->getOpcode() == ISD::CopyFromReg)
553      NodeNumDefs = 1;
554    else
555      NodeNumDefs = 0;
556    return;
557  }
558  unsigned POpc = Node->getMachineOpcode();
559  if (POpc == TargetOpcode::IMPLICIT_DEF) {
560    // No register need be allocated for this.
561    NodeNumDefs = 0;
562    return;
563  }
564  if (POpc == TargetOpcode::PATCHPOINT &&
565      Node->getValueType(0) == MVT::Other) {
566    // PATCHPOINT is defined to have one result, but it might really have none
567    // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
568    // real definition.
569    NodeNumDefs = 0;
570    return;
571  }
572  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
573  // Some instructions define regs that are not represented in the selection DAG
574  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
575  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
576  DefIdx = 0;
577}
578
579// Construct a RegDefIter for this SUnit and find the first valid value.
580ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
581                                           const ScheduleDAGSDNodes *SD)
582  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
583  InitNodeNumDefs();
584  Advance();
585}
586
587// Advance to the next valid value defined by the SUnit.
588void ScheduleDAGSDNodes::RegDefIter::Advance() {
589  for (;Node;) { // Visit all glued nodes.
590    for (;DefIdx < NodeNumDefs; ++DefIdx) {
591      if (!Node->hasAnyUseOfValue(DefIdx))
592        continue;
593      ValueType = Node->getSimpleValueType(DefIdx);
594      ++DefIdx;
595      return; // Found a normal regdef.
596    }
597    Node = Node->getGluedNode();
598    if (!Node) {
599      return; // No values left to visit.
600    }
601    InitNodeNumDefs();
602  }
603}
604
605void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
606  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
607  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
608    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
609    ++SU->NumRegDefsLeft;
610  }
611}
612
613void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
614  SDNode *N = SU->getNode();
615
616  // TokenFactor operands are considered zero latency, and some schedulers
617  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
618  // whenever node latency is nonzero.
619  if (N && N->getOpcode() == ISD::TokenFactor) {
620    SU->Latency = 0;
621    return;
622  }
623
624  // Check to see if the scheduler cares about latencies.
625  if (forceUnitLatencies()) {
626    SU->Latency = 1;
627    return;
628  }
629
630  if (!InstrItins || InstrItins->isEmpty()) {
631    if (N && N->isMachineOpcode() &&
632        TII->isHighLatencyDef(N->getMachineOpcode()))
633      SU->Latency = HighLatencyCycles;
634    else
635      SU->Latency = 1;
636    return;
637  }
638
639  // Compute the latency for the node.  We use the sum of the latencies for
640  // all nodes glued together into this SUnit.
641  SU->Latency = 0;
642  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
643    if (N->isMachineOpcode())
644      SU->Latency += TII->getInstrLatency(InstrItins, N);
645}
646
647void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
648                                               unsigned OpIdx, SDep& dep) const{
649  // Check to see if the scheduler cares about latencies.
650  if (forceUnitLatencies())
651    return;
652
653  if (dep.getKind() != SDep::Data)
654    return;
655
656  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
657  if (Use->isMachineOpcode())
658    // Adjust the use operand index by num of defs.
659    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
660  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
661  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
662      !BB->succ_empty()) {
663    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
664    if (Register::isVirtualRegister(Reg))
665      // This copy is a liveout value. It is likely coalesced, so reduce the
666      // latency so not to penalize the def.
667      // FIXME: need target specific adjustment here?
668      Latency = (Latency > 1) ? Latency - 1 : 1;
669  }
670  if (Latency >= 0)
671    dep.setLatency(Latency);
672}
673
674void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
676  dumpNodeName(SU);
677  dbgs() << ": ";
678
679  if (!SU.getNode()) {
680    dbgs() << "PHYS REG COPY\n";
681    return;
682  }
683
684  SU.getNode()->dump(DAG);
685  dbgs() << "\n";
686  SmallVector<SDNode *, 4> GluedNodes;
687  for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
688    GluedNodes.push_back(N);
689  while (!GluedNodes.empty()) {
690    dbgs() << "    ";
691    GluedNodes.back()->dump(DAG);
692    dbgs() << "\n";
693    GluedNodes.pop_back();
694  }
695#endif
696}
697
698void ScheduleDAGSDNodes::dump() const {
699#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
700  if (EntrySU.getNode() != nullptr)
701    dumpNodeAll(EntrySU);
702  for (const SUnit &SU : SUnits)
703    dumpNodeAll(SU);
704  if (ExitSU.getNode() != nullptr)
705    dumpNodeAll(ExitSU);
706#endif
707}
708
709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
710void ScheduleDAGSDNodes::dumpSchedule() const {
711  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
712    if (SUnit *SU = Sequence[i])
713      dumpNode(*SU);
714    else
715      dbgs() << "**** NOOP ****\n";
716  }
717}
718#endif
719
720#ifndef NDEBUG
721/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
722/// their state is consistent with the nodes listed in Sequence.
723///
724void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
725  unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
726  unsigned Noops = 0;
727  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
728    if (!Sequence[i])
729      ++Noops;
730  assert(Sequence.size() - Noops == ScheduledNodes &&
731         "The number of nodes scheduled doesn't match the expected number!");
732}
733#endif // NDEBUG
734
735/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
736static void
737ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
738                   SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
739                   DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) {
740  if (!N->getHasDebugValue())
741    return;
742
743  // Opportunistically insert immediate dbg_value uses, i.e. those with the same
744  // source order number as N.
745  MachineBasicBlock *BB = Emitter.getBlock();
746  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
747  for (auto DV : DAG->GetDbgValues(N)) {
748    if (DV->isEmitted())
749      continue;
750    unsigned DVOrder = DV->getOrder();
751    if (!Order || DVOrder == Order) {
752      MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
753      if (DbgMI) {
754        Orders.push_back({DVOrder, DbgMI});
755        BB->insert(InsertPos, DbgMI);
756      }
757    }
758  }
759}
760
761// ProcessSourceNode - Process nodes with source order numbers. These are added
762// to a vector which EmitSchedule uses to determine how to insert dbg_value
763// instructions in the right order.
764static void
765ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
766                  DenseMap<SDValue, Register> &VRBaseMap,
767                  SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
768                  SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) {
769  unsigned Order = N->getIROrder();
770  if (!Order || Seen.count(Order)) {
771    // Process any valid SDDbgValues even if node does not have any order
772    // assigned.
773    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
774    return;
775  }
776
777  // If a new instruction was generated for this Order number, record it.
778  // Otherwise, leave this order number unseen: we will either find later
779  // instructions for it, or leave it unseen if there were no instructions at
780  // all.
781  if (NewInsn) {
782    Seen.insert(Order);
783    Orders.push_back({Order, NewInsn});
784  }
785
786  // Even if no instruction was generated, a Value may have become defined via
787  // earlier nodes. Try to process them now.
788  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
789}
790
791void ScheduleDAGSDNodes::
792EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
793                MachineBasicBlock::iterator InsertPos) {
794  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
795       I != E; ++I) {
796    if (I->isCtrl()) continue;  // ignore chain preds
797    if (I->getSUnit()->CopyDstRC) {
798      // Copy to physical register.
799      DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit());
800      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
801      // Find the destination physical register.
802      Register Reg;
803      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
804             EE = SU->Succs.end(); II != EE; ++II) {
805        if (II->isCtrl()) continue;  // ignore chain preds
806        if (II->getReg()) {
807          Reg = II->getReg();
808          break;
809        }
810      }
811      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
812        .addReg(VRI->second);
813    } else {
814      // Copy from physical register.
815      assert(I->getReg() && "Unknown physical register!");
816      Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
817      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
818      (void)isNew; // Silence compiler warning.
819      assert(isNew && "Node emitted out of order - early");
820      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
821        .addReg(I->getReg());
822    }
823    break;
824  }
825}
826
827/// EmitSchedule - Emit the machine code in scheduled order. Return the new
828/// InsertPos and MachineBasicBlock that contains this insertion
829/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
830/// not necessarily refer to returned BB. The emitter may split blocks.
831MachineBasicBlock *ScheduleDAGSDNodes::
832EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
833  InstrEmitter Emitter(BB, InsertPos);
834  DenseMap<SDValue, Register> VRBaseMap;
835  DenseMap<SUnit*, Register> CopyVRBaseMap;
836  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
837  SmallSet<Register, 8> Seen;
838  bool HasDbg = DAG->hasDebugValues();
839
840  // Emit a node, and determine where its first instruction is for debuginfo.
841  // Zero, one, or multiple instructions can be created when emitting a node.
842  auto EmitNode =
843      [&](SDNode *Node, bool IsClone, bool IsCloned,
844          DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * {
845    // Fetch instruction prior to this, or end() if nonexistant.
846    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
847      if (I == BB->begin())
848        return BB->end();
849      else
850        return std::prev(Emitter.getInsertPos());
851    };
852
853    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
854    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
855    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
856
857    // If the iterator did not change, no instructions were inserted.
858    if (Before == After)
859      return nullptr;
860
861    MachineInstr *MI;
862    if (Before == BB->end()) {
863      // There were no prior instructions; the new ones must start at the
864      // beginning of the block.
865      MI = &Emitter.getBlock()->instr_front();
866    } else {
867      // Return first instruction after the pre-existing instructions.
868      MI = &*std::next(Before);
869    }
870
871    if (MI->isCandidateForCallSiteEntry() &&
872        DAG->getTarget().Options.EmitCallSiteInfo)
873      MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
874
875    if (DAG->getNoMergeSiteInfo(Node)) {
876      MI->setFlag(MachineInstr::MIFlag::NoMerge);
877    }
878
879    return MI;
880  };
881
882  // If this is the first BB, emit byval parameter dbg_value's.
883  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
884    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
885    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
886    for (; PDI != PDE; ++PDI) {
887      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
888      if (DbgMI) {
889        BB->insert(InsertPos, DbgMI);
890        // We re-emit the dbg_value closer to its use, too, after instructions
891        // are emitted to the BB.
892        (*PDI)->clearIsEmitted();
893      }
894    }
895  }
896
897  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
898    SUnit *SU = Sequence[i];
899    if (!SU) {
900      // Null SUnit* is a noop.
901      TII->insertNoop(*Emitter.getBlock(), InsertPos);
902      continue;
903    }
904
905    // For pre-regalloc scheduling, create instructions corresponding to the
906    // SDNode and any glued SDNodes and append them to the block.
907    if (!SU->getNode()) {
908      // Emit a copy.
909      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
910      continue;
911    }
912
913    SmallVector<SDNode *, 4> GluedNodes;
914    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
915      GluedNodes.push_back(N);
916    while (!GluedNodes.empty()) {
917      SDNode *N = GluedNodes.back();
918      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
919      // Remember the source order of the inserted instruction.
920      if (HasDbg)
921        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
922
923      if (MDNode *MD = DAG->getHeapAllocSite(N))
924        if (NewInsn && NewInsn->isCall())
925          NewInsn->setHeapAllocMarker(MF, MD);
926
927      GluedNodes.pop_back();
928    }
929    auto NewInsn =
930        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
931    // Remember the source order of the inserted instruction.
932    if (HasDbg)
933      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
934                        NewInsn);
935
936    if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
937      if (NewInsn && NewInsn->isCall())
938        NewInsn->setHeapAllocMarker(MF, MD);
939    }
940  }
941
942  // Insert all the dbg_values which have not already been inserted in source
943  // order sequence.
944  if (HasDbg) {
945    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
946
947    // Sort the source order instructions and use the order to insert debug
948    // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
949    // regardless of the host's implementation fo std::sort.
950    llvm::stable_sort(Orders, less_first());
951    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
952                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
953                       return LHS->getOrder() < RHS->getOrder();
954                     });
955
956    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
957    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
958    // Now emit the rest according to source order.
959    unsigned LastOrder = 0;
960    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
961      unsigned Order = Orders[i].first;
962      MachineInstr *MI = Orders[i].second;
963      // Insert all SDDbgValue's whose order(s) are before "Order".
964      assert(MI);
965      for (; DI != DE; ++DI) {
966        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
967          break;
968        if ((*DI)->isEmitted())
969          continue;
970
971        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
972        if (DbgMI) {
973          if (!LastOrder)
974            // Insert to start of the BB (after PHIs).
975            BB->insert(BBBegin, DbgMI);
976          else {
977            // Insert at the instruction, which may be in a different
978            // block, if the block was split by a custom inserter.
979            MachineBasicBlock::iterator Pos = MI;
980            MI->getParent()->insert(Pos, DbgMI);
981          }
982        }
983      }
984      LastOrder = Order;
985    }
986    // Add trailing DbgValue's before the terminator. FIXME: May want to add
987    // some of them before one or more conditional branches?
988    SmallVector<MachineInstr*, 8> DbgMIs;
989    for (; DI != DE; ++DI) {
990      if ((*DI)->isEmitted())
991        continue;
992      assert((*DI)->getOrder() >= LastOrder &&
993             "emitting DBG_VALUE out of order");
994      if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
995        DbgMIs.push_back(DbgMI);
996    }
997
998    MachineBasicBlock *InsertBB = Emitter.getBlock();
999    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
1000    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
1001
1002    SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
1003    SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
1004    // Now emit the rest according to source order.
1005    LastOrder = 0;
1006    for (const auto &InstrOrder : Orders) {
1007      unsigned Order = InstrOrder.first;
1008      MachineInstr *MI = InstrOrder.second;
1009      if (!MI)
1010        continue;
1011
1012      // Insert all SDDbgLabel's whose order(s) are before "Order".
1013      for (; DLI != DLE &&
1014             (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
1015             ++DLI) {
1016        MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
1017        if (DbgMI) {
1018          if (!LastOrder)
1019            // Insert to start of the BB (after PHIs).
1020            BB->insert(BBBegin, DbgMI);
1021          else {
1022            // Insert at the instruction, which may be in a different
1023            // block, if the block was split by a custom inserter.
1024            MachineBasicBlock::iterator Pos = MI;
1025            MI->getParent()->insert(Pos, DbgMI);
1026          }
1027        }
1028      }
1029      if (DLI == DLE)
1030        break;
1031
1032      LastOrder = Order;
1033    }
1034  }
1035
1036  InsertPos = Emitter.getInsertPos();
1037  return Emitter.getBlock();
1038}
1039
1040/// Return the basic block label.
1041std::string ScheduleDAGSDNodes::getDAGName() const {
1042  return "sunit-dag." + BB->getFullName();
1043}
1044