// ScheduleDAGFast.cpp — LLVM source at revision 261991.
1193323Sed//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This implements a fast scheduler.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#define DEBUG_TYPE "pre-RA-sched"
15249423Sdim#include "llvm/CodeGen/SchedulerRegistry.h"
16249423Sdim#include "InstrEmitter.h"
17193323Sed#include "ScheduleDAGSDNodes.h"
18249423Sdim#include "llvm/ADT/STLExtras.h"
19249423Sdim#include "llvm/ADT/SmallSet.h"
20249423Sdim#include "llvm/ADT/Statistic.h"
21193323Sed#include "llvm/CodeGen/SelectionDAGISel.h"
22249423Sdim#include "llvm/IR/DataLayout.h"
23249423Sdim#include "llvm/IR/InlineAsm.h"
24193323Sed#include "llvm/Support/Debug.h"
25198090Srdivacky#include "llvm/Support/ErrorHandling.h"
26198090Srdivacky#include "llvm/Support/raw_ostream.h"
27249423Sdim#include "llvm/Target/TargetInstrInfo.h"
28249423Sdim#include "llvm/Target/TargetRegisterInfo.h"
29193323Sedusing namespace llvm;
30193323Sed
// Statistics reported by this scheduler (visible with -stats).
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical copies");

// Register both schedulers defined in this file with the scheduler registry
// so they can be selected by name.
static RegisterScheduler
  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
                   createFastDAGScheduler);
static RegisterScheduler
  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
                        createDAGLinearizer);
41193323Sed
42243830Sdim
43193323Sednamespace {
44193323Sed  /// FastPriorityQueue - A degenerate priority queue that considers
45193323Sed  /// all nodes to have the same priority.
46193323Sed  ///
47198892Srdivacky  struct FastPriorityQueue {
48193323Sed    SmallVector<SUnit *, 16> Queue;
49193323Sed
50193323Sed    bool empty() const { return Queue.empty(); }
51234353Sdim
52193323Sed    void push(SUnit *U) {
53193323Sed      Queue.push_back(U);
54193323Sed    }
55193323Sed
56193323Sed    SUnit *pop() {
57193323Sed      if (empty()) return NULL;
58193323Sed      SUnit *V = Queue.back();
59193323Sed      Queue.pop_back();
60193323Sed      return V;
61193323Sed    }
62193323Sed  };
63193323Sed
//===----------------------------------------------------------------------===//
/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
///
class ScheduleDAGFast : public ScheduleDAGSDNodes {
private:
  /// AvailableQueue - The priority queue to use for the available SUnits.
  FastPriorityQueue AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  // LiveRegCycles - Parallel to LiveRegDefs: the cycle at which each live
  // physical register became live.
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGFast(MachineFunction &mf)
    : ScheduleDAGSDNodes(mf) {}

  /// Schedule - Order the SUnits using bottom-up list scheduling.
  void Schedule();

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Thin wrapper over SUnit::addPred; returns nothing.
  void AddPred(SUnit *SU, const SDep &D) {
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Thin wrapper over SUnit::removePred; returns nothing.
  void RemovePred(SUnit *SU, const SDep &D) {
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
  void ListScheduleBottomUp();

  /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
  bool forceUnitLatencies() const { return true; }
};
112193323Sed}  // end anonymous namespace
113193323Sed
114193323Sed
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  // Reset live-physreg tracking; nothing is live before scheduling starts.
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();
}
132193323Sed
133193323Sed//===----------------------------------------------------------------------===//
134193323Sed//  Bottom-Up Scheduling
135193323Sed//===----------------------------------------------------------------------===//
136193323Sed
137193323Sed/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
138193323Sed/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
139193323Sedvoid ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
140193323Sed  SUnit *PredSU = PredEdge->getSUnit();
141198090Srdivacky
142193323Sed#ifndef NDEBUG
143198090Srdivacky  if (PredSU->NumSuccsLeft == 0) {
144202375Srdivacky    dbgs() << "*** Scheduling failed! ***\n";
145193323Sed    PredSU->dump(this);
146202375Srdivacky    dbgs() << " has been released too many times!\n";
147198090Srdivacky    llvm_unreachable(0);
148193323Sed  }
149193323Sed#endif
150198090Srdivacky  --PredSU->NumSuccsLeft;
151198090Srdivacky
152193323Sed  // If all the node's successors are scheduled, this node is ready
153193323Sed  // to be scheduled. Ignore the special EntrySU node.
154193323Sed  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
155193323Sed    PredSU->isAvailable = true;
156193323Sed    AvailableQueue.push(PredSU);
157193323Sed  }
158193323Sed}
159193323Sed
160193323Sedvoid ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
161193323Sed  // Bottom up: release predecessors
162193323Sed  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
163193323Sed       I != E; ++I) {
164193323Sed    ReleasePred(SU, &*I);
165193323Sed    if (I->isAssignedRegDep()) {
166193323Sed      // This is a physical register dependency and it's impossible or
167234353Sdim      // expensive to copy the register. Make sure nothing that can
168193323Sed      // clobber the register is scheduled between the predecessor and
169193323Sed      // this node.
170193323Sed      if (!LiveRegDefs[I->getReg()]) {
171193323Sed        ++NumLiveRegs;
172193323Sed        LiveRegDefs[I->getReg()] = I->getSUnit();
173193323Sed        LiveRegCycles[I->getReg()] = CurCycle;
174193323Sed      }
175193323Sed    }
176193323Sed  }
177193323Sed}
178193323Sed
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // The register's live range ends here if the successor that made it
      // live was recorded at this node's height.
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
}
209193323Sed
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
/// Returns the clone, or NULL if the node cannot be duplicated (e.g. it
/// produces or consumes glue).
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
  // Nodes glued to others cannot be duplicated in isolation.
  if (SU->getNode()->getGluedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  // A glue result forbids cloning; a chain result (MVT::Other) suggests a
  // folded memory operand that we may be able to unfold instead.
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  // Consuming glue likewise forbids cloning.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    // Split the folded load out of N into a separate load node plus the
    // remaining operation, then retarget all users.
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    // The old node's final value (its chain) is now produced by the load.
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // NOTE: this NewSU deliberately shadows the outer declaration; every use
    // up to the trailing "SU = NewSU" refers to this unfolded-node SUnit.
    SUnit *NewSU = newSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    // Propagate two-address and commutability flags from the instr desc.
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatileness.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = newSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
    }

    // Partition SU's edges: the chain pred, preds feeding the load, preds
    // feeding the remaining op, chain succs, and data succs.
    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPred = *I;
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Rewire each partition onto LoadSU / NewSU as appropriate.
    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad) {
        AddPred(LoadSU, Pred);
      }
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    // The unfolded op must wait for its load.
    if (isNewLoad) {
      SDep D(LoadSU, SDep::Barrier);
      D.setLatency(LoadSU->Latency);
      AddPred(NewSU, D);
    }

    ++NumUnfolds;

    // If the unfolded op is already schedulable, we are done; otherwise fall
    // through and clone it below.
    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      // Deleting while iterating would invalidate I; defer the removals.
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  ++NumDups;
  return NewSU;
}
384193323Sed
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  // CopyFromSU copies the value out of SrcRC into DestRC...
  SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  // ...and CopyToSU copies it back from DestRC into SrcRC.
  SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      // Removing while iterating would invalidate I; defer the removals.
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    RemovePred(DelDeps[i].first, DelDeps[i].second);
  }
  // Chain SU -> CopyFromSU -> CopyToSU with data dependencies.
  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
429193323Sed
430193323Sed/// getPhysicalRegisterVT - Returns the ValueType of the physical register
431193323Sed/// definition of the specified node.
432193323Sed/// FIXME: Move to SelectionDAG?
433198090Srdivackystatic EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
434193323Sed                                 const TargetInstrInfo *TII) {
435224145Sdim  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
436224145Sdim  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
437224145Sdim  unsigned NumRes = MCID.getNumDefs();
438234353Sdim  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
439193323Sed    if (Reg == *ImpDef)
440193323Sed      break;
441193323Sed    ++NumRes;
442193323Sed  }
443193323Sed  return N->getValueType(NumRes);
444193323Sed}
445193323Sed
446212904Sdim/// CheckForLiveRegDef - Return true and update live register vector if the
447212904Sdim/// specified register def of the specified SUnit clobbers any "live" registers.
448212904Sdimstatic bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
449212904Sdim                               std::vector<SUnit*> &LiveRegDefs,
450212904Sdim                               SmallSet<unsigned, 4> &RegAdded,
451261991Sdim                               SmallVectorImpl<unsigned> &LRegs,
452212904Sdim                               const TargetRegisterInfo *TRI) {
453212904Sdim  bool Added = false;
454239462Sdim  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
455239462Sdim    if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
456239462Sdim      if (RegAdded.insert(*AI)) {
457239462Sdim        LRegs.push_back(*AI);
458212904Sdim        Added = true;
459212904Sdim      }
460212904Sdim    }
461239462Sdim  }
462212904Sdim  return Added;
463212904Sdim}
464212904Sdim
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
/// Interfering registers are appended to LRegs.
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
                                              SmallVectorImpl<unsigned> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
    }
  }

  // Check every node in this unit's glued sequence for physreg clobbers.
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      // Walk the (flag word, registers...) groups encoded in the node's
      // operand list.
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals; // Skip a non-def operand group.
      }
      continue;
    }
    if (!Node->isMachineOpcode())
      continue;
    // Machine instructions clobber their implicit defs.
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
    }
  }
  return !LRegs.empty();
}
522193323Sed
523193323Sed
524193323Sed/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
525193323Sed/// schedulers.
526193323Sedvoid ScheduleDAGFast::ListScheduleBottomUp() {
527193323Sed  unsigned CurCycle = 0;
528193323Sed
529193323Sed  // Release any predecessors of the special Exit node.
530193323Sed  ReleasePredecessors(&ExitSU, CurCycle);
531193323Sed
532193323Sed  // Add root to Available queue.
533193323Sed  if (!SUnits.empty()) {
534193323Sed    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
535193323Sed    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
536193323Sed    RootSU->isAvailable = true;
537193323Sed    AvailableQueue.push(RootSU);
538193323Sed  }
539193323Sed
540193323Sed  // While Available queue is not empty, grab the node with the highest
541193323Sed  // priority. If it is not ready put it back.  Schedule the node.
542193323Sed  SmallVector<SUnit*, 4> NotReady;
543193323Sed  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
544193323Sed  Sequence.reserve(SUnits.size());
545193323Sed  while (!AvailableQueue.empty()) {
546193323Sed    bool Delayed = false;
547193323Sed    LRegsMap.clear();
548193323Sed    SUnit *CurSU = AvailableQueue.pop();
549193323Sed    while (CurSU) {
550193323Sed      SmallVector<unsigned, 4> LRegs;
551193323Sed      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
552193323Sed        break;
553193323Sed      Delayed = true;
554193323Sed      LRegsMap.insert(std::make_pair(CurSU, LRegs));
555193323Sed
556193323Sed      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
557193323Sed      NotReady.push_back(CurSU);
558193323Sed      CurSU = AvailableQueue.pop();
559193323Sed    }
560193323Sed
561193323Sed    // All candidates are delayed due to live physical reg dependencies.
562193323Sed    // Try code duplication or inserting cross class copies
563193323Sed    // to resolve it.
564193323Sed    if (Delayed && !CurSU) {
565193323Sed      if (!CurSU) {
566193323Sed        // Try duplicating the nodes that produces these
567193323Sed        // "expensive to copy" values to break the dependency. In case even
568193323Sed        // that doesn't work, insert cross class copies.
569193323Sed        SUnit *TrySU = NotReady[0];
570261991Sdim        SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
571193323Sed        assert(LRegs.size() == 1 && "Can't handle this yet!");
572193323Sed        unsigned Reg = LRegs[0];
573193323Sed        SUnit *LRDef = LiveRegDefs[Reg];
574198090Srdivacky        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
575193323Sed        const TargetRegisterClass *RC =
576210299Sed          TRI->getMinimalPhysRegClass(Reg, VT);
577193323Sed        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
578193323Sed
579221345Sdim        // If cross copy register class is the same as RC, then it must be
580221345Sdim        // possible copy the value directly. Do not try duplicate the def.
581221345Sdim        // If cross copy register class is not the same as RC, then it's
582221345Sdim        // possible to copy the value but it require cross register class copies
583221345Sdim        // and it is expensive.
584221345Sdim        // If cross copy register class is null, then it's not possible to copy
585221345Sdim        // the value at all.
586193323Sed        SUnit *NewDef = 0;
587221345Sdim        if (DestRC != RC) {
588193323Sed          NewDef = CopyAndMoveSuccessors(LRDef);
589221345Sdim          if (!DestRC && !NewDef)
590221345Sdim            report_fatal_error("Can't handle live physical "
591221345Sdim                               "register dependency!");
592221345Sdim        }
593193323Sed        if (!NewDef) {
594193323Sed          // Issue copies, these can be expensive cross register class copies.
595193323Sed          SmallVector<SUnit*, 2> Copies;
596193323Sed          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
597202375Srdivacky          DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
598198090Srdivacky                       << " to SU #" << Copies.front()->NodeNum << "\n");
599243830Sdim          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
600193323Sed          NewDef = Copies.back();
601193323Sed        }
602193323Sed
603202375Srdivacky        DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
604198090Srdivacky                     << " to SU #" << TrySU->NodeNum << "\n");
605193323Sed        LiveRegDefs[Reg] = NewDef;
606243830Sdim        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
607193323Sed        TrySU->isAvailable = false;
608193323Sed        CurSU = NewDef;
609193323Sed      }
610193323Sed
611193323Sed      if (!CurSU) {
612198090Srdivacky        llvm_unreachable("Unable to resolve live physical register dependencies!");
613193323Sed      }
614193323Sed    }
615193323Sed
616193323Sed    // Add the nodes that aren't ready back onto the available list.
617193323Sed    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
618193323Sed      NotReady[i]->isPending = false;
619193323Sed      // May no longer be available due to backtracking.
620193323Sed      if (NotReady[i]->isAvailable)
621193323Sed        AvailableQueue.push(NotReady[i]);
622193323Sed    }
623193323Sed    NotReady.clear();
624193323Sed
625193323Sed    if (CurSU)
626193323Sed      ScheduleNodeBottomUp(CurSU, CurCycle);
627193323Sed    ++CurCycle;
628193323Sed  }
629193323Sed
630198090Srdivacky  // Reverse the order since it is bottom up.
631193323Sed  std::reverse(Sequence.begin(), Sequence.end());
632198090Srdivacky
633193323Sed#ifndef NDEBUG
634234353Sdim  VerifyScheduledSequence(/*isBottomUp=*/true);
635193323Sed#endif
636193323Sed}
637193323Sed
638243830Sdim
namespace {
//===----------------------------------------------------------------------===//
// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the
// DAG in topological order.
// IMPORTANT: this may not work for targets with phyreg dependency.
//
class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
public:
  ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}

  // Linearize the DAG; no real scheduling decisions are made.
  void Schedule();

  // Emit the recorded node sequence as machine instructions.
  MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);

private:
  // The order in which SDNodes were visited by ScheduleNode.
  std::vector<SDNode*> Sequence;
  DenseMap<SDNode*, SDNode*> GluedMap;  // Cache glue to its user

  // ScheduleNode - Append N to Sequence and recursively visit operands whose
  // remaining-use count (stored in the node id) drops to zero.
  void ScheduleNode(SDNode *N);
};
} // end anonymous namespace
660243830Sdim
void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
  // The node id is reused as a remaining-use counter; a node may only be
  // scheduled once all its uses have been released (id == 0).
  if (N->getNodeId() != 0)
    llvm_unreachable(0);

  if (!N->isMachineOpcode() &&
      (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
    // These nodes do not need to be translated into MIs.
    return;

  DEBUG(dbgs() << "\n*** Scheduling: ");
  DEBUG(N->dump(DAG));
  Sequence.push_back(N);

  unsigned NumOps = N->getNumOperands();
  if (unsigned NumLeft = NumOps) {
    SDNode *GluedOpN = 0;
    // Visit operands from last to first.
    do {
      const SDValue &Op = N->getOperand(NumLeft-1);
      SDNode *OpN = Op.getNode();

      if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
        // Schedule glue operand right above N.
        GluedOpN = OpN;
        assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
        OpN->setNodeId(0);
        ScheduleNode(OpN);
        continue;
      }

      if (OpN == GluedOpN)
        // Glue operand is already scheduled.
        continue;

      DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
      if (DI != GluedMap.end() && DI->second != N)
        // Users of glues are counted against the glued users.
        OpN = DI->second;

      // Release one use of the operand; schedule it when none remain.
      unsigned Degree = OpN->getNodeId();
      assert(Degree > 0 && "Predecessor over-released!");
      OpN->setNodeId(--Degree);
      if (Degree == 0)
        ScheduleNode(OpN);
    } while (--NumLeft);
  }
}
707243830Sdim
708243830Sdim/// findGluedUser - Find the representative use of a glue value by walking
709243830Sdim/// the use chain.
710243830Sdimstatic SDNode *findGluedUser(SDNode *N) {
711243830Sdim  while (SDNode *Glued = N->getGluedUser())
712243830Sdim    N = Glued;
713243830Sdim  return N;
714243830Sdim}
715243830Sdim
/// Schedule - Prepare the DAG for linearization and linearize it.  Each
/// node's id is reused as a countdown of unscheduled uses; ScheduleNode
/// appends a node to Sequence once its count hits zero.
void ScheduleDAGLinearize::Schedule() {
  DEBUG(dbgs() << "********** DAG Linearization **********\n");

  SmallVector<SDNode*, 8> Glues;
  unsigned DAGSize = 0;
  for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
         E = DAG->allnodes_end(); I != E; ++I) {
    SDNode *N = I;

    // Use node id to record degree.
    unsigned Degree = N->use_size();
    N->setNodeId(Degree);
    unsigned NumVals = N->getNumValues();
    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
        N->hasAnyUseOfValue(NumVals-1)) {
      // N's last result is a glue value with at least one use; remember the
      // representative (transitive) glue user so N's other uses can be
      // transferred to it in the fixup loop below.
      SDNode *User = findGluedUser(N);
      if (User) {
        Glues.push_back(N);
        GluedMap.insert(std::make_pair(N, User));
      }
    }

    // Only nodes that will actually be emitted count toward the sequence
    // size (this mirrors the early-return test in ScheduleNode).
    if (N->isMachineOpcode() ||
        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
      ++DAGSize;
  }

  for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
    SDNode *Glue = Glues[i];
    SDNode *GUser = GluedMap[Glue];
    unsigned Degree = Glue->getNodeId();
    unsigned UDegree = GUser->getNodeId();

    // Glue user must be scheduled together with the glue operand. So other
    // users of the glue operand must be treated as its users.
    SDNode *ImmGUser = Glue->getGluedUser();
    for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
         ui != ue; ++ui)
      if (*ui == ImmGUser)
        // Uses by the immediate glue user are not transferred; that user
        // releases Glue directly through the glue edge.
        --Degree;
    GUser->setNodeId(UDegree + Degree);
    // Leave Glue with exactly one pending use: the glue edge itself, which
    // ScheduleNode clears when it schedules the glue user.
    Glue->setNodeId(1);
  }

  Sequence.reserve(DAGSize);
  ScheduleNode(DAG->getRoot().getNode());
}
763243830Sdim
764243830SdimMachineBasicBlock*
765243830SdimScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
766243830Sdim  InstrEmitter Emitter(BB, InsertPos);
767243830Sdim  DenseMap<SDValue, unsigned> VRBaseMap;
768243830Sdim
769243830Sdim  DEBUG({
770243830Sdim      dbgs() << "\n*** Final schedule ***\n";
771243830Sdim    });
772243830Sdim
773243830Sdim  // FIXME: Handle dbg_values.
774243830Sdim  unsigned NumNodes = Sequence.size();
775243830Sdim  for (unsigned i = 0; i != NumNodes; ++i) {
776243830Sdim    SDNode *N = Sequence[NumNodes-i-1];
777243830Sdim    DEBUG(N->dump(DAG));
778243830Sdim    Emitter.EmitNode(N, false, false, VRBaseMap);
779243830Sdim  }
780243830Sdim
781243830Sdim  DEBUG(dbgs() << '\n');
782243830Sdim
783243830Sdim  InsertPos = Emitter.getInsertPos();
784243830Sdim  return Emitter.getBlock();
785243830Sdim}
786243830Sdim
787243830Sdim//===----------------------------------------------------------------------===//
788193323Sed//                         Public Constructor Functions
789193323Sed//===----------------------------------------------------------------------===//
790193323Sed
791193323Sedllvm::ScheduleDAGSDNodes *
792193323Sedllvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
793193323Sed  return new ScheduleDAGFast(*IS->MF);
794193323Sed}
795243830Sdim
796243830Sdimllvm::ScheduleDAGSDNodes *
797243830Sdimllvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
798243830Sdim  return new ScheduleDAGLinearize(*IS->MF);
799243830Sdim}
800