ScheduleDAGInstrs.cpp revision 223017
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()),
    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
    LoopRegs(MLI, MDT), FirstDbgValue(0) {
  DbgValues.clear();
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
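///
/// For illustration (a hypothetical IR snippet, not taken from this file),
/// the kind of sequence this looks through is:
///
///   %i = ptrtoint i8* %p to i64
///   %j = add i64 %i, 16
///   %q = inttoptr i64 %j to i8*
///
/// where a query starting from %q should be traced back to %p as the
/// underlying object.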
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObject.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call GetUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch()) {
      MachineBasicBlock *Header = ML->getHeader();
      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
           E = Header->livein_end(); I != E; ++I)
        LoopLiveInRegs.insert(*I);
      LoopRegs.VisitLoop(ML);
    }
}

/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and uses lists. This is because we want
/// to make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the live-out registers
/// are used by instructions in the fallthrough block.
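///
/// For example (an illustrative scenario, not from the original comment): if
/// the block ends in a return, ExitSU is recorded here as a user of the
/// physical return-value registers, so the instructions that define them
/// later receive data edges to ExitSU and are not scheduled so late that
/// their latency spills past the end of the block.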
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
      Uses[Reg].push_back(&ExitSU);
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // We'll be allocating one SUnit for each instruction, plus one for
  // the region exit node.
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
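  // (Illustrative example, not from the original comments: a reference to a
  // spill slot, whose PseudoSourceValue reports that it cannot alias LLVM IR
  // values, is tracked in the NonAlias* maps and only needs edges to other
  // references of that same Value; a reference whose underlying object may
  // alias other memory is tracked in the Alias* maps and is additionally
  // chained to AliasChain/BarrierChain below.)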

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
  }

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    const TargetInstrDesc &TID = MI->getDesc();
    assert(!TID.isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");
    // Create the SUnit for this MI.
    SUnit *SU = NewSUnit(MI);
    SU->isCall = TID.isCall();
    SU->isCommutable = TID.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");

      std::vector<SUnit *> &UseList = Uses[Reg];
      // Defs are pushed in the order they are visited and never reordered.
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies. For anti
      // dependencies we use a latency of 0 because for a multi-issue
      // target we want to allow the defining instruction to issue
      // in the same cycle as the using instruction.
      // TODO: Using a latency of 1 here for output dependencies assumes
      //       there's no cost for reusing registers.
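      // (Illustration, not from the original comments: if "r1 = r2 + r3" is
      // followed later in the block by a redefinition "r2 = ...", the later
      // def gets an anti edge from this use of r2 with latency 0, so both
      // instructions may still issue in the same cycle on a multi-issue
      // target.)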
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        if (DefSU == &ExitSU)
          continue;
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        std::vector<SUnit *> &MemDefList = Defs[*Alias];
        for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
          SUnit *DefSU = MemDefList[i];
          if (DefSU == &ExitSU)
            continue;
          if (DefSU != SU &&
              (Kind != SDep::Output || !MO.isDead() ||
               !DefSU->getInstr()->registerDefIsDead(*Alias)))
            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
        }
      }

      if (MO.isDef()) {
        // Add any data dependencies.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU == SU)
            continue;
          unsigned LDataLatency = DataLatency;
          // Optionally add in a special extra latency for nodes that
          // feed addresses.
          // TODO: Do this for register aliases too.
          // TODO: Perhaps we should get rid of
          // SpecialAddressLatency and just move this into
          // adjustSchedDependency for the targets that care about it.
          if (SpecialAddressLatency != 0 && !UnitLatencies &&
              UseSU != &ExitSU) {
            MachineInstr *UseMI = UseSU->getInstr();
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
            if (RegUseIndex >= 0 &&
                (UseTID.mayLoad() || UseTID.mayStore()) &&
                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
              LDataLatency += SpecialAddressLatency;
          }
          // Adjust the dependence latency using operand def/use
          // information (if any), and then allow the target to
          // perform its own adjustments.
          const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
          if (!UnitLatencies) {
            ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
            ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
          }
          UseSU->addPred(dep);
        }
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          std::vector<SUnit *> &UseList = Uses[*Alias];
          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
            SUnit *UseSU = UseList[i];
            if (UseSU == SU)
              continue;
            const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
            if (!UnitLatencies) {
              ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
              ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
            }
            UseSU->addPred(dep);
          }
        }

        // If a def is going to wrap back around to the top of the loop,
        // backschedule it.
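        // (Illustrative case, not from the original comments: a def whose
        // recorded loop use is an address operand in the next iteration gets
        // an artificial edge to ExitSU below, nudging it earlier so that part
        // of its latency is overlapped by work outside the current region.)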
        if (!UnitLatencies && DefList.empty()) {
          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
          if (I != LoopRegs.Deps.end()) {
            const MachineOperand *UseMO = I->second.first;
            unsigned Count = I->second.second;
            const MachineInstr *UseMI = UseMO->getParent();
            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            // TODO: If we knew the total depth of the region here, we could
            // handle the case where the whole loop is inside the region but
            // is large enough that the isScheduleHigh trick isn't needed.
            if (UseMOIdx < UseTID.getNumOperands()) {
              // Currently, we only support scheduling regions consisting of
              // single basic blocks. Check to see if the instruction is in
              // the same region by checking to see if it has the same parent.
              if (UseMI->getParent() != MI->getParent()) {
                unsigned Latency = SU->Latency;
                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                  Latency += SpecialAddressLatency;
                // This is a wild guess as to the portion of the latency which
                // will be overlapped by work done outside the current
                // scheduling region.
                Latency -= std::min(Latency, Count);
                // Add the artificial edge.
                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                    /*Reg=*/0, /*isNormalMemory=*/false,
                                    /*isMustAlias=*/false,
                                    /*isArtificial=*/true));
              } else if (SpecialAddressLatency > 0 &&
                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                // The entire loop body is within the current scheduling region
                // and the latency of this operation is assumed to be greater
                // than the latency of the loop.
                // TODO: Recursively mark data-edge predecessors as
                //       isScheduleHigh too.
                SU->isScheduleHigh = true;
              }
            }
            LoopRegs.Deps.erase(I);
          }
        }

        UseList.clear();
        if (!MO.isDead())
          DefList.clear();

        // Calls will not be reordered because of chain dependencies (see
        // below). Since call operands are dead, calls may continue to be added
        // to the DefList making dependence checking quadratic in the size of
        // the block. Instead, we leave only one call at the back of the
        // DefList.
        if (SU->isCall) {
          while (!DefList.empty() && DefList.back()->isCall)
            DefList.pop_back();
        }
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for a true dependency of a store followed by an
    // aliased load... we estimate that with a single cycle of latency,
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
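    // (For instance, with the scheme below a store followed by a possibly
    // aliasing load gets an order edge with STORE_LOAD_LATENCY, modeling a
    // one-cycle store-to-load forwarding bypass, while every other memory
    // order edge is given a latency of 0.)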
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
           E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (TID.mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between compares
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (TID.mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  PendingLoads.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->getDesc().mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  // For MachineInstr-based scheduling, we're rescheduling the instructions in
  // the block, so start by removing them from the block.
  while (Begin != InsertPos) {
    MachineBasicBlock::iterator I = Begin;
    ++Begin;
    BB->remove(I);
  }

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->insert(InsertPos, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->insert(InsertPos, SU->getInstr());
    else
      // Null SUnit* is a noop.
      EmitNoop();
  }

  // Update the Begin iterator, as the first instruction in the block
  // may have been scheduled later.
  if (!Sequence.empty())
    Begin = Sequence[0]->getInstr();

  // Reinsert any remaining debug_values.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineInstr *OrigPrivMI = P.second;
    BB->insertAfter(OrigPrivMI, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
  return BB;
}