ScheduleDAGInstrs.cpp revision 199481
1193323Sed//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This implements the ScheduleDAGInstrs class, which implements re-scheduling
11193323Sed// of MachineInstrs.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#define DEBUG_TYPE "sched-instrs"
16193323Sed#include "ScheduleDAGInstrs.h"
17198090Srdivacky#include "llvm/Operator.h"
18193323Sed#include "llvm/Analysis/AliasAnalysis.h"
19193323Sed#include "llvm/CodeGen/MachineFunctionPass.h"
20198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h"
21193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
22193323Sed#include "llvm/CodeGen/PseudoSourceValue.h"
23193323Sed#include "llvm/Target/TargetMachine.h"
24193323Sed#include "llvm/Target/TargetInstrInfo.h"
25193323Sed#include "llvm/Target/TargetRegisterInfo.h"
26193323Sed#include "llvm/Target/TargetSubtarget.h"
27193323Sed#include "llvm/Support/Debug.h"
28193323Sed#include "llvm/Support/raw_ostream.h"
29193323Sed#include "llvm/ADT/SmallSet.h"
30193323Sedusing namespace llvm;
31193323Sed
32193323SedScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
33193323Sed                                     const MachineLoopInfo &mli,
34193323Sed                                     const MachineDominatorTree &mdt)
35198396Srdivacky  : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
36198396Srdivacky  MFI = mf.getFrameInfo();
37198396Srdivacky}
38193323Sed
39193323Sed/// Run - perform scheduling.
40193323Sed///
41193323Sedvoid ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
42193323Sed                            MachineBasicBlock::iterator begin,
43193323Sed                            MachineBasicBlock::iterator end,
44193323Sed                            unsigned endcount) {
45193323Sed  BB = bb;
46193323Sed  Begin = begin;
47193323Sed  InsertPosIndex = endcount;
48193323Sed
49193323Sed  ScheduleDAG::Run(bb, end);
50193323Sed}
51193323Sed
52193323Sed/// getUnderlyingObjectFromInt - This is the function that does the work of
53193323Sed/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
54193323Sedstatic const Value *getUnderlyingObjectFromInt(const Value *V) {
55193323Sed  do {
56198090Srdivacky    if (const Operator *U = dyn_cast<Operator>(V)) {
57193323Sed      // If we find a ptrtoint, we can transfer control back to the
58193323Sed      // regular getUnderlyingObjectFromInt.
59198090Srdivacky      if (U->getOpcode() == Instruction::PtrToInt)
60193323Sed        return U->getOperand(0);
61193323Sed      // If we find an add of a constant or a multiplied value, it's
62193323Sed      // likely that the other operand will lead us to the base
63193323Sed      // object. We don't have to worry about the case where the
64198090Srdivacky      // object address is somehow being computed by the multiply,
65193323Sed      // because our callers only care when the result is an
66193323Sed      // identifibale object.
67198090Srdivacky      if (U->getOpcode() != Instruction::Add ||
68193323Sed          (!isa<ConstantInt>(U->getOperand(1)) &&
69198090Srdivacky           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
70193323Sed        return V;
71193323Sed      V = U->getOperand(0);
72193323Sed    } else {
73193323Sed      return V;
74193323Sed    }
75193323Sed    assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
76193323Sed  } while (1);
77193323Sed}
78193323Sed
79193323Sed/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
80193323Sed/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
81193323Sedstatic const Value *getUnderlyingObject(const Value *V) {
82193323Sed  // First just call Value::getUnderlyingObject to let it do what it does.
83193323Sed  do {
84193323Sed    V = V->getUnderlyingObject();
85193323Sed    // If it found an inttoptr, use special code to continue climing.
86198090Srdivacky    if (Operator::getOpcode(V) != Instruction::IntToPtr)
87193323Sed      break;
88193323Sed    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
89193323Sed    // If that succeeded in finding a pointer, continue the search.
90193323Sed    if (!isa<PointerType>(O->getType()))
91193323Sed      break;
92193323Sed    V = O;
93193323Sed  } while (1);
94193323Sed  return V;
95193323Sed}
96193323Sed
97193323Sed/// getUnderlyingObjectForInstr - If this machine instr has memory reference
98193323Sed/// information and it can be tracked to a normal reference to a known
99193323Sed/// object, return the Value for that object. Otherwise return null.
100198396Srdivackystatic const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
101198892Srdivacky                                                const MachineFrameInfo *MFI,
102198892Srdivacky                                                bool &MayAlias) {
103198892Srdivacky  MayAlias = true;
104193323Sed  if (!MI->hasOneMemOperand() ||
105198090Srdivacky      !(*MI->memoperands_begin())->getValue() ||
106198090Srdivacky      (*MI->memoperands_begin())->isVolatile())
107193323Sed    return 0;
108193323Sed
109198090Srdivacky  const Value *V = (*MI->memoperands_begin())->getValue();
110193323Sed  if (!V)
111193323Sed    return 0;
112193323Sed
113193323Sed  V = getUnderlyingObject(V);
114198396Srdivacky  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
115198396Srdivacky    // For now, ignore PseudoSourceValues which may alias LLVM IR values
116198396Srdivacky    // because the code that uses this function has no way to cope with
117198396Srdivacky    // such aliases.
118198396Srdivacky    if (PSV->isAliased(MFI))
119198396Srdivacky      return 0;
120199481Srdivacky
121199481Srdivacky    MayAlias = PSV->mayAlias(MFI);
122198396Srdivacky    return V;
123198396Srdivacky  }
124193323Sed
125198396Srdivacky  if (isIdentifiedObject(V))
126198396Srdivacky    return V;
127198396Srdivacky
128198396Srdivacky  return 0;
129193323Sed}
130193323Sed
131193323Sedvoid ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
132193323Sed  if (MachineLoop *ML = MLI.getLoopFor(BB))
133193323Sed    if (BB == ML->getLoopLatch()) {
134193323Sed      MachineBasicBlock *Header = ML->getHeader();
135193323Sed      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
136193323Sed           E = Header->livein_end(); I != E; ++I)
137193323Sed        LoopLiveInRegs.insert(*I);
138193323Sed      LoopRegs.VisitLoop(ML);
139193323Sed    }
140193323Sed}
141193323Sed
142198090Srdivackyvoid ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
143193323Sed  // We'll be allocating one SUnit for each instruction, plus one for
144193323Sed  // the region exit node.
145193323Sed  SUnits.reserve(BB->size());
146193323Sed
147193323Sed  // We build scheduling units by walking a block's instruction list from bottom
148193323Sed  // to top.
149193323Sed
150199481Srdivacky  // Remember where a generic side-effecting instruction is as we procede.
151199481Srdivacky  SUnit *BarrierChain = 0, *AliasChain = 0;
152193323Sed
153199481Srdivacky  // Memory references to specific known memory locations are tracked
154199481Srdivacky  // so that they can be given more precise dependencies. We track
155199481Srdivacky  // separately the known memory locations that may alias and those
156199481Srdivacky  // that are known not to alias
157199481Srdivacky  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
158199481Srdivacky  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
159193323Sed
160193323Sed  // Check to see if the scheduler cares about latencies.
161193323Sed  bool UnitLatencies = ForceUnitLatencies();
162193323Sed
163193323Sed  // Ask the target if address-backscheduling is desirable, and if so how much.
164198090Srdivacky  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
165198090Srdivacky  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
166193323Sed
167193323Sed  // Walk the list of instructions, from bottom moving up.
168193323Sed  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
169193323Sed       MII != MIE; --MII) {
170193323Sed    MachineInstr *MI = prior(MII);
171193323Sed    const TargetInstrDesc &TID = MI->getDesc();
172193323Sed    assert(!TID.isTerminator() && !MI->isLabel() &&
173193323Sed           "Cannot schedule terminators or labels!");
174193323Sed    // Create the SUnit for this MI.
175193323Sed    SUnit *SU = NewSUnit(MI);
176193323Sed
177193323Sed    // Assign the Latency field of SU using target-provided information.
178193323Sed    if (UnitLatencies)
179193323Sed      SU->Latency = 1;
180193323Sed    else
181193323Sed      ComputeLatency(SU);
182193323Sed
183193323Sed    // Add register-based dependencies (data, anti, and output).
184193323Sed    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
185193323Sed      const MachineOperand &MO = MI->getOperand(j);
186193323Sed      if (!MO.isReg()) continue;
187193323Sed      unsigned Reg = MO.getReg();
188193323Sed      if (Reg == 0) continue;
189193323Sed
190193323Sed      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
191193323Sed      std::vector<SUnit *> &UseList = Uses[Reg];
192193323Sed      std::vector<SUnit *> &DefList = Defs[Reg];
193198090Srdivacky      // Optionally add output and anti dependencies. For anti
194198090Srdivacky      // dependencies we use a latency of 0 because for a multi-issue
195198090Srdivacky      // target we want to allow the defining instruction to issue
196198090Srdivacky      // in the same cycle as the using instruction.
197198090Srdivacky      // TODO: Using a latency of 1 here for output dependencies assumes
198198090Srdivacky      //       there's no cost for reusing registers.
199193323Sed      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
200198090Srdivacky      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
201193323Sed      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
202193323Sed        SUnit *DefSU = DefList[i];
203193323Sed        if (DefSU != SU &&
204193323Sed            (Kind != SDep::Output || !MO.isDead() ||
205193323Sed             !DefSU->getInstr()->registerDefIsDead(Reg)))
206198090Srdivacky          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
207193323Sed      }
208193323Sed      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
209193323Sed        std::vector<SUnit *> &DefList = Defs[*Alias];
210193323Sed        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
211193323Sed          SUnit *DefSU = DefList[i];
212193323Sed          if (DefSU != SU &&
213193323Sed              (Kind != SDep::Output || !MO.isDead() ||
214198892Srdivacky               !DefSU->getInstr()->registerDefIsDead(*Alias)))
215198090Srdivacky            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
216193323Sed        }
217193323Sed      }
218193323Sed
219193323Sed      if (MO.isDef()) {
220193323Sed        // Add any data dependencies.
221193323Sed        unsigned DataLatency = SU->Latency;
222193323Sed        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
223193323Sed          SUnit *UseSU = UseList[i];
224193323Sed          if (UseSU != SU) {
225193323Sed            unsigned LDataLatency = DataLatency;
226193323Sed            // Optionally add in a special extra latency for nodes that
227193323Sed            // feed addresses.
228193323Sed            // TODO: Do this for register aliases too.
229198090Srdivacky            // TODO: Perhaps we should get rid of
230198090Srdivacky            // SpecialAddressLatency and just move this into
231198090Srdivacky            // adjustSchedDependency for the targets that care about
232198090Srdivacky            // it.
233193323Sed            if (SpecialAddressLatency != 0 && !UnitLatencies) {
234193323Sed              MachineInstr *UseMI = UseSU->getInstr();
235193323Sed              const TargetInstrDesc &UseTID = UseMI->getDesc();
236193323Sed              int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
237193323Sed              assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
238193323Sed              if ((UseTID.mayLoad() || UseTID.mayStore()) &&
239193323Sed                  (unsigned)RegUseIndex < UseTID.getNumOperands() &&
240193323Sed                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
241193323Sed                LDataLatency += SpecialAddressLatency;
242193323Sed            }
243198090Srdivacky            // Adjust the dependence latency using operand def/use
244198090Srdivacky            // information (if any), and then allow the target to
245198090Srdivacky            // perform its own adjustments.
246198090Srdivacky            const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
247198090Srdivacky            if (!UnitLatencies) {
248198090Srdivacky              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
249198090Srdivacky              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
250198090Srdivacky            }
251198090Srdivacky            UseSU->addPred(dep);
252193323Sed          }
253193323Sed        }
254193323Sed        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
255193323Sed          std::vector<SUnit *> &UseList = Uses[*Alias];
256193323Sed          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
257193323Sed            SUnit *UseSU = UseList[i];
258198090Srdivacky            if (UseSU != SU) {
259198090Srdivacky              const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
260198090Srdivacky              if (!UnitLatencies) {
261198090Srdivacky                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
262198090Srdivacky                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
263198090Srdivacky              }
264198090Srdivacky              UseSU->addPred(dep);
265198090Srdivacky            }
266193323Sed          }
267193323Sed        }
268193323Sed
269193323Sed        // If a def is going to wrap back around to the top of the loop,
270193323Sed        // backschedule it.
271193323Sed        if (!UnitLatencies && DefList.empty()) {
272193323Sed          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
273193323Sed          if (I != LoopRegs.Deps.end()) {
274193323Sed            const MachineOperand *UseMO = I->second.first;
275193323Sed            unsigned Count = I->second.second;
276193323Sed            const MachineInstr *UseMI = UseMO->getParent();
277193323Sed            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
278193323Sed            const TargetInstrDesc &UseTID = UseMI->getDesc();
279193323Sed            // TODO: If we knew the total depth of the region here, we could
280193323Sed            // handle the case where the whole loop is inside the region but
281193323Sed            // is large enough that the isScheduleHigh trick isn't needed.
282193323Sed            if (UseMOIdx < UseTID.getNumOperands()) {
283193323Sed              // Currently, we only support scheduling regions consisting of
284193323Sed              // single basic blocks. Check to see if the instruction is in
285193323Sed              // the same region by checking to see if it has the same parent.
286193323Sed              if (UseMI->getParent() != MI->getParent()) {
287193323Sed                unsigned Latency = SU->Latency;
288193323Sed                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
289193323Sed                  Latency += SpecialAddressLatency;
290193323Sed                // This is a wild guess as to the portion of the latency which
291193323Sed                // will be overlapped by work done outside the current
292193323Sed                // scheduling region.
293193323Sed                Latency -= std::min(Latency, Count);
294193323Sed                // Add the artifical edge.
295193323Sed                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
296193323Sed                                    /*Reg=*/0, /*isNormalMemory=*/false,
297193323Sed                                    /*isMustAlias=*/false,
298193323Sed                                    /*isArtificial=*/true));
299193323Sed              } else if (SpecialAddressLatency > 0 &&
300193323Sed                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
301193323Sed                // The entire loop body is within the current scheduling region
302193323Sed                // and the latency of this operation is assumed to be greater
303193323Sed                // than the latency of the loop.
304193323Sed                // TODO: Recursively mark data-edge predecessors as
305193323Sed                //       isScheduleHigh too.
306193323Sed                SU->isScheduleHigh = true;
307193323Sed              }
308193323Sed            }
309193323Sed            LoopRegs.Deps.erase(I);
310193323Sed          }
311193323Sed        }
312193323Sed
313193323Sed        UseList.clear();
314193323Sed        if (!MO.isDead())
315193323Sed          DefList.clear();
316193323Sed        DefList.push_back(SU);
317193323Sed      } else {
318193323Sed        UseList.push_back(SU);
319193323Sed      }
320193323Sed    }
321193323Sed
322193323Sed    // Add chain dependencies.
323198892Srdivacky    // Chain dependencies used to enforce memory order should have
324198892Srdivacky    // latency of 0 (except for true dependency of Store followed by
325198892Srdivacky    // aliased Load... we estimate that with a single cycle of latency
326198892Srdivacky    // assuming the hardware will bypass)
327193323Sed    // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
328193323Sed    // after stack slots are lowered to actual addresses.
329193323Sed    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
330193323Sed    // produce more precise dependence information.
331198892Srdivacky#define STORE_LOAD_LATENCY 1
332198892Srdivacky    unsigned TrueMemOrderLatency = 0;
333199481Srdivacky    if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
334199481Srdivacky        (MI->hasVolatileMemoryRef() &&
335199481Srdivacky         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
336199481Srdivacky      // Be conservative with these and add dependencies on all memory
337199481Srdivacky      // references, even those that are known to not alias.
338199481Srdivacky      for (std::map<const Value *, SUnit *>::iterator I =
339199481Srdivacky             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
340199481Srdivacky        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
341199481Srdivacky      }
342199481Srdivacky      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
343199481Srdivacky             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
344199481Srdivacky        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
345199481Srdivacky          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
346199481Srdivacky      }
347199481Srdivacky      NonAliasMemDefs.clear();
348199481Srdivacky      NonAliasMemUses.clear();
349199481Srdivacky      // Add SU to the barrier chain.
350199481Srdivacky      if (BarrierChain)
351199481Srdivacky        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
352199481Srdivacky      BarrierChain = SU;
353199481Srdivacky
354199481Srdivacky      // fall-through
355199481Srdivacky    new_alias_chain:
356199481Srdivacky      // Chain all possibly aliasing memory references though SU.
357199481Srdivacky      if (AliasChain)
358199481Srdivacky        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
359199481Srdivacky      AliasChain = SU;
360193323Sed      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
361198892Srdivacky        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
362199481Srdivacky      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
363199481Srdivacky           E = AliasMemDefs.end(); I != E; ++I) {
364198892Srdivacky        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
365193323Sed      }
366193323Sed      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
367199481Srdivacky           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
368193323Sed        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
369198892Srdivacky          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
370193323Sed      }
371199481Srdivacky      PendingLoads.clear();
372199481Srdivacky      AliasMemDefs.clear();
373199481Srdivacky      AliasMemUses.clear();
374193323Sed    } else if (TID.mayStore()) {
375198892Srdivacky      bool MayAlias = true;
376198892Srdivacky      TrueMemOrderLatency = STORE_LOAD_LATENCY;
377198892Srdivacky      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
378193323Sed        // A store to a specific PseudoSourceValue. Add precise dependencies.
379199481Srdivacky        // Record the def in MemDefs, first adding a dep if there is
380199481Srdivacky        // an existing def.
381199481Srdivacky        std::map<const Value *, SUnit *>::iterator I =
382199481Srdivacky          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
383199481Srdivacky        std::map<const Value *, SUnit *>::iterator IE =
384199481Srdivacky          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
385199481Srdivacky        if (I != IE) {
386198892Srdivacky          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
387193323Sed                                  /*isNormalMemory=*/true));
388193323Sed          I->second = SU;
389193323Sed        } else {
390199481Srdivacky          if (MayAlias)
391199481Srdivacky            AliasMemDefs[V] = SU;
392199481Srdivacky          else
393199481Srdivacky            NonAliasMemDefs[V] = SU;
394193323Sed        }
395193323Sed        // Handle the uses in MemUses, if there are any.
396193323Sed        std::map<const Value *, std::vector<SUnit *> >::iterator J =
397199481Srdivacky          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
398199481Srdivacky        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
399199481Srdivacky          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
400199481Srdivacky        if (J != JE) {
401193323Sed          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
402198892Srdivacky            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
403198892Srdivacky                                       /*Reg=*/0, /*isNormalMemory=*/true));
404193323Sed          J->second.clear();
405193323Sed        }
406198892Srdivacky        if (MayAlias) {
407199481Srdivacky          // Add dependencies from all the PendingLoads, i.e. loads
408199481Srdivacky          // with no underlying object.
409198892Srdivacky          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
410198892Srdivacky            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
411199481Srdivacky          // Add dependence on alias chain, if needed.
412199481Srdivacky          if (AliasChain)
413199481Srdivacky            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
414198892Srdivacky        }
415199481Srdivacky        // Add dependence on barrier chain, if needed.
416199481Srdivacky        if (BarrierChain)
417199481Srdivacky          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
418198953Srdivacky      } else {
419193323Sed        // Treat all other stores conservatively.
420199481Srdivacky        goto new_alias_chain;
421198892Srdivacky      }
422193323Sed    } else if (TID.mayLoad()) {
423198892Srdivacky      bool MayAlias = true;
424198892Srdivacky      TrueMemOrderLatency = 0;
425198090Srdivacky      if (MI->isInvariantLoad(AA)) {
426193323Sed        // Invariant load, no chain dependencies needed!
427198953Srdivacky      } else {
428199481Srdivacky        if (const Value *V =
429199481Srdivacky            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
430199481Srdivacky          // A load from a specific PseudoSourceValue. Add precise dependencies.
431199481Srdivacky          std::map<const Value *, SUnit *>::iterator I =
432199481Srdivacky            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
433199481Srdivacky          std::map<const Value *, SUnit *>::iterator IE =
434199481Srdivacky            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
435199481Srdivacky          if (I != IE)
436199481Srdivacky            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
437199481Srdivacky                                    /*isNormalMemory=*/true));
438199481Srdivacky          if (MayAlias)
439199481Srdivacky            AliasMemUses[V].push_back(SU);
440199481Srdivacky          else
441199481Srdivacky            NonAliasMemUses[V].push_back(SU);
442199481Srdivacky        } else {
443199481Srdivacky          // A load with no underlying object. Depend on all
444199481Srdivacky          // potentially aliasing stores.
445199481Srdivacky          for (std::map<const Value *, SUnit *>::iterator I =
446199481Srdivacky                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
447199481Srdivacky            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
448199481Srdivacky
449199481Srdivacky          PendingLoads.push_back(SU);
450199481Srdivacky          MayAlias = true;
451198892Srdivacky        }
452199481Srdivacky
453199481Srdivacky        // Add dependencies on alias and barrier chains, if needed.
454199481Srdivacky        if (MayAlias && AliasChain)
455199481Srdivacky          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
456199481Srdivacky        if (BarrierChain)
457199481Srdivacky          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
458199481Srdivacky      }
459193323Sed    }
460193323Sed  }
461193323Sed
462193323Sed  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
463193323Sed    Defs[i].clear();
464193323Sed    Uses[i].clear();
465193323Sed  }
466193323Sed  PendingLoads.clear();
467193323Sed}
468193323Sed
469193323Sedvoid ScheduleDAGInstrs::FinishBlock() {
470193323Sed  // Nothing to do.
471193323Sed}
472193323Sed
473193323Sedvoid ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
474193323Sed  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
475193323Sed
476198090Srdivacky  // Compute the latency for the node.
477193323Sed  SU->Latency =
478198090Srdivacky    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
479193323Sed
480193323Sed  // Simplistic target-independent heuristic: assume that loads take
481193323Sed  // extra time.
482193323Sed  if (InstrItins.isEmpty())
483193323Sed    if (SU->getInstr()->getDesc().mayLoad())
484193323Sed      SU->Latency += 2;
485193323Sed}
486193323Sed
487198090Srdivackyvoid ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
488198090Srdivacky                                              SDep& dep) const {
489198090Srdivacky  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
490198090Srdivacky  if (InstrItins.isEmpty())
491198090Srdivacky    return;
492198090Srdivacky
493198090Srdivacky  // For a data dependency with a known register...
494198090Srdivacky  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
495198090Srdivacky    return;
496198090Srdivacky
497198090Srdivacky  const unsigned Reg = dep.getReg();
498198090Srdivacky
499198090Srdivacky  // ... find the definition of the register in the defining
500198090Srdivacky  // instruction
501198090Srdivacky  MachineInstr *DefMI = Def->getInstr();
502198090Srdivacky  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
503198090Srdivacky  if (DefIdx != -1) {
504198090Srdivacky    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
505198090Srdivacky    if (DefCycle >= 0) {
506198090Srdivacky      MachineInstr *UseMI = Use->getInstr();
507198090Srdivacky      const unsigned UseClass = UseMI->getDesc().getSchedClass();
508198090Srdivacky
509198090Srdivacky      // For all uses of the register, calculate the maxmimum latency
510198090Srdivacky      int Latency = -1;
511198090Srdivacky      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
512198090Srdivacky        const MachineOperand &MO = UseMI->getOperand(i);
513198090Srdivacky        if (!MO.isReg() || !MO.isUse())
514198090Srdivacky          continue;
515198090Srdivacky        unsigned MOReg = MO.getReg();
516198090Srdivacky        if (MOReg != Reg)
517198090Srdivacky          continue;
518198090Srdivacky
519198090Srdivacky        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
520198090Srdivacky        if (UseCycle >= 0)
521198090Srdivacky          Latency = std::max(Latency, DefCycle - UseCycle + 1);
522198090Srdivacky      }
523198090Srdivacky
524198090Srdivacky      // If we found a latency, then replace the existing dependence latency.
525198090Srdivacky      if (Latency >= 0)
526198090Srdivacky        dep.setLatency(Latency);
527198090Srdivacky    }
528198090Srdivacky  }
529198090Srdivacky}
530198090Srdivacky
531193323Sedvoid ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
532193323Sed  SU->getInstr()->dump();
533193323Sed}
534193323Sed
535193323Sedstd::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
536193323Sed  std::string s;
537193323Sed  raw_string_ostream oss(s);
538193323Sed  if (SU == &EntrySU)
539193323Sed    oss << "<entry>";
540193323Sed  else if (SU == &ExitSU)
541193323Sed    oss << "<exit>";
542193323Sed  else
543193323Sed    SU->getInstr()->print(oss);
544193323Sed  return oss.str();
545193323Sed}
546193323Sed
547193323Sed// EmitSchedule - Emit the machine code in scheduled order.
548198090SrdivackyMachineBasicBlock *ScheduleDAGInstrs::
549198090SrdivackyEmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
550193323Sed  // For MachineInstr-based scheduling, we're rescheduling the instructions in
551193323Sed  // the block, so start by removing them from the block.
552193323Sed  while (Begin != InsertPos) {
553193323Sed    MachineBasicBlock::iterator I = Begin;
554193323Sed    ++Begin;
555193323Sed    BB->remove(I);
556193323Sed  }
557193323Sed
558193323Sed  // Then re-insert them according to the given schedule.
559193323Sed  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
560193323Sed    SUnit *SU = Sequence[i];
561193323Sed    if (!SU) {
562193323Sed      // Null SUnit* is a noop.
563193323Sed      EmitNoop();
564193323Sed      continue;
565193323Sed    }
566193323Sed
567193323Sed    BB->insert(InsertPos, SU->getInstr());
568193323Sed  }
569193323Sed
570193323Sed  // Update the Begin iterator, as the first instruction in the block
571193323Sed  // may have been scheduled later.
572193323Sed  if (!Sequence.empty())
573193323Sed    Begin = Sequence[0]->getInstr();
574193323Sed
575193323Sed  return BB;
576193323Sed}
577