ScheduleDAGInstrs.cpp revision 243830
1193323Sed//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This implements the ScheduleDAGInstrs class, which implements re-scheduling
11193323Sed// of MachineInstrs.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#define DEBUG_TYPE "sched-instrs"
16198090Srdivacky#include "llvm/Operator.h"
17193323Sed#include "llvm/Analysis/AliasAnalysis.h"
18218893Sdim#include "llvm/Analysis/ValueTracking.h"
19234353Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h"
20193323Sed#include "llvm/CodeGen/MachineFunctionPass.h"
21198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h"
22193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
23193323Sed#include "llvm/CodeGen/PseudoSourceValue.h"
24239462Sdim#include "llvm/CodeGen/RegisterPressure.h"
25243830Sdim#include "llvm/CodeGen/ScheduleDAGILP.h"
26234353Sdim#include "llvm/CodeGen/ScheduleDAGInstrs.h"
27224145Sdim#include "llvm/MC/MCInstrItineraries.h"
28193323Sed#include "llvm/Target/TargetMachine.h"
29193323Sed#include "llvm/Target/TargetInstrInfo.h"
30193323Sed#include "llvm/Target/TargetRegisterInfo.h"
31224145Sdim#include "llvm/Target/TargetSubtargetInfo.h"
32239462Sdim#include "llvm/Support/CommandLine.h"
33193323Sed#include "llvm/Support/Debug.h"
34243830Sdim#include "llvm/Support/Format.h"
35193323Sed#include "llvm/Support/raw_ostream.h"
36193323Sed#include "llvm/ADT/SmallSet.h"
37239462Sdim#include "llvm/ADT/SmallPtrSet.h"
38193323Sedusing namespace llvm;
39193323Sed
40239462Sdimstatic cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
41239462Sdim    cl::ZeroOrMore, cl::init(false),
42239462Sdim    cl::desc("Enable use of AA during MI GAD construction"));
43239462Sdim
44193323SedScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
45193323Sed                                     const MachineLoopInfo &mli,
46234353Sdim                                     const MachineDominatorTree &mdt,
47234353Sdim                                     bool IsPostRAFlag,
48234353Sdim                                     LiveIntervals *lis)
49243830Sdim  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
50243830Sdim    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
51234353Sdim  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
52223017Sdim  DbgValues.clear();
53234353Sdim  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
54234353Sdim         "Virtual registers must be removed prior to PostRA scheduling");
55243830Sdim
56243830Sdim  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
57243830Sdim  SchedModel.init(*ST.getSchedModel(), &ST, TII);
58198396Srdivacky}
59193323Sed
60193323Sed/// getUnderlyingObjectFromInt - This is the function that does the work of
61193323Sed/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
62193323Sedstatic const Value *getUnderlyingObjectFromInt(const Value *V) {
63193323Sed  do {
64198090Srdivacky    if (const Operator *U = dyn_cast<Operator>(V)) {
65193323Sed      // If we find a ptrtoint, we can transfer control back to the
66193323Sed      // regular getUnderlyingObjectFromInt.
67198090Srdivacky      if (U->getOpcode() == Instruction::PtrToInt)
68193323Sed        return U->getOperand(0);
69193323Sed      // If we find an add of a constant or a multiplied value, it's
70193323Sed      // likely that the other operand will lead us to the base
71193323Sed      // object. We don't have to worry about the case where the
72198090Srdivacky      // object address is somehow being computed by the multiply,
73193323Sed      // because our callers only care when the result is an
74243830Sdim      // identifiable object.
75198090Srdivacky      if (U->getOpcode() != Instruction::Add ||
76193323Sed          (!isa<ConstantInt>(U->getOperand(1)) &&
77198090Srdivacky           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
78193323Sed        return V;
79193323Sed      V = U->getOperand(0);
80193323Sed    } else {
81193323Sed      return V;
82193323Sed    }
83204642Srdivacky    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
84193323Sed  } while (1);
85193323Sed}
86193323Sed
87218893Sdim/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
88193323Sed/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
89193323Sedstatic const Value *getUnderlyingObject(const Value *V) {
90193323Sed  // First just call Value::getUnderlyingObject to let it do what it does.
91193323Sed  do {
92218893Sdim    V = GetUnderlyingObject(V);
93193323Sed    // If it found an inttoptr, use special code to continue climing.
94198090Srdivacky    if (Operator::getOpcode(V) != Instruction::IntToPtr)
95193323Sed      break;
96193323Sed    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
97193323Sed    // If that succeeded in finding a pointer, continue the search.
98204642Srdivacky    if (!O->getType()->isPointerTy())
99193323Sed      break;
100193323Sed    V = O;
101193323Sed  } while (1);
102193323Sed  return V;
103193323Sed}
104193323Sed
105193323Sed/// getUnderlyingObjectForInstr - If this machine instr has memory reference
106193323Sed/// information and it can be tracked to a normal reference to a known
107193323Sed/// object, return the Value for that object. Otherwise return null.
108198396Srdivackystatic const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
109198892Srdivacky                                                const MachineFrameInfo *MFI,
110198892Srdivacky                                                bool &MayAlias) {
111198892Srdivacky  MayAlias = true;
112193323Sed  if (!MI->hasOneMemOperand() ||
113198090Srdivacky      !(*MI->memoperands_begin())->getValue() ||
114198090Srdivacky      (*MI->memoperands_begin())->isVolatile())
115193323Sed    return 0;
116193323Sed
117198090Srdivacky  const Value *V = (*MI->memoperands_begin())->getValue();
118193323Sed  if (!V)
119193323Sed    return 0;
120193323Sed
121193323Sed  V = getUnderlyingObject(V);
122198396Srdivacky  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
123198396Srdivacky    // For now, ignore PseudoSourceValues which may alias LLVM IR values
124198396Srdivacky    // because the code that uses this function has no way to cope with
125198396Srdivacky    // such aliases.
126198396Srdivacky    if (PSV->isAliased(MFI))
127198396Srdivacky      return 0;
128223017Sdim
129199481Srdivacky    MayAlias = PSV->mayAlias(MFI);
130198396Srdivacky    return V;
131198396Srdivacky  }
132193323Sed
133198396Srdivacky  if (isIdentifiedObject(V))
134198396Srdivacky    return V;
135198396Srdivacky
136198396Srdivacky  return 0;
137193323Sed}
138193323Sed
139239462Sdimvoid ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
140239462Sdim  BB = bb;
141193323Sed}
142193323Sed
143234353Sdimvoid ScheduleDAGInstrs::finishBlock() {
144239462Sdim  // Subclasses should no longer refer to the old block.
145239462Sdim  BB = 0;
146234353Sdim}
147234353Sdim
148234353Sdim/// Initialize the map with the number of registers.
149234353Sdimvoid Reg2SUnitsMap::setRegLimit(unsigned Limit) {
150234353Sdim  PhysRegSet.setUniverse(Limit);
151234353Sdim  SUnits.resize(Limit);
152234353Sdim}
153234353Sdim
154234353Sdim/// Clear the map without deallocating storage.
155234353Sdimvoid Reg2SUnitsMap::clear() {
156234353Sdim  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
157234353Sdim    SUnits[*I].clear();
158234353Sdim  }
159234353Sdim  PhysRegSet.clear();
160234353Sdim}
161234353Sdim
162234353Sdim/// Initialize the DAG and common scheduler state for the current scheduling
163234353Sdim/// region. This does not actually create the DAG, only clears it. The
164234353Sdim/// scheduling driver may call BuildSchedGraph multiple times per scheduling
165234353Sdim/// region.
166234353Sdimvoid ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
167234353Sdim                                    MachineBasicBlock::iterator begin,
168234353Sdim                                    MachineBasicBlock::iterator end,
169234353Sdim                                    unsigned endcount) {
170239462Sdim  assert(bb == BB && "startBlock should set BB");
171234353Sdim  RegionBegin = begin;
172234353Sdim  RegionEnd = end;
173234353Sdim  EndIndex = endcount;
174234353Sdim  MISUnitMap.clear();
175234353Sdim
176234353Sdim  ScheduleDAG::clearDAG();
177234353Sdim}
178234353Sdim
179234353Sdim/// Close the current scheduling region. Don't clear any state in case the
180234353Sdim/// driver wants to refer to the previous scheduling region.
181234353Sdimvoid ScheduleDAGInstrs::exitRegion() {
182234353Sdim  // Nothing to do.
183234353Sdim}
184234353Sdim
185234353Sdim/// addSchedBarrierDeps - Add dependencies from instructions in the current
186218893Sdim/// list of instructions being scheduled to scheduling barrier by adding
187218893Sdim/// the exit SU to the register defs and use list. This is because we want to
188218893Sdim/// make sure instructions which define registers that are either used by
189218893Sdim/// the terminator or are live-out are properly scheduled. This is
190218893Sdim/// especially important when the definition latency of the return value(s)
191218893Sdim/// are too high to be hidden by the branch or when the liveout registers
192218893Sdim/// used by instructions in the fallthrough block.
193234353Sdimvoid ScheduleDAGInstrs::addSchedBarrierDeps() {
194234353Sdim  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
195218893Sdim  ExitSU.setInstr(ExitMI);
196218893Sdim  bool AllDepKnown = ExitMI &&
197234353Sdim    (ExitMI->isCall() || ExitMI->isBarrier());
198218893Sdim  if (ExitMI && AllDepKnown) {
199218893Sdim    // If it's a call or a barrier, add dependencies on the defs and uses of
200218893Sdim    // instruction.
201218893Sdim    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
202218893Sdim      const MachineOperand &MO = ExitMI->getOperand(i);
203218893Sdim      if (!MO.isReg() || MO.isDef()) continue;
204218893Sdim      unsigned Reg = MO.getReg();
205218893Sdim      if (Reg == 0) continue;
206218893Sdim
207234353Sdim      if (TRI->isPhysicalRegister(Reg))
208243830Sdim        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
209234353Sdim      else {
210234353Sdim        assert(!IsPostRA && "Virtual register encountered after regalloc.");
211234353Sdim        addVRegUseDeps(&ExitSU, i);
212234353Sdim      }
213218893Sdim    }
214218893Sdim  } else {
215218893Sdim    // For others, e.g. fallthrough, conditional branch, assume the exit
216218893Sdim    // uses all the registers that are livein to the successor blocks.
217234353Sdim    assert(Uses.empty() && "Uses in set before adding deps?");
218218893Sdim    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
219218893Sdim           SE = BB->succ_end(); SI != SE; ++SI)
220218893Sdim      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
221223017Sdim             E = (*SI)->livein_end(); I != E; ++I) {
222218893Sdim        unsigned Reg = *I;
223234353Sdim        if (!Uses.contains(Reg))
224243830Sdim          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
225218893Sdim      }
226218893Sdim  }
227218893Sdim}
228218893Sdim
229234353Sdim/// MO is an operand of SU's instruction that defines a physical register. Add
230234353Sdim/// data dependencies from SU to any uses of the physical register.
231243830Sdimvoid ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
232243830Sdim  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
233234353Sdim  assert(MO.isDef() && "expect physreg def");
234234353Sdim
235234353Sdim  // Ask the target if address-backscheduling is desirable, and if so how much.
236234353Sdim  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
237234353Sdim
238239462Sdim  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
239239462Sdim       Alias.isValid(); ++Alias) {
240234353Sdim    if (!Uses.contains(*Alias))
241234353Sdim      continue;
242243830Sdim    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
243234353Sdim    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
244243830Sdim      SUnit *UseSU = UseList[i].SU;
245234353Sdim      if (UseSU == SU)
246234353Sdim        continue;
247239462Sdim
248243830Sdim      SDep dep(SU, SDep::Data, *Alias);
249243830Sdim
250243830Sdim      // Adjust the dependence latency using operand def/use information,
251243830Sdim      // then allow the target to perform its own adjustments.
252243830Sdim      int UseOp = UseList[i].OpIdx;
253243830Sdim      MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
254243830Sdim      dep.setLatency(
255243830Sdim        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
256243830Sdim                                         RegUse, UseOp, /*FindMin=*/false));
257243830Sdim      dep.setMinLatency(
258243830Sdim        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
259243830Sdim                                         RegUse, UseOp, /*FindMin=*/true));
260243830Sdim
261243830Sdim      ST.adjustSchedDependency(SU, UseSU, dep);
262234353Sdim      UseSU->addPred(dep);
263234353Sdim    }
264234353Sdim  }
265234353Sdim}
266234353Sdim
267234353Sdim/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
268234353Sdim/// this SUnit to following instructions in the same scheduling region that
269234353Sdim/// depend the physical register referenced at OperIdx.
270234353Sdimvoid ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
271234353Sdim  const MachineInstr *MI = SU->getInstr();
272234353Sdim  const MachineOperand &MO = MI->getOperand(OperIdx);
273234353Sdim
274234353Sdim  // Optionally add output and anti dependencies. For anti
275234353Sdim  // dependencies we use a latency of 0 because for a multi-issue
276234353Sdim  // target we want to allow the defining instruction to issue
277234353Sdim  // in the same cycle as the using instruction.
278234353Sdim  // TODO: Using a latency of 1 here for output dependencies assumes
279234353Sdim  //       there's no cost for reusing registers.
280234353Sdim  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
281239462Sdim  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
282239462Sdim       Alias.isValid(); ++Alias) {
283234353Sdim    if (!Defs.contains(*Alias))
284234353Sdim      continue;
285243830Sdim    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
286234353Sdim    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
287243830Sdim      SUnit *DefSU = DefList[i].SU;
288234353Sdim      if (DefSU == &ExitSU)
289234353Sdim        continue;
290234353Sdim      if (DefSU != SU &&
291234353Sdim          (Kind != SDep::Output || !MO.isDead() ||
292234353Sdim           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
293234353Sdim        if (Kind == SDep::Anti)
294243830Sdim          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
295234353Sdim        else {
296243830Sdim          SDep Dep(SU, Kind, /*Reg=*/*Alias);
297243830Sdim          unsigned OutLatency =
298243830Sdim            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
299243830Sdim          Dep.setMinLatency(OutLatency);
300243830Sdim          Dep.setLatency(OutLatency);
301243830Sdim          DefSU->addPred(Dep);
302234353Sdim        }
303234353Sdim      }
304234353Sdim    }
305234353Sdim  }
306234353Sdim
307234353Sdim  if (!MO.isDef()) {
308234353Sdim    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
309234353Sdim    // retrieve the existing SUnits list for this register's uses.
310234353Sdim    // Push this SUnit on the use list.
311243830Sdim    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
312234353Sdim  }
313234353Sdim  else {
314243830Sdim    addPhysRegDataDeps(SU, OperIdx);
315234353Sdim
316234353Sdim    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
317234353Sdim    // retrieve the existing SUnits list for this register's defs.
318243830Sdim    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
319234353Sdim
320234353Sdim    // clear this register's use list
321234353Sdim    if (Uses.contains(MO.getReg()))
322234353Sdim      Uses[MO.getReg()].clear();
323234353Sdim
324234353Sdim    if (!MO.isDead())
325234353Sdim      DefList.clear();
326234353Sdim
327234353Sdim    // Calls will not be reordered because of chain dependencies (see
328234353Sdim    // below). Since call operands are dead, calls may continue to be added
329234353Sdim    // to the DefList making dependence checking quadratic in the size of
330234353Sdim    // the block. Instead, we leave only one call at the back of the
331234353Sdim    // DefList.
332234353Sdim    if (SU->isCall) {
333243830Sdim      while (!DefList.empty() && DefList.back().SU->isCall)
334234353Sdim        DefList.pop_back();
335234353Sdim    }
336234353Sdim    // Defs are pushed in the order they are visited and never reordered.
337243830Sdim    DefList.push_back(PhysRegSUOper(SU, OperIdx));
338234353Sdim  }
339234353Sdim}
340234353Sdim
341234353Sdim/// addVRegDefDeps - Add register output and data dependencies from this SUnit
342234353Sdim/// to instructions that occur later in the same scheduling region if they read
343234353Sdim/// from or write to the virtual register defined at OperIdx.
344234353Sdim///
345234353Sdim/// TODO: Hoist loop induction variable increments. This has to be
346234353Sdim/// reevaluated. Generally, IV scheduling should be done before coalescing.
347234353Sdimvoid ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
348234353Sdim  const MachineInstr *MI = SU->getInstr();
349234353Sdim  unsigned Reg = MI->getOperand(OperIdx).getReg();
350234353Sdim
351239462Sdim  // Singly defined vregs do not have output/anti dependencies.
352234353Sdim  // The current operand is a def, so we have at least one.
353239462Sdim  // Check here if there are any others...
354239462Sdim  if (MRI.hasOneDef(Reg))
355234353Sdim    return;
356234353Sdim
357234353Sdim  // Add output dependence to the next nearest def of this vreg.
358234353Sdim  //
359234353Sdim  // Unless this definition is dead, the output dependence should be
360234353Sdim  // transitively redundant with antidependencies from this definition's
361234353Sdim  // uses. We're conservative for now until we have a way to guarantee the uses
362234353Sdim  // are not eliminated sometime during scheduling. The output dependence edge
363234353Sdim  // is also useful if output latency exceeds def-use latency.
364239462Sdim  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
365234353Sdim  if (DefI == VRegDefs.end())
366234353Sdim    VRegDefs.insert(VReg2SUnit(Reg, SU));
367234353Sdim  else {
368234353Sdim    SUnit *DefSU = DefI->SU;
369234353Sdim    if (DefSU != SU && DefSU != &ExitSU) {
370243830Sdim      SDep Dep(SU, SDep::Output, Reg);
371243830Sdim      unsigned OutLatency =
372243830Sdim        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
373243830Sdim      Dep.setMinLatency(OutLatency);
374243830Sdim      Dep.setLatency(OutLatency);
375243830Sdim      DefSU->addPred(Dep);
376234353Sdim    }
377234353Sdim    DefI->SU = SU;
378234353Sdim  }
379234353Sdim}
380234353Sdim
381234353Sdim/// addVRegUseDeps - Add a register data dependency if the instruction that
382234353Sdim/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
383234353Sdim/// register antidependency from this SUnit to instructions that occur later in
384234353Sdim/// the same scheduling region if they write the virtual register.
385234353Sdim///
386234353Sdim/// TODO: Handle ExitSU "uses" properly.
387234353Sdimvoid ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
388234353Sdim  MachineInstr *MI = SU->getInstr();
389234353Sdim  unsigned Reg = MI->getOperand(OperIdx).getReg();
390234353Sdim
391234353Sdim  // Lookup this operand's reaching definition.
392234353Sdim  assert(LIS && "vreg dependencies requires LiveIntervals");
393239462Sdim  LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
394239462Sdim  VNInfo *VNI = LRQ.valueIn();
395239462Sdim
396234353Sdim  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
397239462Sdim  assert(VNI && "No value to read by operand");
398234353Sdim  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
399234353Sdim  // Phis and other noninstructions (after coalescing) have a NULL Def.
400234353Sdim  if (Def) {
401234353Sdim    SUnit *DefSU = getSUnit(Def);
402234353Sdim    if (DefSU) {
403234353Sdim      // The reaching Def lives within this scheduling region.
404234353Sdim      // Create a data dependence.
405243830Sdim      SDep dep(DefSU, SDep::Data, Reg);
406243830Sdim      // Adjust the dependence latency using operand def/use information, then
407243830Sdim      // allow the target to perform its own adjustments.
408243830Sdim      int DefOp = Def->findRegisterDefOperandIdx(Reg);
409243830Sdim      dep.setLatency(
410243830Sdim        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
411243830Sdim      dep.setMinLatency(
412243830Sdim        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
413239462Sdim
414243830Sdim      const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
415243830Sdim      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
416234353Sdim      SU->addPred(dep);
417234353Sdim    }
418234353Sdim  }
419234353Sdim
420234353Sdim  // Add antidependence to the following def of the vreg it uses.
421239462Sdim  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
422234353Sdim  if (DefI != VRegDefs.end() && DefI->SU != SU)
423243830Sdim    DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
424234353Sdim}
425234353Sdim
426239462Sdim/// Return true if MI is an instruction we are unable to reason about
427239462Sdim/// (like a call or something with unmodeled side effects).
428239462Sdimstatic inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
429239462Sdim  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
430243830Sdim      (MI->hasOrderedMemoryRef() &&
431239462Sdim       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
432239462Sdim    return true;
433239462Sdim  return false;
434239462Sdim}
435239462Sdim
436239462Sdim// This MI might have either incomplete info, or known to be unsafe
437239462Sdim// to deal with (i.e. volatile object).
438239462Sdimstatic inline bool isUnsafeMemoryObject(MachineInstr *MI,
439239462Sdim                                        const MachineFrameInfo *MFI) {
440239462Sdim  if (!MI || MI->memoperands_empty())
441239462Sdim    return true;
442239462Sdim  // We purposefully do no check for hasOneMemOperand() here
443239462Sdim  // in hope to trigger an assert downstream in order to
444239462Sdim  // finish implementation.
445239462Sdim  if ((*MI->memoperands_begin())->isVolatile() ||
446239462Sdim       MI->hasUnmodeledSideEffects())
447239462Sdim    return true;
448239462Sdim
449239462Sdim  const Value *V = (*MI->memoperands_begin())->getValue();
450239462Sdim  if (!V)
451239462Sdim    return true;
452239462Sdim
453239462Sdim  V = getUnderlyingObject(V);
454239462Sdim  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
455239462Sdim    // Similarly to getUnderlyingObjectForInstr:
456239462Sdim    // For now, ignore PseudoSourceValues which may alias LLVM IR values
457239462Sdim    // because the code that uses this function has no way to cope with
458239462Sdim    // such aliases.
459239462Sdim    if (PSV->isAliased(MFI))
460239462Sdim      return true;
461239462Sdim  }
462239462Sdim  // Does this pointer refer to a distinct and identifiable object?
463239462Sdim  if (!isIdentifiedObject(V))
464239462Sdim    return true;
465239462Sdim
466239462Sdim  return false;
467239462Sdim}
468239462Sdim
469239462Sdim/// This returns true if the two MIs need a chain edge betwee them.
470239462Sdim/// If these are not even memory operations, we still may need
471239462Sdim/// chain deps between them. The question really is - could
472239462Sdim/// these two MIs be reordered during scheduling from memory dependency
473239462Sdim/// point of view.
474239462Sdimstatic bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
475239462Sdim                             MachineInstr *MIa,
476239462Sdim                             MachineInstr *MIb) {
477239462Sdim  // Cover a trivial case - no edge is need to itself.
478239462Sdim  if (MIa == MIb)
479239462Sdim    return false;
480239462Sdim
481239462Sdim  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
482239462Sdim    return true;
483239462Sdim
484239462Sdim  // If we are dealing with two "normal" loads, we do not need an edge
485239462Sdim  // between them - they could be reordered.
486239462Sdim  if (!MIa->mayStore() && !MIb->mayStore())
487239462Sdim    return false;
488239462Sdim
489239462Sdim  // To this point analysis is generic. From here on we do need AA.
490239462Sdim  if (!AA)
491239462Sdim    return true;
492239462Sdim
493239462Sdim  MachineMemOperand *MMOa = *MIa->memoperands_begin();
494239462Sdim  MachineMemOperand *MMOb = *MIb->memoperands_begin();
495239462Sdim
496239462Sdim  // FIXME: Need to handle multiple memory operands to support all targets.
497239462Sdim  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
498239462Sdim    llvm_unreachable("Multiple memory operands.");
499239462Sdim
500239462Sdim  // The following interface to AA is fashioned after DAGCombiner::isAlias
501239462Sdim  // and operates with MachineMemOperand offset with some important
502239462Sdim  // assumptions:
503239462Sdim  //   - LLVM fundamentally assumes flat address spaces.
504239462Sdim  //   - MachineOperand offset can *only* result from legalization and
505239462Sdim  //     cannot affect queries other than the trivial case of overlap
506239462Sdim  //     checking.
507239462Sdim  //   - These offsets never wrap and never step outside
508239462Sdim  //     of allocated objects.
509239462Sdim  //   - There should never be any negative offsets here.
510239462Sdim  //
511239462Sdim  // FIXME: Modify API to hide this math from "user"
512239462Sdim  // FIXME: Even before we go to AA we can reason locally about some
513239462Sdim  // memory objects. It can save compile time, and possibly catch some
514239462Sdim  // corner cases not currently covered.
515239462Sdim
516239462Sdim  assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
517239462Sdim  assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
518239462Sdim
519239462Sdim  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
520239462Sdim  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
521239462Sdim  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
522239462Sdim
523239462Sdim  AliasAnalysis::AliasResult AAResult = AA->alias(
524239462Sdim  AliasAnalysis::Location(MMOa->getValue(), Overlapa,
525239462Sdim                          MMOa->getTBAAInfo()),
526239462Sdim  AliasAnalysis::Location(MMOb->getValue(), Overlapb,
527239462Sdim                          MMOb->getTBAAInfo()));
528239462Sdim
529239462Sdim  return (AAResult != AliasAnalysis::NoAlias);
530239462Sdim}
531239462Sdim
532239462Sdim/// This recursive function iterates over chain deps of SUb looking for
533239462Sdim/// "latest" node that needs a chain edge to SUa.
534239462Sdimstatic unsigned
535239462SdimiterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
536239462Sdim                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
537239462Sdim                 SmallPtrSet<const SUnit*, 16> &Visited) {
538239462Sdim  if (!SUa || !SUb || SUb == ExitSU)
539239462Sdim    return *Depth;
540239462Sdim
541239462Sdim  // Remember visited nodes.
542239462Sdim  if (!Visited.insert(SUb))
543239462Sdim      return *Depth;
544239462Sdim  // If there is _some_ dependency already in place, do not
545239462Sdim  // descend any further.
546239462Sdim  // TODO: Need to make sure that if that dependency got eliminated or ignored
547239462Sdim  // for any reason in the future, we would not violate DAG topology.
548239462Sdim  // Currently it does not happen, but makes an implicit assumption about
549239462Sdim  // future implementation.
550239462Sdim  //
551239462Sdim  // Independently, if we encounter node that is some sort of global
552239462Sdim  // object (like a call) we already have full set of dependencies to it
553239462Sdim  // and we can stop descending.
554239462Sdim  if (SUa->isSucc(SUb) ||
555239462Sdim      isGlobalMemoryObject(AA, SUb->getInstr()))
556239462Sdim    return *Depth;
557239462Sdim
558239462Sdim  // If we do need an edge, or we have exceeded depth budget,
559239462Sdim  // add that edge to the predecessors chain of SUb,
560239462Sdim  // and stop descending.
561239462Sdim  if (*Depth > 200 ||
562239462Sdim      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
563243830Sdim    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
564239462Sdim    return *Depth;
565239462Sdim  }
566239462Sdim  // Track current depth.
567239462Sdim  (*Depth)++;
568239462Sdim  // Iterate over chain dependencies only.
569239462Sdim  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
570239462Sdim       I != E; ++I)
571239462Sdim    if (I->isCtrl())
572239462Sdim      iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
573239462Sdim  return *Depth;
574239462Sdim}
575239462Sdim
576239462Sdim/// This function assumes that "downward" from SU there exist
577239462Sdim/// tail/leaf of already constructed DAG. It iterates downward and
578239462Sdim/// checks whether SU can be aliasing any node dominated
579239462Sdim/// by it.
580239462Sdimstatic void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
581239462Sdim                            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
582239462Sdim                            unsigned LatencyToLoad) {
583239462Sdim  if (!SU)
584239462Sdim    return;
585239462Sdim
586239462Sdim  SmallPtrSet<const SUnit*, 16> Visited;
587239462Sdim  unsigned Depth = 0;
588239462Sdim
589239462Sdim  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
590239462Sdim       I != IE; ++I) {
591239462Sdim    if (SU == *I)
592239462Sdim      continue;
593239462Sdim    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
594243830Sdim      SDep Dep(SU, SDep::MayAliasMem);
595243830Sdim      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
596243830Sdim      (*I)->addPred(Dep);
597239462Sdim    }
598239462Sdim    // Now go through all the chain successors and iterate from them.
599239462Sdim    // Keep track of visited nodes.
600239462Sdim    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
601239462Sdim         JE = (*I)->Succs.end(); J != JE; ++J)
602239462Sdim      if (J->isCtrl())
603239462Sdim        iterateChainSucc (AA, MFI, SU, J->getSUnit(),
604239462Sdim                          ExitSU, &Depth, Visited);
605239462Sdim  }
606239462Sdim}
607239462Sdim
608239462Sdim/// Check whether two objects need a chain edge, if so, add it
609239462Sdim/// otherwise remember the rejected SU.
610239462Sdimstatic inline
611239462Sdimvoid addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
612239462Sdim                         SUnit *SUa, SUnit *SUb,
613239462Sdim                         std::set<SUnit *> &RejectList,
614239462Sdim                         unsigned TrueMemOrderLatency = 0,
615239462Sdim                         bool isNormalMemory = false) {
616239462Sdim  // If this is a false dependency,
617239462Sdim  // do not add the edge, but rememeber the rejected node.
618239462Sdim  if (!EnableAASchedMI ||
619243830Sdim      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
620243830Sdim    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
621243830Sdim    Dep.setLatency(TrueMemOrderLatency);
622243830Sdim    SUb->addPred(Dep);
623243830Sdim  }
624239462Sdim  else {
625239462Sdim    // Duplicate entries should be ignored.
626239462Sdim    RejectList.insert(SUb);
627239462Sdim    DEBUG(dbgs() << "\tReject chain dep between SU("
628239462Sdim          << SUa->NodeNum << ") and SU("
629239462Sdim          << SUb->NodeNum << ")\n");
630239462Sdim  }
631239462Sdim}
632239462Sdim
633234353Sdim/// Create an SUnit for each real instruction, numbered in top-down toplological
634234353Sdim/// order. The instruction order A < B, implies that no edge exists from B to A.
635234353Sdim///
636234353Sdim/// Map each real instruction to its SUnit.
637234353Sdim///
638234353Sdim/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
639234353Sdim/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
640234353Sdim/// instead of pointers.
641234353Sdim///
642234353Sdim/// MachineScheduler relies on initSUnits numbering the nodes by their order in
643234353Sdim/// the original instruction list.
644234353Sdimvoid ScheduleDAGInstrs::initSUnits() {
645234353Sdim  // We'll be allocating one SUnit for each real instruction in the region,
646234353Sdim  // which is contained within a basic block.
647193323Sed  SUnits.reserve(BB->size());
648193323Sed
649234353Sdim  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
650234353Sdim    MachineInstr *MI = I;
651234353Sdim    if (MI->isDebugValue())
652234353Sdim      continue;
653234353Sdim
654234353Sdim    SUnit *SU = newSUnit(MI);
655234353Sdim    MISUnitMap[MI] = SU;
656234353Sdim
657234353Sdim    SU->isCall = MI->isCall();
658234353Sdim    SU->isCommutable = MI->isCommutable();
659234353Sdim
660234353Sdim    // Assign the Latency field of SU using target-provided information.
661243830Sdim    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
662234353Sdim  }
663234353Sdim}
664234353Sdim
665239462Sdim/// If RegPressure is non null, compute register pressure as a side effect. The
666239462Sdim/// DAG builder is an efficient place to do it because it already visits
667239462Sdim/// operands.
668239462Sdimvoid ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
669239462Sdim                                        RegPressureTracker *RPTracker) {
670234353Sdim  // Create an SUnit for each real instruction.
671234353Sdim  initSUnits();
672234353Sdim
673193323Sed  // We build scheduling units by walking a block's instruction list from bottom
674193323Sed  // to top.
675193323Sed
676199481Srdivacky  // Remember where a generic side-effecting instruction is as we procede.
677199481Srdivacky  SUnit *BarrierChain = 0, *AliasChain = 0;
678193323Sed
679199481Srdivacky  // Memory references to specific known memory locations are tracked
680199481Srdivacky  // so that they can be given more precise dependencies. We track
681199481Srdivacky  // separately the known memory locations that may alias and those
682199481Srdivacky  // that are known not to alias
683199481Srdivacky  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
684199481Srdivacky  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
685239462Sdim  std::set<SUnit*> RejectMemNodes;
686193323Sed
687205218Srdivacky  // Remove any stale debug info; sometimes BuildSchedGraph is called again
688205218Srdivacky  // without emitting the info from the previous call.
689223017Sdim  DbgValues.clear();
690223017Sdim  FirstDbgValue = NULL;
691205218Srdivacky
692234353Sdim  assert(Defs.empty() && Uses.empty() &&
693234353Sdim         "Only BuildGraph should update Defs/Uses");
694234353Sdim  Defs.setRegLimit(TRI->getNumRegs());
695234353Sdim  Uses.setRegLimit(TRI->getNumRegs());
696234353Sdim
697234353Sdim  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
698234353Sdim  // FIXME: Allow SparseSet to reserve space for the creation of virtual
699234353Sdim  // registers during scheduling. Don't artificially inflate the Universe
700234353Sdim  // because we want to assert that vregs are not created during DAG building.
701234353Sdim  VRegDefs.setUniverse(MRI.getNumVirtRegs());
702234353Sdim
703218893Sdim  // Model data dependencies between instructions being scheduled and the
704218893Sdim  // ExitSU.
705234353Sdim  addSchedBarrierDeps();
706218893Sdim
707193323Sed  // Walk the list of instructions, from bottom moving up.
708223017Sdim  MachineInstr *PrevMI = NULL;
709234353Sdim  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
710193323Sed       MII != MIE; --MII) {
711193323Sed    MachineInstr *MI = prior(MII);
712223017Sdim    if (MI && PrevMI) {
713223017Sdim      DbgValues.push_back(std::make_pair(PrevMI, MI));
714223017Sdim      PrevMI = NULL;
715223017Sdim    }
716223017Sdim
717205218Srdivacky    if (MI->isDebugValue()) {
718223017Sdim      PrevMI = MI;
719205218Srdivacky      continue;
720205218Srdivacky    }
721239462Sdim    if (RPTracker) {
722239462Sdim      RPTracker->recede();
723239462Sdim      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
724239462Sdim    }
725223017Sdim
726234982Sdim    assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
727193323Sed           "Cannot schedule terminators or labels!");
728193323Sed
729234353Sdim    SUnit *SU = MISUnitMap[MI];
730234353Sdim    assert(SU && "No SUnit mapped to this MI");
731193323Sed
732193323Sed    // Add register-based dependencies (data, anti, and output).
733193323Sed    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
734193323Sed      const MachineOperand &MO = MI->getOperand(j);
735193323Sed      if (!MO.isReg()) continue;
736193323Sed      unsigned Reg = MO.getReg();
737193323Sed      if (Reg == 0) continue;
738193323Sed
739234353Sdim      if (TRI->isPhysicalRegister(Reg))
740234353Sdim        addPhysRegDeps(SU, j);
741234353Sdim      else {
742234353Sdim        assert(!IsPostRA && "Virtual register encountered!");
743234353Sdim        if (MO.isDef())
744234353Sdim          addVRegDefDeps(SU, j);
745234353Sdim        else if (MO.readsReg()) // ignore undef operands
746234353Sdim          addVRegUseDeps(SU, j);
747193323Sed      }
748193323Sed    }
749193323Sed
750193323Sed    // Add chain dependencies.
751198892Srdivacky    // Chain dependencies used to enforce memory order should have
752198892Srdivacky    // latency of 0 (except for true dependency of Store followed by
753198892Srdivacky    // aliased Load... we estimate that with a single cycle of latency
754198892Srdivacky    // assuming the hardware will bypass)
755193323Sed    // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
756193323Sed    // after stack slots are lowered to actual addresses.
757193323Sed    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
758193323Sed    // produce more precise dependence information.
759239462Sdim    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
760239462Sdim    if (isGlobalMemoryObject(AA, MI)) {
761199481Srdivacky      // Be conservative with these and add dependencies on all memory
762199481Srdivacky      // references, even those that are known to not alias.
763223017Sdim      for (std::map<const Value *, SUnit *>::iterator I =
764199481Srdivacky             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
765243830Sdim        I->second->addPred(SDep(SU, SDep::Barrier));
766199481Srdivacky      }
767199481Srdivacky      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
768199481Srdivacky             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
769243830Sdim        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
770243830Sdim          SDep Dep(SU, SDep::Barrier);
771243830Sdim          Dep.setLatency(TrueMemOrderLatency);
772243830Sdim          I->second[i]->addPred(Dep);
773243830Sdim        }
774199481Srdivacky      }
775199481Srdivacky      // Add SU to the barrier chain.
776199481Srdivacky      if (BarrierChain)
777243830Sdim        BarrierChain->addPred(SDep(SU, SDep::Barrier));
778199481Srdivacky      BarrierChain = SU;
779239462Sdim      // This is a barrier event that acts as a pivotal node in the DAG,
780239462Sdim      // so it is safe to clear list of exposed nodes.
781239462Sdim      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
782239462Sdim                      TrueMemOrderLatency);
783239462Sdim      RejectMemNodes.clear();
784239462Sdim      NonAliasMemDefs.clear();
785239462Sdim      NonAliasMemUses.clear();
786199481Srdivacky
787199481Srdivacky      // fall-through
788199481Srdivacky    new_alias_chain:
789199481Srdivacky      // Chain all possibly aliasing memory references though SU.
790239462Sdim      if (AliasChain) {
791239462Sdim        unsigned ChainLatency = 0;
792239462Sdim        if (AliasChain->getInstr()->mayLoad())
793239462Sdim          ChainLatency = TrueMemOrderLatency;
794239462Sdim        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
795239462Sdim                           ChainLatency);
796239462Sdim      }
797199481Srdivacky      AliasChain = SU;
798193323Sed      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
799239462Sdim        addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
800239462Sdim                           TrueMemOrderLatency);
801199481Srdivacky      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
802239462Sdim           E = AliasMemDefs.end(); I != E; ++I)
803239462Sdim        addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
804193323Sed      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
805199481Srdivacky           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
806193323Sed        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
807239462Sdim          addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
808239462Sdim                             TrueMemOrderLatency);
809193323Sed      }
810239462Sdim      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
811239462Sdim                      TrueMemOrderLatency);
812199481Srdivacky      PendingLoads.clear();
813199481Srdivacky      AliasMemDefs.clear();
814199481Srdivacky      AliasMemUses.clear();
815234353Sdim    } else if (MI->mayStore()) {
816198892Srdivacky      bool MayAlias = true;
817198892Srdivacky      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
818193323Sed        // A store to a specific PseudoSourceValue. Add precise dependencies.
819199481Srdivacky        // Record the def in MemDefs, first adding a dep if there is
820199481Srdivacky        // an existing def.
821223017Sdim        std::map<const Value *, SUnit *>::iterator I =
822199481Srdivacky          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
823223017Sdim        std::map<const Value *, SUnit *>::iterator IE =
824199481Srdivacky          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
825199481Srdivacky        if (I != IE) {
826239462Sdim          addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
827239462Sdim                             0, true);
828193323Sed          I->second = SU;
829193323Sed        } else {
830199481Srdivacky          if (MayAlias)
831199481Srdivacky            AliasMemDefs[V] = SU;
832199481Srdivacky          else
833199481Srdivacky            NonAliasMemDefs[V] = SU;
834193323Sed        }
835193323Sed        // Handle the uses in MemUses, if there are any.
836193323Sed        std::map<const Value *, std::vector<SUnit *> >::iterator J =
837199481Srdivacky          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
838199481Srdivacky        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
839199481Srdivacky          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
840199481Srdivacky        if (J != JE) {
841193323Sed          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
842239462Sdim            addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
843239462Sdim                               TrueMemOrderLatency, true);
844193323Sed          J->second.clear();
845193323Sed        }
846198892Srdivacky        if (MayAlias) {
847199481Srdivacky          // Add dependencies from all the PendingLoads, i.e. loads
848199481Srdivacky          // with no underlying object.
849198892Srdivacky          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
850239462Sdim            addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
851239462Sdim                               TrueMemOrderLatency);
852199481Srdivacky          // Add dependence on alias chain, if needed.
853199481Srdivacky          if (AliasChain)
854239462Sdim            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
855239462Sdim          // But we also should check dependent instructions for the
856239462Sdim          // SU in question.
857239462Sdim          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
858239462Sdim                          TrueMemOrderLatency);
859198892Srdivacky        }
860199481Srdivacky        // Add dependence on barrier chain, if needed.
861239462Sdim        // There is no point to check aliasing on barrier event. Even if
862239462Sdim        // SU and barrier _could_ be reordered, they should not. In addition,
863239462Sdim        // we have lost all RejectMemNodes below barrier.
864199481Srdivacky        if (BarrierChain)
865243830Sdim          BarrierChain->addPred(SDep(SU, SDep::Barrier));
866198953Srdivacky      } else {
867193323Sed        // Treat all other stores conservatively.
868199481Srdivacky        goto new_alias_chain;
869198892Srdivacky      }
870218893Sdim
871218893Sdim      if (!ExitSU.isPred(SU))
872218893Sdim        // Push store's up a bit to avoid them getting in between cmp
873218893Sdim        // and branches.
874243830Sdim        ExitSU.addPred(SDep(SU, SDep::Artificial));
875234353Sdim    } else if (MI->mayLoad()) {
876198892Srdivacky      bool MayAlias = true;
877198090Srdivacky      if (MI->isInvariantLoad(AA)) {
878193323Sed        // Invariant load, no chain dependencies needed!
879198953Srdivacky      } else {
880223017Sdim        if (const Value *V =
881199481Srdivacky            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
882199481Srdivacky          // A load from a specific PseudoSourceValue. Add precise dependencies.
883223017Sdim          std::map<const Value *, SUnit *>::iterator I =
884199481Srdivacky            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
885223017Sdim          std::map<const Value *, SUnit *>::iterator IE =
886199481Srdivacky            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
887199481Srdivacky          if (I != IE)
888239462Sdim            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
889199481Srdivacky          if (MayAlias)
890199481Srdivacky            AliasMemUses[V].push_back(SU);
891223017Sdim          else
892199481Srdivacky            NonAliasMemUses[V].push_back(SU);
893199481Srdivacky        } else {
894199481Srdivacky          // A load with no underlying object. Depend on all
895199481Srdivacky          // potentially aliasing stores.
896223017Sdim          for (std::map<const Value *, SUnit *>::iterator I =
897199481Srdivacky                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
898239462Sdim            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
899223017Sdim
900199481Srdivacky          PendingLoads.push_back(SU);
901199481Srdivacky          MayAlias = true;
902198892Srdivacky        }
903239462Sdim        if (MayAlias)
904239462Sdim          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
905199481Srdivacky        // Add dependencies on alias and barrier chains, if needed.
906199481Srdivacky        if (MayAlias && AliasChain)
907239462Sdim          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
908199481Srdivacky        if (BarrierChain)
909243830Sdim          BarrierChain->addPred(SDep(SU, SDep::Barrier));
910223017Sdim      }
911193323Sed    }
912193323Sed  }
913223017Sdim  if (PrevMI)
914223017Sdim    FirstDbgValue = PrevMI;
915193323Sed
916234353Sdim  Defs.clear();
917234353Sdim  Uses.clear();
918234353Sdim  VRegDefs.clear();
919193323Sed  PendingLoads.clear();
920193323Sed}
921193323Sed
922193323Sedvoid ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
923243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
924193323Sed  SU->getInstr()->dump();
925243830Sdim#endif
926193323Sed}
927193323Sed
928193323Sedstd::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
929193323Sed  std::string s;
930193323Sed  raw_string_ostream oss(s);
931193323Sed  if (SU == &EntrySU)
932193323Sed    oss << "<entry>";
933193323Sed  else if (SU == &ExitSU)
934193323Sed    oss << "<exit>";
935193323Sed  else
936193323Sed    SU->getInstr()->print(oss);
937193323Sed  return oss.str();
938193323Sed}
939193323Sed
940234353Sdim/// Return the basic block label. It is not necessarilly unique because a block
941234353Sdim/// contains multiple scheduling regions. But it is fine for visualization.
942234353Sdimstd::string ScheduleDAGInstrs::getDAGName() const {
943234353Sdim  return "dag." + BB->getFullName();
944193323Sed}
945243830Sdim
946243830Sdimnamespace {
947243830Sdim/// \brief Manage the stack used by a reverse depth-first search over the DAG.
948243830Sdimclass SchedDAGReverseDFS {
949243830Sdim  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
950243830Sdimpublic:
951243830Sdim  bool isComplete() const { return DFSStack.empty(); }
952243830Sdim
953243830Sdim  void follow(const SUnit *SU) {
954243830Sdim    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
955243830Sdim  }
956243830Sdim  void advance() { ++DFSStack.back().second; }
957243830Sdim
958243830Sdim  void backtrack() { DFSStack.pop_back(); }
959243830Sdim
960243830Sdim  const SUnit *getCurr() const { return DFSStack.back().first; }
961243830Sdim
962243830Sdim  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
963243830Sdim
964243830Sdim  SUnit::const_pred_iterator getPredEnd() const {
965243830Sdim    return getCurr()->Preds.end();
966243830Sdim  }
967243830Sdim};
968243830Sdim} // anonymous
969243830Sdim
970243830Sdimvoid ScheduleDAGILP::resize(unsigned NumSUnits) {
971243830Sdim  ILPValues.resize(NumSUnits);
972243830Sdim}
973243830Sdim
974243830SdimILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
975243830Sdim  return ILPValues[SU->NodeNum];
976243830Sdim}
977243830Sdim
978243830Sdim// A leaf node has an ILP of 1/1.
979243830Sdimstatic ILPValue initILP(const SUnit *SU) {
980243830Sdim  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
981243830Sdim  return ILPValue(Cnt, 1 + SU->getDepth());
982243830Sdim}
983243830Sdim
984243830Sdim/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
985243830Sdim/// search from this root.
986243830Sdimvoid ScheduleDAGILP::computeILP(const SUnit *Root) {
987243830Sdim  if (!IsBottomUp)
988243830Sdim    llvm_unreachable("Top-down ILP metric is unimplemnted");
989243830Sdim
990243830Sdim  SchedDAGReverseDFS DFS;
991243830Sdim  // Mark a node visited by validating it.
992243830Sdim  ILPValues[Root->NodeNum] = initILP(Root);
993243830Sdim  DFS.follow(Root);
994243830Sdim  for (;;) {
995243830Sdim    // Traverse the leftmost path as far as possible.
996243830Sdim    while (DFS.getPred() != DFS.getPredEnd()) {
997243830Sdim      const SUnit *PredSU = DFS.getPred()->getSUnit();
998243830Sdim      DFS.advance();
999243830Sdim      // If the pred is already valid, skip it.
1000243830Sdim      if (ILPValues[PredSU->NodeNum].isValid())
1001243830Sdim        continue;
1002243830Sdim      ILPValues[PredSU->NodeNum] = initILP(PredSU);
1003243830Sdim      DFS.follow(PredSU);
1004243830Sdim    }
1005243830Sdim    // Visit the top of the stack in postorder and backtrack.
1006243830Sdim    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
1007243830Sdim    DFS.backtrack();
1008243830Sdim    if (DFS.isComplete())
1009243830Sdim      break;
1010243830Sdim    // Add the recently finished predecessor's bottom-up descendent count.
1011243830Sdim    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
1012243830Sdim  }
1013243830Sdim}
1014243830Sdim
1015243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1016243830Sdimvoid ILPValue::print(raw_ostream &OS) const {
1017243830Sdim  if (!isValid())
1018243830Sdim    OS << "BADILP";
1019243830Sdim  OS << InstrCount << " / " << Cycles << " = "
1020243830Sdim     << format("%g", ((double)InstrCount / Cycles));
1021243830Sdim}
1022243830Sdim
1023243830Sdimvoid ILPValue::dump() const {
1024243830Sdim  dbgs() << *this << '\n';
1025243830Sdim}
1026243830Sdim
1027243830Sdimnamespace llvm {
1028243830Sdim
1029243830Sdimraw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1030243830Sdim  Val.print(OS);
1031243830Sdim  return OS;
1032243830Sdim}
1033243830Sdim
1034243830Sdim} // namespace llvm
1035243830Sdim#endif // !NDEBUG || LLVM_ENABLE_DUMP
1036