ScheduleDAGInstrs.cpp revision 288943
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "misched"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool IsPostRAFlag, bool RemoveKillFlags,
                                     LiveIntervals *lis)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
      IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
      CanHandleTerminators(false), FirstDbgValue(nullptr) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(ST.getSchedModel(), &ST, TII);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
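///
/// For example (illustrative IR, not taken from a particular test), the
/// pointer %q below traces back to the same underlying object as %p:
///   %i = ptrtoint i8* %p to i64
///   %a = add i64 %i, 16
///   %q = inttoptr i64 %a to i8*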
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
                                 SmallVectorImpl<Value *> &Objects,
                                 const DataLayout &DL) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<Value *, 4> Objs;
    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);

    for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
         I != IE; ++I) {
      V = *I;
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
}

typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
UnderlyingObjectsVector;

/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to normal references to known
/// objects, return the Values for those objects.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo *MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  if (!MI->hasOneMemOperand() ||
      (!(*MI->memoperands_begin())->getValue() &&
       !(*MI->memoperands_begin())->getPseudoValue()) ||
      (*MI->memoperands_begin())->isVolatile())
    return;

  if (const PseudoSourceValue *PSV =
      (*MI->memoperands_begin())->getPseudoValue()) {
    // Functions that contain tail calls don't have unique PseudoSourceValue
    // objects. Two PseudoSourceValues might refer to the same or overlapping
    // locations. The client code calling this function assumes this is not the
    // case. So return a conservative answer of no known object.
    if (MFI->hasTailCall())
      return;

    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (!PSV->isAliased(MFI)) {
      bool MayAlias = PSV->mayAlias(MFI);
      Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
    }
    return;
  }

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return;

  SmallVector<Value *, 4> Objs;
  getUnderlyingObjects(V, Objs, DL);

  for (Value *V : Objs) {
    if (!isIdentifiedObject(V)) {
      Objects.clear();
      return;
    }

    Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
  }
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
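///
/// For example (illustrative, not target-specific): a copy into a physical
/// return register just before a RET must not sink below it; treating ExitSU
/// as a user of that register enforces the ordering.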
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    assert(Uses.empty() && "Uses in set before adding deps?");
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
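  // For example (illustrative): if this operand reads a register that a later
  // instruction in the region redefines, the later def gets an anti edge from
  // this SUnit; if it writes a register that is written again later, the
  // later def gets an output edge.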
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  }
  else {
    addPhysRegDataDeps(SU, OperIdx);
    unsigned Reg = MO.getReg();

    // clear this register's use list
    if (Uses.contains(Reg))
      Uses.eraseAll(Reg);

    if (!MO.isDead()) {
      Defs.eraseAll(Reg);
    } else if (SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Singly defined vregs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  // Check here if there are any others...
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      SDep Dep(SU, SDep::Output, Reg);
      Dep.setLatency(
        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
      DefSU->addPred(Dep);
    }
    DefI->SU = SU;
  }
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Record this local VReg use.
  VReg2UseMap::iterator UI = VRegUses.find(Reg);
  for (; UI != VRegUses.end(); ++UI) {
    if (UI->SU == SU)
      break;
  }
  if (UI == VRegUses.end())
    VRegUses.insert(VReg2SUnit(Reg, SU));

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies requires LiveIntervals");
  LiveQueryResult LRQ
    = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
  VNInfo *VNI = LRQ.valueIn();

  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  assert(VNI && "No value to read by operand");
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      SDep dep(DefSU, SDep::Data, Reg);
      // Adjust the dependence latency using operand def/use information, then
      // allow the target to perform its own adjustments.
      int DefOp = Def->findRegisterDefOperandIdx(Reg);
      dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));

      const TargetSubtargetInfo &ST = MF.getSubtarget();
      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}

/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
      (MI->hasOrderedMemoryRef() &&
       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
    return true;
  return false;
}

// This MI might have either incomplete info, or be known to be unsafe
// to deal with (i.e. a volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
                                        const MachineFrameInfo *MFI,
                                        const DataLayout &DL) {
  if (!MI || MI->memoperands_empty())
    return true;
  // We purposefully do not check for hasOneMemOperand() here
  // in the hope of triggering an assert downstream in order to
  // finish the implementation.
  if ((*MI->memoperands_begin())->isVolatile() ||
       MI->hasUnmodeledSideEffects())
    return true;

  if ((*MI->memoperands_begin())->getPseudoValue()) {
    // Similarly to getUnderlyingObjectsForInstr:
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    return true;
  }

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return true;

  SmallVector<Value *, 4> Objs;
  getUnderlyingObjects(V, Objs, DL);
  for (Value *V : Objs) {
    // Does this pointer refer to a distinct and identifiable object?
    if (!isIdentifiedObject(V))
      return true;
  }

  return false;
}

/// This returns true if the two MIs need a chain edge between them.
/// If these are not even memory operations, we still may need
/// chain deps between them. The question really is - could
/// these two MIs be reordered during scheduling from a memory dependency
/// point of view.
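///
/// For example (illustrative): two stores through pointers that may alias
/// must keep their relative order via a chain edge, while two plain loads
/// from distinct identified objects can be reordered freely.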
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             const DataLayout &DL, MachineInstr *MIa,
                             MachineInstr *MIb) {
  const MachineFunction *MF = MIa->getParent()->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  // Cover a trivial case - no edge is needed to itself.
  if (MIa == MIb)
    return false;

  // Let the target decide if memory accesses cannot possibly overlap.
  if ((MIa->mayLoad() || MIa->mayStore()) &&
      (MIb->mayLoad() || MIb->mayStore()))
    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
      return false;

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
    return true;

  if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
    return true;

  // If we are dealing with two "normal" loads, we do not need an edge
  // between them - they could be reordered.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // To this point analysis is generic. From here on we do need AA.
  if (!AA)
    return true;

  MachineMemOperand *MMOa = *MIa->memoperands_begin();
  MachineMemOperand *MMOb = *MIb->memoperands_begin();

  if (!MMOa->getValue() || !MMOb->getValue())
    return true;

  // The following interface to AA is fashioned after DAGCombiner::isAlias
  // and operates with MachineMemOperand offset with some important
  // assumptions:
  //   - LLVM fundamentally assumes flat address spaces.
  //   - MachineOperand offset can *only* result from legalization and
  //     cannot affect queries other than the trivial case of overlap
  //     checking.
  //   - These offsets never wrap and never step outside
  //     of allocated objects.
  //   - There should never be any negative offsets here.
  //
  // FIXME: Modify API to hide this math from "user"
  // FIXME: Even before we go to AA we can reason locally about some
  // memory objects. It can save compile time, and possibly catch some
  // corner cases not currently covered.

  assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");

  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
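  // For example (illustrative numbers): if MMOa is <offset 8, size 4> and
  // MMOb is <offset 16, size 4>, then MinOffset = 8, Overlapa = 4 + 8 - 8 = 4
  // and Overlapb = 4 + 16 - 8 = 12, i.e. each access's extent measured from
  // the common minimum offset.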

  AliasResult AAResult =
      AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
                               UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
                MemoryLocation(MMOb->getValue(), Overlapb,
                               UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));

  return (AAResult != NoAlias);
}

/// This recursive function iterates over chain deps of SUb looking for
/// the "latest" node that needs a chain edge to SUa.
static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                                 const DataLayout &DL, SUnit *SUa, SUnit *SUb,
                                 SUnit *ExitSU, unsigned *Depth,
                                 SmallPtrSetImpl<const SUnit *> &Visited) {
  if (!SUa || !SUb || SUb == ExitSU)
    return *Depth;

  // Remember visited nodes.
  if (!Visited.insert(SUb).second)
    return *Depth;
  // If there is _some_ dependency already in place, do not
  // descend any further.
  // TODO: Need to make sure that if that dependency got eliminated or ignored
  // for any reason in the future, we would not violate DAG topology.
  // Currently it does not happen, but this makes an implicit assumption about
  // the future implementation.
  //
  // Independently, if we encounter a node that is some sort of global
  // object (like a call), we already have a full set of dependencies to it
  // and we can stop descending.
  if (SUa->isSucc(SUb) ||
      isGlobalMemoryObject(AA, SUb->getInstr()))
    return *Depth;

  // If we do need an edge, or we have exceeded depth budget,
  // add that edge to the predecessors chain of SUb,
  // and stop descending.
  if (*Depth > 200 ||
      MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
    return *Depth;
  }
  // Track current depth.
  (*Depth)++;
  // Iterate over memory dependencies only.
  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
       I != E; ++I)
    if (I->isNormalMemoryOrBarrier())
      iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
  return *Depth;
}

/// This function assumes that the tail/leaf of the already constructed DAG
/// exists "downward" from SU. It iterates downward and checks whether SU
/// can alias any node dominated by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                            const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
                            std::set<SUnit *> &CheckList,
                            unsigned LatencyToLoad) {
  if (!SU)
    return;

  SmallPtrSet<const SUnit*, 16> Visited;
  unsigned Depth = 0;

  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
       I != IE; ++I) {
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
      SDep Dep(SU, SDep::MayAliasMem);
      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
      (*I)->addPred(Dep);
    }

    // Iterate recursively over all previously added memory chain
    // successors. Keep track of visited nodes.
    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
         JE = (*I)->Succs.end(); J != JE; ++J)
      if (J->isNormalMemoryOrBarrier())
        iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
                         Visited);
  }
}

/// Check whether two objects need a chain edge; if so, add it, otherwise
/// remember the rejected SU.
static inline void addChainDependency(AliasAnalysis *AA,
                                      const MachineFrameInfo *MFI,
                                      const DataLayout &DL, SUnit *SUa,
                                      SUnit *SUb, std::set<SUnit *> &RejectList,
                                      unsigned TrueMemOrderLatency = 0,
                                      bool isNormalMemory = false) {
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
    Dep.setLatency(TrueMemOrderLatency);
    SUb->addPred(Dep);
  }
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
    DEBUG(dbgs() << "\tReject chain dep between SU("
          << SUa->NodeNum << ") and SU("
          << SUb->NodeNum << ")\n");
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
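    //
    // For example (illustrative, not tied to a real target): an unpipelined
    // divider could be modeled as a reserved resource (BufferSize=0), while
    // an in-order FP pipe inside an out-of-order core could be modeled as an
    // unbuffered resource (BufferSize=1).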
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (TargetSchedModel::ProcResIter
             PI = SchedModel.getWriteProcResBegin(SC),
             PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
        switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

/// If RPTracker is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;

  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
  std::set<SUnit*> RejectMemNodes;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  VRegUses.clear();
  VRegDefs.setUniverse(MRI.getNumVirtRegs());
  VRegUses.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = std::prev(MII);
    if (MI && DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, MI));
      DbgMI = nullptr;
    }

    if (MI->isDebugValue()) {
      DbgMI = MI;
      continue;
    }
    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
      RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
      assert(RPTracker->getPos() == std::prev(MII) &&
             "RPTracker can't find MI");
    }

    assert(
        (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef()) {
          HasVRegDef = true;
          addVRegDefDeps(SU, j);
        }
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }
    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1
        && (HasVRegDef || MI->mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
    if (isGlobalMemoryObject(AA, MI)) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
          I->second[i]->addPred(SDep(SU, SDep::Barrier));
        }
      }
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
          SDep Dep(SU, SDep::Barrier);
          Dep.setLatency(TrueMemOrderLatency);
          I->second[i]->addPred(Dep);
        }
      }
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Barrier));
      BarrierChain = SU;
      // This is a barrier event that acts as a pivotal node in the DAG,
      // so it is safe to clear the list of exposed nodes.
      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      RejectMemNodes.clear();
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain) {
        unsigned ChainLatency = 0;
        if (AliasChain->getInstr()->mayLoad())
          ChainLatency = TrueMemOrderLatency;
        addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
                           RejectMemNodes, ChainLatency);
      }
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                           PendingLoads[k], RejectMemNodes,
                           TrueMemOrderLatency);
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
           AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                             I->second[i], RejectMemNodes);
      }
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                             I->second[i], RejectMemNodes, TrueMemOrderLatency);
      }
      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      // Add dependence on barrier chain, if needed.
      // There is no point in checking aliasing on a barrier event. Even if
      // SU and the barrier _could_ be reordered, they should not. In addition,
      // we have lost all RejectMemNodes below the barrier.
936280031Sdim      if (BarrierChain)
937280031Sdim        BarrierChain->addPred(SDep(SU, SDep::Barrier));
938280031Sdim
939261991Sdim      UnderlyingObjectsVector Objs;
940288943Sdim      getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
941249423Sdim
942249423Sdim      if (Objs.empty()) {
943249423Sdim        // Treat all other stores conservatively.
944249423Sdim        goto new_alias_chain;
945249423Sdim      }
946249423Sdim
947249423Sdim      bool MayAlias = false;
948261991Sdim      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
949261991Sdim           K != KE; ++K) {
950276479Sdim        ValueType V = K->getPointer();
951261991Sdim        bool ThisMayAlias = K->getInt();
952249423Sdim        if (ThisMayAlias)
953249423Sdim          MayAlias = true;
954249423Sdim
955193323Sed        // A store to a specific PseudoSourceValue. Add precise dependencies.
956199481Srdivacky        // Record the def in MemDefs, first adding a dep if there is
957199481Srdivacky        // an existing def.
958276479Sdim        MapVector<ValueType, std::vector<SUnit *> >::iterator I =
959249423Sdim          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
960276479Sdim        MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
961249423Sdim          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
962199481Srdivacky        if (I != IE) {
963276479Sdim          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
964288943Sdim            addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
965288943Sdim                               I->second[i], RejectMemNodes, 0, true);
966276479Sdim
967276479Sdim          // If we're not using AA, then we only need one store per object.
968276479Sdim          if (!AAForDep)
969276479Sdim            I->second.clear();
970276479Sdim          I->second.push_back(SU);
971193323Sed        } else {
972276479Sdim          if (ThisMayAlias) {
973276479Sdim            if (!AAForDep)
974276479Sdim              AliasMemDefs[V].clear();
975276479Sdim            AliasMemDefs[V].push_back(SU);
976276479Sdim          } else {
977276479Sdim            if (!AAForDep)
978276479Sdim              NonAliasMemDefs[V].clear();
979276479Sdim            NonAliasMemDefs[V].push_back(SU);
980276479Sdim          }
981193323Sed        }
982193323Sed        // Handle the uses in MemUses, if there are any.
983276479Sdim        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
984249423Sdim          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
985276479Sdim        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
986249423Sdim          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
987199481Srdivacky        if (J != JE) {
988193323Sed          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
989288943Sdim            addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
990288943Sdim                               J->second[i], RejectMemNodes,
991239462Sdim                               TrueMemOrderLatency, true);
992193323Sed          J->second.clear();
993193323Sed        }
994198892Srdivacky      }
995249423Sdim      if (MayAlias) {
996249423Sdim        // Add dependencies from all the PendingLoads, i.e. loads
997249423Sdim        // with no underlying object.
998249423Sdim        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
999288943Sdim          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
1000288943Sdim                             PendingLoads[k], RejectMemNodes,
1001249423Sdim                             TrueMemOrderLatency);
1002249423Sdim        // Add dependence on alias chain, if needed.
1003249423Sdim        if (AliasChain)
1004288943Sdim          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
1005288943Sdim                             RejectMemNodes);
1006249423Sdim      }
1007288943Sdim      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
1008288943Sdim                      TrueMemOrderLatency);
1009234353Sdim    } else if (MI->mayLoad()) {
1010198892Srdivacky      bool MayAlias = true;
1011198090Srdivacky      if (MI->isInvariantLoad(AA)) {
1012193323Sed        // Invariant load, no chain dependencies needed!
1013198953Srdivacky      } else {
1014261991Sdim        UnderlyingObjectsVector Objs;
1015288943Sdim        getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
1016249423Sdim
1017249423Sdim        if (Objs.empty()) {
1018199481Srdivacky          // A load with no underlying object. Depend on all
1019199481Srdivacky          // potentially aliasing stores.
1020276479Sdim          for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1021199481Srdivacky                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
1022276479Sdim            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1023288943Sdim              addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
1024288943Sdim                                 I->second[i], RejectMemNodes);
1025223017Sdim
1026199481Srdivacky          PendingLoads.push_back(SU);
1027199481Srdivacky          MayAlias = true;
1028249423Sdim        } else {
1029249423Sdim          MayAlias = false;
1030198892Srdivacky        }
1031249423Sdim
1032261991Sdim        for (UnderlyingObjectsVector::iterator
1033249423Sdim             J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
1034276479Sdim          ValueType V = J->getPointer();
1035261991Sdim          bool ThisMayAlias = J->getInt();
1036249423Sdim
1037249423Sdim          if (ThisMayAlias)
1038249423Sdim            MayAlias = true;
1039249423Sdim
1040249423Sdim          // A load from a specific PseudoSourceValue. Add precise dependencies.
1041276479Sdim          MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1042249423Sdim            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
1043276479Sdim          MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
1044249423Sdim            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
1045249423Sdim          if (I != IE)
1046276479Sdim            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1047288943Sdim              addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
1048288943Sdim                                 I->second[i], RejectMemNodes, 0, true);
1049249423Sdim          if (ThisMayAlias)
1050249423Sdim            AliasMemUses[V].push_back(SU);
1051249423Sdim          else
1052249423Sdim            NonAliasMemUses[V].push_back(SU);
1053249423Sdim        }
1054239462Sdim        if (MayAlias)
1055288943Sdim          adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
1056288943Sdim                          RejectMemNodes, /*Latency=*/0);
1057199481Srdivacky        // Add dependencies on alias and barrier chains, if needed.
1058199481Srdivacky        if (MayAlias && AliasChain)
1059288943Sdim          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
1060288943Sdim                             RejectMemNodes);
1061199481Srdivacky        if (BarrierChain)
1062243830Sdim          BarrierChain->addPred(SDep(SU, SDep::Barrier));
1063223017Sdim      }
1064193323Sed    }
1065193323Sed  }
1066249423Sdim  if (DbgMI)
1067249423Sdim    FirstDbgValue = DbgMI;
1068193323Sed
1069234353Sdim  Defs.clear();
1070234353Sdim  Uses.clear();
1071234353Sdim  VRegDefs.clear();
1072193323Sed  PendingLoads.clear();
1073193323Sed}
1074193323Sed
1075276479Sdim/// \brief Initialize register live-range state for updating kills.
1076276479Sdimvoid ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
1077276479Sdim  // Start with no live registers.
1078276479Sdim  LiveRegs.reset();
1079276479Sdim
1080276479Sdim  // Examine the live-in regs of all successors.
1081276479Sdim  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
1082276479Sdim       SE = BB->succ_end(); SI != SE; ++SI) {
1083276479Sdim    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
1084276479Sdim         E = (*SI)->livein_end(); I != E; ++I) {
1085276479Sdim      unsigned Reg = *I;
1086276479Sdim      // Repeat, for reg and all subregs.
1087276479Sdim      // Repeat for reg and all subregs.
1088276479Sdim           SubRegs.isValid(); ++SubRegs)
1089276479Sdim        LiveRegs.set(*SubRegs);
1090276479Sdim    }
1091276479Sdim  }
1092276479Sdim}
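
// For illustration (hypothetical example): if a successor block has %EAX
// live-in, the loop above marks %EAX live along with (on X86) its
// sub-registers %AX, %AL and %AH, so the kill-flag computation below treats
// all of them as live across the block boundary.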
1093276479Sdim
1094288943Sdim/// \brief If we change a kill flag on the bundle instruction implicit register
1095288943Sdim/// operands, then we also need to propagate that to any instructions inside
1096288943Sdim/// the bundle which had the same kill state.
1097288943Sdimstatic void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
1098288943Sdim                                 bool NewKillState) {
1099288943Sdim  if (MI->getOpcode() != TargetOpcode::BUNDLE)
1100288943Sdim    return;
1101288943Sdim
1102288943Sdim  // Walk backwards from the last instruction in the bundle to the first.
1103288943Sdim  // Once we set a kill flag on an instruction, we bail out, as otherwise we
1104288943Sdim  // might set it on too many operands.  We will clear as many flags as we
1105288943Sdim  // can though.
1106288943Sdim  MachineBasicBlock::instr_iterator Begin = MI;
1107288943Sdim  MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
1108288943Sdim  while (Begin != End) {
1109288943Sdim    for (MachineOperand &MO : (--End)->operands()) {
1110288943Sdim      if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
1111288943Sdim        continue;
1112288943Sdim
1113288943Sdim      // DEBUG_VALUE nodes do not contribute to code generation and should
1114288943Sdim      // always be ignored.  Failure to do so may result in trying to modify
1115288943Sdim      // KILL flags on DEBUG_VALUE nodes, which is incorrect.
1116288943Sdim      if (MO.isDebug())
1117288943Sdim        continue;
1118288943Sdim
1119288943Sdim      // If the register has the internal flag then it could be killing an
1120288943Sdim      // internal def of the register.  In this case, just skip.  We only want
1121288943Sdim      // to toggle the flag on operands visible outside the bundle.
1122288943Sdim      if (MO.isInternalRead())
1123288943Sdim        continue;
1124288943Sdim
1125288943Sdim      if (MO.isKill() == NewKillState)
1126288943Sdim        continue;
1127288943Sdim      MO.setIsKill(NewKillState);
1128288943Sdim      if (NewKillState)
1129288943Sdim        return;
1130288943Sdim    }
1131288943Sdim  }
1132288943Sdim}
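
// Hypothetical example of the walk above (register names are placeholders):
// for a BUNDLE whose body is
//   %r1 = op ...
//   ... = use %r1<kill>
//   ... = use %r1
// clearing the kill state walks from the last instruction to the first and
// clears every matching kill flag, while setting it stops after flagging the
// last use in program order, so at most one operand gains a kill flag.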
1133288943Sdim
1134276479Sdimbool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
1135276479Sdim  // Setting kill flag...
1136276479Sdim  if (!MO.isKill()) {
1137276479Sdim    MO.setIsKill(true);
1138288943Sdim    toggleBundleKillFlag(MI, MO.getReg(), true);
1139276479Sdim    return false;
1140276479Sdim  }
1141276479Sdim
1142276479Sdim  // If MO itself is live, clear the kill flag...
1143276479Sdim  if (LiveRegs.test(MO.getReg())) {
1144276479Sdim    MO.setIsKill(false);
1145288943Sdim    toggleBundleKillFlag(MI, MO.getReg(), false);
1146276479Sdim    return false;
1147276479Sdim  }
1148276479Sdim
1149276479Sdim  // If any subreg of MO is live, create an imp-def for each live subreg
1150276479Sdim  // and clear the kill flag on MO; restore it only if all subregs are dead.
1151276479Sdim  MO.setIsKill(false);
1152288943Sdim  toggleBundleKillFlag(MI, MO.getReg(), false);
1153276479Sdim  bool AllDead = true;
1154276479Sdim  const unsigned SuperReg = MO.getReg();
1155276479Sdim  MachineInstrBuilder MIB(MF, MI);
1156276479Sdim  for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
1157276479Sdim    if (LiveRegs.test(*SubRegs)) {
1158276479Sdim      MIB.addReg(*SubRegs, RegState::ImplicitDefine);
1159276479Sdim      AllDead = false;
1160276479Sdim    }
1161276479Sdim  }
1162276479Sdim
1163288943Sdim  if (AllDead) {
1164276479Sdim    MO.setIsKill(true);
1165288943Sdim    toggleBundleKillFlag(MI, MO.getReg(), true);
1166288943Sdim  }
1167276479Sdim  return false;
1168276479Sdim}
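
// Illustration of the sub-register case above (registers hypothetical):
// if MO is a use of %EAX marked <kill> but %AL is still live, the kill flag
// on %EAX is cleared and "%AL<imp-def>" is appended to MI, so the rest of
// %EAX dies here while %AL remains live. The kill flag is put back only
// when every sub-register is dead (AllDead).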
1169276479Sdim
1170276479Sdim// FIXME: Reuse the LivePhysRegs utility for this.
1171276479Sdimvoid ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
1172276479Sdim  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
1173276479Sdim
1174276479Sdim  LiveRegs.resize(TRI->getNumRegs());
1175276479Sdim  BitVector killedRegs(TRI->getNumRegs());
1176276479Sdim
1177276479Sdim  startBlockForKills(MBB);
1178276479Sdim
1179276479Sdim  // Examine block from end to start...
1180276479Sdim  unsigned Count = MBB->size();
1181276479Sdim  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
1182276479Sdim       I != E; --Count) {
1183276479Sdim    MachineInstr *MI = --I;
1184276479Sdim    if (MI->isDebugValue())
1185276479Sdim      continue;
1186276479Sdim
1187276479Sdim    // Update liveness.  Registers that are defined but not used in this
1188276479Sdim    // instruction are now dead. Clear the register and all of its subregs
1189276479Sdim    // from the live set, since they are completely defined here.
1190276479Sdim    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1191276479Sdim      MachineOperand &MO = MI->getOperand(i);
1192276479Sdim      if (MO.isRegMask())
1193276479Sdim        LiveRegs.clearBitsNotInMask(MO.getRegMask());
1194276479Sdim      if (!MO.isReg()) continue;
1195276479Sdim      unsigned Reg = MO.getReg();
1196276479Sdim      if (Reg == 0) continue;
1197276479Sdim      if (!MO.isDef()) continue;
1198276479Sdim      // Ignore two-addr defs.
1199276479Sdim      if (MI->isRegTiedToUseOperand(i)) continue;
1200276479Sdim
1201276479Sdim      // Repeat for reg and all subregs.
1202276479Sdim      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
1203276479Sdim           SubRegs.isValid(); ++SubRegs)
1204276479Sdim        LiveRegs.reset(*SubRegs);
1205276479Sdim    }
1206276479Sdim
1207276479Sdim    // Examine all used registers and set/clear kill flag. When a
1208276479Sdim    // register is used multiple times we only set the kill flag on
1209276479Sdim    // the first use. Don't set kill flags on undef operands.
1210276479Sdim    killedRegs.reset();
1211276479Sdim    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1212276479Sdim      MachineOperand &MO = MI->getOperand(i);
1213276479Sdim      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
1214276479Sdim      unsigned Reg = MO.getReg();
1215276479Sdim      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
1216276479Sdim
1217276479Sdim      bool kill = false;
1218276479Sdim      if (!killedRegs.test(Reg)) {
1219276479Sdim        kill = true;
1220276479Sdim        // A register is not killed if any subregs are live...
1221276479Sdim        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
1222276479Sdim          if (LiveRegs.test(*SubRegs)) {
1223276479Sdim            kill = false;
1224276479Sdim            break;
1225276479Sdim          }
1226276479Sdim        }
1227276479Sdim
1228276479Sdim        // If no subreg is live, then the register is killed if it is
1229276479Sdim        // not already live below this instruction.
1230276479Sdim        if (kill)
1231276479Sdim          kill = !LiveRegs.test(Reg);
1232276479Sdim      }
1233276479Sdim
1234276479Sdim      if (MO.isKill() != kill) {
1235276479Sdim        DEBUG(dbgs() << "Fixing " << MO << " in ");
1236276479Sdim        // Warning: toggleKillFlag may invalidate MO.
1237276479Sdim        toggleKillFlag(MI, MO);
1238276479Sdim        DEBUG(MI->dump());
1239288943Sdim        DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
1240288943Sdim          MachineBasicBlock::instr_iterator Begin = MI;
1241288943Sdim          MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
1242288943Sdim          while (++Begin != End)
1243288943Sdim            DEBUG(Begin->dump());
1244288943Sdim        });
1245276479Sdim      }
1246276479Sdim
1247276479Sdim      killedRegs.set(Reg);
1248276479Sdim    }
1249276479Sdim
1250276479Sdim    // Mark any used register (except undef uses) and all of its subregs
1251276479Sdim    // as now live...
1252276479Sdim    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1253276479Sdim      MachineOperand &MO = MI->getOperand(i);
1254276479Sdim      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
1255276479Sdim      unsigned Reg = MO.getReg();
1256276479Sdim      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
1257276479Sdim
1258276479Sdim      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
1259276479Sdim           SubRegs.isValid(); ++SubRegs)
1260276479Sdim        LiveRegs.set(*SubRegs);
1261276479Sdim    }
1262276479Sdim  }
1263276479Sdim}
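
// Hypothetical walk-through of the backward scan above, for a block
//   %r0 = add %r1, %r2
//   store %r0, ...
// with nothing live out: scanning from the bottom, the store's use of %r0 is
// the last use in the block and receives a kill flag; the add's def of %r0
// then clears its liveness, and the uses of %r1 and %r2 are killed because
// they are not live below the add. (%r0-%r2 are placeholder registers.)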
1264276479Sdim
1265193323Sedvoid ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
1266243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1267193323Sed  SU->getInstr()->dump();
1268243830Sdim#endif
1269193323Sed}
1270193323Sed
1271193323Sedstd::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1272193323Sed  std::string s;
1273193323Sed  raw_string_ostream oss(s);
1274193323Sed  if (SU == &EntrySU)
1275193323Sed    oss << "<entry>";
1276193323Sed  else if (SU == &ExitSU)
1277193323Sed    oss << "<exit>";
1278193323Sed  else
1279288943Sdim    SU->getInstr()->print(oss, /*SkipOpers=*/true);
1280193323Sed  return oss.str();
1281193323Sed}
1282193323Sed
1283234353Sdim/// Return the basic block label. It is not necessarily unique because a block
1284234353Sdim/// may contain multiple scheduling regions, but that is fine for visualization.
1285234353Sdimstd::string ScheduleDAGInstrs::getDAGName() const {
1286234353Sdim  return "dag." + BB->getFullName();
1287193323Sed}
1288243830Sdim
1289249423Sdim//===----------------------------------------------------------------------===//
1290249423Sdim// SchedDFSResult Implementation
1291249423Sdim//===----------------------------------------------------------------------===//
1292249423Sdim
1293249423Sdimnamespace llvm {
1294249423Sdim/// \brief Internal state used to compute SchedDFSResult.
1295249423Sdimclass SchedDFSImpl {
1296249423Sdim  SchedDFSResult &R;
1297249423Sdim
1298249423Sdim  /// Join DAG nodes into equivalence classes by their subtree.
1299249423Sdim  IntEqClasses SubtreeClasses;
1300249423Sdim  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1301249423Sdim  std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
1302249423Sdim
1303249423Sdim  struct RootData {
1304249423Sdim    unsigned NodeID;
1305249423Sdim    unsigned ParentNodeID;  // Parent node (member of the parent subtree).
1306249423Sdim    unsigned SubInstrCount; // Instr count in this tree only, not children.
1307249423Sdim
1308249423Sdim    RootData(unsigned id): NodeID(id),
1309249423Sdim                           ParentNodeID(SchedDFSResult::InvalidSubtreeID),
1310249423Sdim                           SubInstrCount(0) {}
1311249423Sdim
1312249423Sdim    unsigned getSparseSetIndex() const { return NodeID; }
1313249423Sdim  };
1314249423Sdim
1315249423Sdim  SparseSet<RootData> RootSet;
1316249423Sdim
1317249423Sdimpublic:
1318249423Sdim  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1319249423Sdim    RootSet.setUniverse(R.DFSNodeData.size());
1320249423Sdim  }
1321249423Sdim
1322249423Sdim  /// Return true if this node has been visited by the DFS traversal.
1323249423Sdim  ///
1324249423Sdim  /// During visitPostorderNode the Node's SubtreeID is set to the Node's own
1325249423Sdim  /// ID. Later, SubtreeID is updated but remains valid.
1326249423Sdim  bool isVisited(const SUnit *SU) const {
1327249423Sdim    return R.DFSNodeData[SU->NodeNum].SubtreeID
1328249423Sdim      != SchedDFSResult::InvalidSubtreeID;
1329249423Sdim  }
1330249423Sdim
1331249423Sdim  /// Initialize this node's instruction count. We don't need to flag the node
1332249423Sdim  /// visited until visitPostorderNode because the DAG cannot have cycles.
1333249423Sdim  void visitPreorder(const SUnit *SU) {
1334249423Sdim    R.DFSNodeData[SU->NodeNum].InstrCount =
1335249423Sdim      SU->getInstr()->isTransient() ? 0 : 1;
1336249423Sdim  }
1337249423Sdim
1338249423Sdim  /// Called once for each node after all predecessors are visited. Revisit this
1339249423Sdim  /// node's predecessors and potentially join them now that we know the ILP of
1340249423Sdim  /// the other predecessors.
1341249423Sdim  void visitPostorderNode(const SUnit *SU) {
1342249423Sdim    // Mark this node as the root of a subtree. It may be joined with its
1343249423Sdim    // successors later.
1344249423Sdim    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1345249423Sdim    RootData RData(SU->NodeNum);
1346249423Sdim    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1347249423Sdim
1348249423Sdim    // If any predecessors are still in their own subtree, they either cannot be
1349249423Sdim    // joined or are large enough to remain separate. If this parent node's
1350249423Sdim    // total instruction count is not greater than a child subtree by at least
1351249423Sdim    // the subtree limit, then try to join it now since splitting subtrees is
1352249423Sdim    // only useful if multiple high-pressure paths are possible.
1353249423Sdim    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1354249423Sdim    for (SUnit::const_pred_iterator
1355249423Sdim           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1356249423Sdim      if (PI->getKind() != SDep::Data)
1357249423Sdim        continue;
1358249423Sdim      unsigned PredNum = PI->getSUnit()->NodeNum;
1359249423Sdim      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1360249423Sdim        joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
1361249423Sdim
1362249423Sdim      // Either link or merge the TreeData entry from the child to the parent.
1363249423Sdim      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1364249423Sdim        // If the predecessor's parent is invalid, this is a tree edge and the
1365249423Sdim        // current node is the parent.
1366249423Sdim        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1367249423Sdim          RootSet[PredNum].ParentNodeID = SU->NodeNum;
1368249423Sdim      }
1369249423Sdim      else if (RootSet.count(PredNum)) {
1370249423Sdim        // The predecessor is not a root, but is still in the root set. This
1371249423Sdim        // must be the new parent that it was just joined to. Note that
1372249423Sdim        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1373249423Sdim        // set to the original parent.
1374249423Sdim        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1375249423Sdim        RootSet.erase(PredNum);
1376249423Sdim      }
1377249423Sdim    }
1378249423Sdim    RootSet[SU->NodeNum] = RData;
1379249423Sdim  }
1380249423Sdim
1381249423Sdim  /// Called once for each tree edge after calling visitPostorderNode on the
1382249423Sdim  /// predecessor. Increment the parent node's instruction count and
1383249423Sdim  /// preemptively join this subtree to its parent's if it is small enough.
1384249423Sdim  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1385249423Sdim    R.DFSNodeData[Succ->NodeNum].InstrCount
1386249423Sdim      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1387249423Sdim    joinPredSubtree(PredDep, Succ);
1388249423Sdim  }
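
  // Hypothetical illustration: in a chain a -> b -> c of non-transient
  // instructions, each tree edge joins the predecessor into its DFS parent's
  // subtree (the instruction counts stay far below SubtreeLimit), so the
  // whole chain collapses into a single subtree rooted at c.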
1389249423Sdim
1390249423Sdim  /// Add a connection for cross edges.
1391249423Sdim  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1392249423Sdim    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
1393249423Sdim  }
1394249423Sdim
1395249423Sdim  /// Set each node's subtree ID to the representative ID and record connections
1396249423Sdim  /// between trees.
1397249423Sdim  void finalize() {
1398249423Sdim    SubtreeClasses.compress();
1399249423Sdim    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1400249423Sdim    assert(SubtreeClasses.getNumClasses() == RootSet.size()
1401249423Sdim           && "number of roots should match trees");
1402249423Sdim    for (SparseSet<RootData>::const_iterator
1403249423Sdim           RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
1404249423Sdim      unsigned TreeID = SubtreeClasses[RI->NodeID];
1405249423Sdim      if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1406249423Sdim        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
1407249423Sdim      R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
1408249423Sdim      // Note that SubInstrCount may be greater than InstrCount if we joined
1409249423Sdim      // subtrees across a cross edge. InstrCount will be attributed to the
1410249423Sdim      // original parent, while SubInstrCount will be attributed to the joined
1411249423Sdim      // parent.
1412249423Sdim    }
1413249423Sdim    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1414249423Sdim    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1415249423Sdim    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1416249423Sdim    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1417249423Sdim      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1418249423Sdim      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
1419249423Sdim            << R.DFSNodeData[Idx].SubtreeID << '\n');
1420249423Sdim    }
1421249423Sdim    for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
1422249423Sdim           I = ConnectionPairs.begin(), E = ConnectionPairs.end();
1423249423Sdim         I != E; ++I) {
1424249423Sdim      unsigned PredTree = SubtreeClasses[I->first->NodeNum];
1425249423Sdim      unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
1426249423Sdim      if (PredTree == SuccTree)
1427249423Sdim        continue;
1428249423Sdim      unsigned Depth = I->first->getDepth();
1429249423Sdim      addConnection(PredTree, SuccTree, Depth);
1430249423Sdim      addConnection(SuccTree, PredTree, Depth);
1431249423Sdim    }
1432249423Sdim  }
1433249423Sdim
1434249423Sdimprotected:
1435249423Sdim  /// Join the predecessor subtree with the successor that is its DFS
1436249423Sdim  /// parent. Apply some heuristics before joining.
1437249423Sdim  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1438249423Sdim                       bool CheckLimit = true) {
1439249423Sdim    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1440249423Sdim
1441249423Sdim    // Check if the predecessor is already joined.
1442249423Sdim    const SUnit *PredSU = PredDep.getSUnit();
1443249423Sdim    unsigned PredNum = PredSU->NodeNum;
1444249423Sdim    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1445249423Sdim      return false;
1446249423Sdim
1447249423Sdim    // Four is the magic number of successors before a node is considered a
1448249423Sdim    // pinch point.
1449249423Sdim    unsigned NumDataSucs = 0;
1450249423Sdim    for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
1451249423Sdim           SE = PredSU->Succs.end(); SI != SE; ++SI) {
1452249423Sdim      if (SI->getKind() == SDep::Data) {
1453249423Sdim        if (++NumDataSucs >= 4)
1454249423Sdim          return false;
1455249423Sdim      }
1456249423Sdim    }
1457249423Sdim    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1458249423Sdim      return false;
1459249423Sdim    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1460249423Sdim    SubtreeClasses.join(Succ->NodeNum, PredNum);
1461249423Sdim    return true;
1462249423Sdim  }
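
  // Hypothetical counter-example for the pinch-point check above: a value
  // feeding four or more data successors (say, a widely reused constant) is
  // never joined into any consumer's subtree, so one heavily shared node
  // cannot glue otherwise independent subtrees together.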
1463249423Sdim
1464249423Sdim  /// Called by finalize() to record a connection between trees.
1465249423Sdim  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1466249423Sdim    if (!Depth)
1467249423Sdim      return;
1468249423Sdim
1469249423Sdim    do {
1470249423Sdim      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1471249423Sdim        R.SubtreeConnections[FromTree];
1472249423Sdim      for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
1473249423Sdim             I = Connections.begin(), E = Connections.end(); I != E; ++I) {
1474249423Sdim        if (I->TreeID == ToTree) {
1475249423Sdim          I->Level = std::max(I->Level, Depth);
1476249423Sdim          return;
1477249423Sdim        }
1478249423Sdim      }
1479249423Sdim      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1480249423Sdim      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1481249423Sdim    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1482249423Sdim  }
1483249423Sdim};
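
  // Note on the loop above: because it follows FromTree's parent chain, a
  // connection found between two leaf subtrees is also recorded for every
  // ancestor of FromTree, keeping only the maximum depth seen per
  // destination tree.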
1484249423Sdim} // namespace llvm
1485249423Sdim
1486243830Sdimnamespace {
1487243830Sdim/// \brief Manage the stack used by a reverse depth-first search over the DAG.
1488243830Sdimclass SchedDAGReverseDFS {
1489243830Sdim  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
1490243830Sdimpublic:
1491243830Sdim  bool isComplete() const { return DFSStack.empty(); }
1492243830Sdim
1493243830Sdim  void follow(const SUnit *SU) {
1494243830Sdim    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
1495243830Sdim  }
1496243830Sdim  void advance() { ++DFSStack.back().second; }
1497243830Sdim
1498249423Sdim  const SDep *backtrack() {
1499249423Sdim    DFSStack.pop_back();
1500276479Sdim    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1501249423Sdim  }
1502243830Sdim
1503243830Sdim  const SUnit *getCurr() const { return DFSStack.back().first; }
1504243830Sdim
1505243830Sdim  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1506243830Sdim
1507243830Sdim  SUnit::const_pred_iterator getPredEnd() const {
1508243830Sdim    return getCurr()->Preds.end();
1509243830Sdim  }
1510243830Sdim};
1511243830Sdim} // anonymous namespace
1512243830Sdim
1513249423Sdimstatic bool hasDataSucc(const SUnit *SU) {
1514249423Sdim  for (SUnit::const_succ_iterator
1515249423Sdim         SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
1516249423Sdim    if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
1517249423Sdim      return true;
1518249423Sdim  }
1519249423Sdim  return false;
1520243830Sdim}
1521243830Sdim
1522243830Sdim/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
1523243830Sdim/// search from this root.
1524249423Sdimvoid SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1525243830Sdim  if (!IsBottomUp)
1526243830Sdim    llvm_unreachable("Top-down ILP metric is unimplemented");
1527243830Sdim
1528249423Sdim  SchedDFSImpl Impl(*this);
1529249423Sdim  for (ArrayRef<SUnit>::const_iterator
1530249423Sdim         SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
1531249423Sdim    const SUnit *SU = &*SI;
1532249423Sdim    if (Impl.isVisited(SU) || hasDataSucc(SU))
1533249423Sdim      continue;
1534249423Sdim
1535249423Sdim    SchedDAGReverseDFS DFS;
1536249423Sdim    Impl.visitPreorder(SU);
1537249423Sdim    DFS.follow(SU);
1538249423Sdim    for (;;) {
1539249423Sdim      // Traverse the leftmost path as far as possible.
1540249423Sdim      while (DFS.getPred() != DFS.getPredEnd()) {
1541249423Sdim        const SDep &PredDep = *DFS.getPred();
1542249423Sdim        DFS.advance();
1543249423Sdim        // Ignore non-data edges.
1544249423Sdim        if (PredDep.getKind() != SDep::Data
1545249423Sdim            || PredDep.getSUnit()->isBoundaryNode()) {
1546249423Sdim          continue;
1547249423Sdim        }
1548249423Sdim        // An edge to an already visited node is a cross edge, assuming an
1548249423Sdim        // acyclic DAG.
1549249423Sdim        if (Impl.isVisited(PredDep.getSUnit())) {
1550249423Sdim          Impl.visitCrossEdge(PredDep, DFS.getCurr());
1551249423Sdim          continue;
1552249423Sdim        }
1553249423Sdim        Impl.visitPreorder(PredDep.getSUnit());
1554249423Sdim        DFS.follow(PredDep.getSUnit());
1555249423Sdim      }
1556249423Sdim      // Visit the top of the stack in postorder and backtrack.
1557249423Sdim      const SUnit *Child = DFS.getCurr();
1558249423Sdim      const SDep *PredDep = DFS.backtrack();
1559249423Sdim      Impl.visitPostorderNode(Child);
1560249423Sdim      if (PredDep)
1561249423Sdim        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1562249423Sdim      if (DFS.isComplete())
1563249423Sdim        break;
1564243830Sdim    }
1565243830Sdim  }
1566249423Sdim  Impl.finalize();
1567243830Sdim}
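
// Minimal usage sketch (hypothetical caller; the exact declarations live in
// ScheduleDFS.h and may differ):
//
//   SchedDFSResult DFS(/*IsBottomUp=*/true, /*SubtreeLimit=*/8);
//   DFS.resize(SUnits.size());
//   DFS.compute(SUnits);
//   for (const SUnit &SU : SUnits)
//     dbgs() << "SU(" << SU.NodeNum << ") in tree "
//            << DFS.getSubtreeID(&SU) << " ILP: " << DFS.getILP(&SU) << '\n';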
1568243830Sdim
1569249423Sdim/// The root of the given SubtreeID was just scheduled. For all subtrees
1570249423Sdim/// connected to this tree, record the depth of the connection so that the
1571249423Sdim/// nearest connected subtrees can be prioritized.
1572249423Sdimvoid SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1573249423Sdim  for (SmallVectorImpl<Connection>::const_iterator
1574249423Sdim         I = SubtreeConnections[SubtreeID].begin(),
1575249423Sdim         E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
1576249423Sdim    SubtreeConnectLevels[I->TreeID] =
1577249423Sdim      std::max(SubtreeConnectLevels[I->TreeID], I->Level);
1578249423Sdim    DEBUG(dbgs() << "  Tree: " << I->TreeID
1579249423Sdim          << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
1580249423Sdim  }
1581249423Sdim}
1582249423Sdim
1583276479SdimLLVM_DUMP_METHOD
1584243830Sdimvoid ILPValue::print(raw_ostream &OS) const {
1585249423Sdim  OS << InstrCount << " / " << Length << " = ";
1586249423Sdim  if (!Length)
1587243830Sdim    OS << "BADILP";
1588249423Sdim  else
1589249423Sdim    OS << format("%g", ((double)InstrCount / Length));
1590243830Sdim}
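
// Worked example: an ILPValue with InstrCount == 6 and Length == 3 prints
// as "6 / 3 = 2"; with Length == 0 it prints "6 / 0 = BADILP".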
1591243830Sdim
1592276479SdimLLVM_DUMP_METHOD
1593243830Sdimvoid ILPValue::dump() const {
1594243830Sdim  dbgs() << *this << '\n';
1595243830Sdim}
1596243830Sdim
1597243830Sdimnamespace llvm {
1598243830Sdim
1599276479SdimLLVM_DUMP_METHOD
1600243830Sdimraw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1601243830Sdim  Val.print(OS);
1602243830Sdim  return OS;
1603243830Sdim}
1604243830Sdim
1605243830Sdim} // namespace llvm
1606