// ScheduleDAGInstrs.cpp revision 288943 (reconstructed from svn-blame output)
1193323Sed//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This implements the ScheduleDAGInstrs class, which implements re-scheduling 11193323Sed// of MachineInstrs. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15249423Sdim#include "llvm/CodeGen/ScheduleDAGInstrs.h" 16249423Sdim#include "llvm/ADT/MapVector.h" 17249423Sdim#include "llvm/ADT/SmallPtrSet.h" 18249423Sdim#include "llvm/ADT/SmallSet.h" 19193323Sed#include "llvm/Analysis/AliasAnalysis.h" 20218893Sdim#include "llvm/Analysis/ValueTracking.h" 21234353Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h" 22193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 23288943Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 24276479Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 25198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 26193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 27193323Sed#include "llvm/CodeGen/PseudoSourceValue.h" 28239462Sdim#include "llvm/CodeGen/RegisterPressure.h" 29249423Sdim#include "llvm/CodeGen/ScheduleDFS.h" 30249423Sdim#include "llvm/IR/Operator.h" 31239462Sdim#include "llvm/Support/CommandLine.h" 32193323Sed#include "llvm/Support/Debug.h" 33243830Sdim#include "llvm/Support/Format.h" 34193323Sed#include "llvm/Support/raw_ostream.h" 35249423Sdim#include "llvm/Target/TargetInstrInfo.h" 36249423Sdim#include "llvm/Target/TargetMachine.h" 37249423Sdim#include "llvm/Target/TargetRegisterInfo.h" 38249423Sdim#include "llvm/Target/TargetSubtargetInfo.h" 39261991Sdim#include <queue> 40261991Sdim 41193323Sedusing namespace llvm; 42193323Sed 
43276479Sdim#define DEBUG_TYPE "misched" 44276479Sdim 45239462Sdimstatic cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, 46239462Sdim cl::ZeroOrMore, cl::init(false), 47280031Sdim cl::desc("Enable use of AA during MI DAG construction")); 48239462Sdim 49276479Sdimstatic cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, 50280031Sdim cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); 51276479Sdim 52193323SedScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, 53280031Sdim const MachineLoopInfo *mli, 54288943Sdim bool IsPostRAFlag, bool RemoveKillFlags, 55234353Sdim LiveIntervals *lis) 56288943Sdim : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), 57288943Sdim IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), 58288943Sdim CanHandleTerminators(false), FirstDbgValue(nullptr) { 59234353Sdim assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); 60223017Sdim DbgValues.clear(); 61234353Sdim assert(!(IsPostRA && MRI.getNumVirtRegs()) && 62234353Sdim "Virtual registers must be removed prior to PostRA scheduling"); 63243830Sdim 64288943Sdim const TargetSubtargetInfo &ST = mf.getSubtarget(); 65280031Sdim SchedModel.init(ST.getSchedModel(), &ST, TII); 66198396Srdivacky} 67193323Sed 68193323Sed/// getUnderlyingObjectFromInt - This is the function that does the work of 69193323Sed/// looking through basic ptrtoint+arithmetic+inttoptr sequences. 70193323Sedstatic const Value *getUnderlyingObjectFromInt(const Value *V) { 71193323Sed do { 72198090Srdivacky if (const Operator *U = dyn_cast<Operator>(V)) { 73193323Sed // If we find a ptrtoint, we can transfer control back to the 74193323Sed // regular getUnderlyingObjectFromInt. 
75198090Srdivacky if (U->getOpcode() == Instruction::PtrToInt) 76193323Sed return U->getOperand(0); 77249423Sdim // If we find an add of a constant, a multiplied value, or a phi, it's 78193323Sed // likely that the other operand will lead us to the base 79193323Sed // object. We don't have to worry about the case where the 80198090Srdivacky // object address is somehow being computed by the multiply, 81193323Sed // because our callers only care when the result is an 82243830Sdim // identifiable object. 83198090Srdivacky if (U->getOpcode() != Instruction::Add || 84193323Sed (!isa<ConstantInt>(U->getOperand(1)) && 85249423Sdim Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 86249423Sdim !isa<PHINode>(U->getOperand(1)))) 87193323Sed return V; 88193323Sed V = U->getOperand(0); 89193323Sed } else { 90193323Sed return V; 91193323Sed } 92204642Srdivacky assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 93193323Sed } while (1); 94193323Sed} 95193323Sed 96249423Sdim/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects 97193323Sed/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. 
98249423Sdimstatic void getUnderlyingObjects(const Value *V, 99288943Sdim SmallVectorImpl<Value *> &Objects, 100288943Sdim const DataLayout &DL) { 101276479Sdim SmallPtrSet<const Value *, 16> Visited; 102249423Sdim SmallVector<const Value *, 4> Working(1, V); 103193323Sed do { 104249423Sdim V = Working.pop_back_val(); 105249423Sdim 106249423Sdim SmallVector<Value *, 4> Objs; 107288943Sdim GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); 108249423Sdim 109261991Sdim for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); 110249423Sdim I != IE; ++I) { 111249423Sdim V = *I; 112280031Sdim if (!Visited.insert(V).second) 113249423Sdim continue; 114249423Sdim if (Operator::getOpcode(V) == Instruction::IntToPtr) { 115249423Sdim const Value *O = 116249423Sdim getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 117249423Sdim if (O->getType()->isPointerTy()) { 118249423Sdim Working.push_back(O); 119249423Sdim continue; 120249423Sdim } 121249423Sdim } 122249423Sdim Objects.push_back(const_cast<Value *>(V)); 123249423Sdim } 124249423Sdim } while (!Working.empty()); 125193323Sed} 126193323Sed 127276479Sdimtypedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; 128276479Sdimtypedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4> 129261991SdimUnderlyingObjectsVector; 130261991Sdim 131249423Sdim/// getUnderlyingObjectsForInstr - If this machine instr has memory reference 132193323Sed/// information and it can be tracked to a normal reference to a known 133249423Sdim/// object, return the Value for that object. 
134249423Sdimstatic void getUnderlyingObjectsForInstr(const MachineInstr *MI, 135261991Sdim const MachineFrameInfo *MFI, 136288943Sdim UnderlyingObjectsVector &Objects, 137288943Sdim const DataLayout &DL) { 138193323Sed if (!MI->hasOneMemOperand() || 139276479Sdim (!(*MI->memoperands_begin())->getValue() && 140276479Sdim !(*MI->memoperands_begin())->getPseudoValue()) || 141198090Srdivacky (*MI->memoperands_begin())->isVolatile()) 142249423Sdim return; 143193323Sed 144276479Sdim if (const PseudoSourceValue *PSV = 145276479Sdim (*MI->memoperands_begin())->getPseudoValue()) { 146288943Sdim // Function that contain tail calls don't have unique PseudoSourceValue 147288943Sdim // objects. Two PseudoSourceValues might refer to the same or overlapping 148288943Sdim // locations. The client code calling this function assumes this is not the 149288943Sdim // case. So return a conservative answer of no known object. 150288943Sdim if (MFI->hasTailCall()) 151288943Sdim return; 152288943Sdim 153276479Sdim // For now, ignore PseudoSourceValues which may alias LLVM IR values 154276479Sdim // because the code that uses this function has no way to cope with 155276479Sdim // such aliases. 
156276479Sdim if (!PSV->isAliased(MFI)) { 157276479Sdim bool MayAlias = PSV->mayAlias(MFI); 158276479Sdim Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); 159276479Sdim } 160276479Sdim return; 161276479Sdim } 162276479Sdim 163198090Srdivacky const Value *V = (*MI->memoperands_begin())->getValue(); 164193323Sed if (!V) 165249423Sdim return; 166193323Sed 167249423Sdim SmallVector<Value *, 4> Objs; 168288943Sdim getUnderlyingObjects(V, Objs, DL); 169223017Sdim 170288943Sdim for (Value *V : Objs) { 171276479Sdim if (!isIdentifiedObject(V)) { 172249423Sdim Objects.clear(); 173249423Sdim return; 174249423Sdim } 175249423Sdim 176276479Sdim Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); 177249423Sdim } 178193323Sed} 179193323Sed 180239462Sdimvoid ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { 181239462Sdim BB = bb; 182193323Sed} 183193323Sed 184234353Sdimvoid ScheduleDAGInstrs::finishBlock() { 185239462Sdim // Subclasses should no longer refer to the old block. 186276479Sdim BB = nullptr; 187234353Sdim} 188234353Sdim 189234353Sdim/// Initialize the DAG and common scheduler state for the current scheduling 190234353Sdim/// region. This does not actually create the DAG, only clears it. The 191234353Sdim/// scheduling driver may call BuildSchedGraph multiple times per scheduling 192234353Sdim/// region. 193234353Sdimvoid ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, 194234353Sdim MachineBasicBlock::iterator begin, 195234353Sdim MachineBasicBlock::iterator end, 196261991Sdim unsigned regioninstrs) { 197239462Sdim assert(bb == BB && "startBlock should set BB"); 198234353Sdim RegionBegin = begin; 199234353Sdim RegionEnd = end; 200261991Sdim NumRegionInstrs = regioninstrs; 201234353Sdim} 202234353Sdim 203234353Sdim/// Close the current scheduling region. Don't clear any state in case the 204234353Sdim/// driver wants to refer to the previous scheduling region. 
205234353Sdimvoid ScheduleDAGInstrs::exitRegion() { 206234353Sdim // Nothing to do. 207234353Sdim} 208234353Sdim 209234353Sdim/// addSchedBarrierDeps - Add dependencies from instructions in the current 210218893Sdim/// list of instructions being scheduled to scheduling barrier by adding 211218893Sdim/// the exit SU to the register defs and use list. This is because we want to 212218893Sdim/// make sure instructions which define registers that are either used by 213218893Sdim/// the terminator or are live-out are properly scheduled. This is 214218893Sdim/// especially important when the definition latency of the return value(s) 215218893Sdim/// are too high to be hidden by the branch or when the liveout registers 216218893Sdim/// used by instructions in the fallthrough block. 217234353Sdimvoid ScheduleDAGInstrs::addSchedBarrierDeps() { 218276479Sdim MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; 219218893Sdim ExitSU.setInstr(ExitMI); 220218893Sdim bool AllDepKnown = ExitMI && 221234353Sdim (ExitMI->isCall() || ExitMI->isBarrier()); 222218893Sdim if (ExitMI && AllDepKnown) { 223218893Sdim // If it's a call or a barrier, add dependencies on the defs and uses of 224218893Sdim // instruction. 225218893Sdim for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { 226218893Sdim const MachineOperand &MO = ExitMI->getOperand(i); 227218893Sdim if (!MO.isReg() || MO.isDef()) continue; 228218893Sdim unsigned Reg = MO.getReg(); 229218893Sdim if (Reg == 0) continue; 230218893Sdim 231234353Sdim if (TRI->isPhysicalRegister(Reg)) 232249423Sdim Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); 233234353Sdim else { 234234353Sdim assert(!IsPostRA && "Virtual register encountered after regalloc."); 235249423Sdim if (MO.readsReg()) // ignore undef operands 236249423Sdim addVRegUseDeps(&ExitSU, i); 237234353Sdim } 238218893Sdim } 239218893Sdim } else { 240218893Sdim // For others, e.g. 
fallthrough, conditional branch, assume the exit 241218893Sdim // uses all the registers that are livein to the successor blocks. 242234353Sdim assert(Uses.empty() && "Uses in set before adding deps?"); 243218893Sdim for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), 244218893Sdim SE = BB->succ_end(); SI != SE; ++SI) 245218893Sdim for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), 246223017Sdim E = (*SI)->livein_end(); I != E; ++I) { 247218893Sdim unsigned Reg = *I; 248234353Sdim if (!Uses.contains(Reg)) 249249423Sdim Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); 250218893Sdim } 251218893Sdim } 252218893Sdim} 253218893Sdim 254234353Sdim/// MO is an operand of SU's instruction that defines a physical register. Add 255234353Sdim/// data dependencies from SU to any uses of the physical register. 256243830Sdimvoid ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { 257243830Sdim const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); 258234353Sdim assert(MO.isDef() && "expect physreg def"); 259234353Sdim 260234353Sdim // Ask the target if address-backscheduling is desirable, and if so how much. 261288943Sdim const TargetSubtargetInfo &ST = MF.getSubtarget(); 262234353Sdim 263239462Sdim for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); 264239462Sdim Alias.isValid(); ++Alias) { 265234353Sdim if (!Uses.contains(*Alias)) 266234353Sdim continue; 267249423Sdim for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { 268249423Sdim SUnit *UseSU = I->SU; 269234353Sdim if (UseSU == SU) 270234353Sdim continue; 271239462Sdim 272243830Sdim // Adjust the dependence latency using operand def/use information, 273243830Sdim // then allow the target to perform its own adjustments. 
274249423Sdim int UseOp = I->OpIdx; 275276479Sdim MachineInstr *RegUse = nullptr; 276249423Sdim SDep Dep; 277249423Sdim if (UseOp < 0) 278249423Sdim Dep = SDep(SU, SDep::Artificial); 279249423Sdim else { 280251662Sdim // Set the hasPhysRegDefs only for physreg defs that have a use within 281251662Sdim // the scheduling region. 282251662Sdim SU->hasPhysRegDefs = true; 283249423Sdim Dep = SDep(SU, SDep::Data, *Alias); 284249423Sdim RegUse = UseSU->getInstr(); 285249423Sdim } 286249423Sdim Dep.setLatency( 287261991Sdim SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, 288261991Sdim UseOp)); 289243830Sdim 290249423Sdim ST.adjustSchedDependency(SU, UseSU, Dep); 291249423Sdim UseSU->addPred(Dep); 292234353Sdim } 293234353Sdim } 294234353Sdim} 295234353Sdim 296234353Sdim/// addPhysRegDeps - Add register dependencies (data, anti, and output) from 297234353Sdim/// this SUnit to following instructions in the same scheduling region that 298234353Sdim/// depend the physical register referenced at OperIdx. 299234353Sdimvoid ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { 300276479Sdim MachineInstr *MI = SU->getInstr(); 301276479Sdim MachineOperand &MO = MI->getOperand(OperIdx); 302234353Sdim 303234353Sdim // Optionally add output and anti dependencies. For anti 304234353Sdim // dependencies we use a latency of 0 because for a multi-issue 305234353Sdim // target we want to allow the defining instruction to issue 306234353Sdim // in the same cycle as the using instruction. 307234353Sdim // TODO: Using a latency of 1 here for output dependencies assumes 308234353Sdim // there's no cost for reusing registers. 309234353Sdim SDep::Kind Kind = MO.isUse() ? 
SDep::Anti : SDep::Output; 310239462Sdim for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); 311239462Sdim Alias.isValid(); ++Alias) { 312234353Sdim if (!Defs.contains(*Alias)) 313234353Sdim continue; 314249423Sdim for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { 315249423Sdim SUnit *DefSU = I->SU; 316234353Sdim if (DefSU == &ExitSU) 317234353Sdim continue; 318234353Sdim if (DefSU != SU && 319234353Sdim (Kind != SDep::Output || !MO.isDead() || 320234353Sdim !DefSU->getInstr()->registerDefIsDead(*Alias))) { 321234353Sdim if (Kind == SDep::Anti) 322243830Sdim DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); 323234353Sdim else { 324243830Sdim SDep Dep(SU, Kind, /*Reg=*/*Alias); 325261991Sdim Dep.setLatency( 326261991Sdim SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); 327243830Sdim DefSU->addPred(Dep); 328234353Sdim } 329234353Sdim } 330234353Sdim } 331234353Sdim } 332234353Sdim 333234353Sdim if (!MO.isDef()) { 334251662Sdim SU->hasPhysRegUses = true; 335234353Sdim // Either insert a new Reg2SUnits entry with an empty SUnits list, or 336234353Sdim // retrieve the existing SUnits list for this register's uses. 337234353Sdim // Push this SUnit on the use list. 338249423Sdim Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); 339276479Sdim if (RemoveKillFlags) 340276479Sdim MO.setIsKill(false); 341234353Sdim } 342234353Sdim else { 343243830Sdim addPhysRegDataDeps(SU, OperIdx); 344249423Sdim unsigned Reg = MO.getReg(); 345234353Sdim 346234353Sdim // clear this register's use list 347249423Sdim if (Uses.contains(Reg)) 348249423Sdim Uses.eraseAll(Reg); 349234353Sdim 350249423Sdim if (!MO.isDead()) { 351249423Sdim Defs.eraseAll(Reg); 352249423Sdim } else if (SU->isCall) { 353249423Sdim // Calls will not be reordered because of chain dependencies (see 354249423Sdim // below). 
Since call operands are dead, calls may continue to be added 355249423Sdim // to the DefList making dependence checking quadratic in the size of 356249423Sdim // the block. Instead, we leave only one call at the back of the 357249423Sdim // DefList. 358249423Sdim Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); 359249423Sdim Reg2SUnitsMap::iterator B = P.first; 360249423Sdim Reg2SUnitsMap::iterator I = P.second; 361249423Sdim for (bool isBegin = I == B; !isBegin; /* empty */) { 362249423Sdim isBegin = (--I) == B; 363249423Sdim if (!I->SU->isCall) 364249423Sdim break; 365249423Sdim I = Defs.erase(I); 366249423Sdim } 367249423Sdim } 368234353Sdim 369234353Sdim // Defs are pushed in the order they are visited and never reordered. 370249423Sdim Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); 371234353Sdim } 372234353Sdim} 373234353Sdim 374234353Sdim/// addVRegDefDeps - Add register output and data dependencies from this SUnit 375234353Sdim/// to instructions that occur later in the same scheduling region if they read 376234353Sdim/// from or write to the virtual register defined at OperIdx. 377234353Sdim/// 378234353Sdim/// TODO: Hoist loop induction variable increments. This has to be 379234353Sdim/// reevaluated. Generally, IV scheduling should be done before coalescing. 380234353Sdimvoid ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { 381234353Sdim const MachineInstr *MI = SU->getInstr(); 382234353Sdim unsigned Reg = MI->getOperand(OperIdx).getReg(); 383234353Sdim 384239462Sdim // Singly defined vregs do not have output/anti dependencies. 385234353Sdim // The current operand is a def, so we have at least one. 386239462Sdim // Check here if there are any others... 387239462Sdim if (MRI.hasOneDef(Reg)) 388234353Sdim return; 389234353Sdim 390234353Sdim // Add output dependence to the next nearest def of this vreg. 
391234353Sdim // 392234353Sdim // Unless this definition is dead, the output dependence should be 393234353Sdim // transitively redundant with antidependencies from this definition's 394234353Sdim // uses. We're conservative for now until we have a way to guarantee the uses 395234353Sdim // are not eliminated sometime during scheduling. The output dependence edge 396234353Sdim // is also useful if output latency exceeds def-use latency. 397239462Sdim VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); 398234353Sdim if (DefI == VRegDefs.end()) 399234353Sdim VRegDefs.insert(VReg2SUnit(Reg, SU)); 400234353Sdim else { 401234353Sdim SUnit *DefSU = DefI->SU; 402234353Sdim if (DefSU != SU && DefSU != &ExitSU) { 403243830Sdim SDep Dep(SU, SDep::Output, Reg); 404261991Sdim Dep.setLatency( 405261991Sdim SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); 406243830Sdim DefSU->addPred(Dep); 407234353Sdim } 408234353Sdim DefI->SU = SU; 409234353Sdim } 410234353Sdim} 411234353Sdim 412234353Sdim/// addVRegUseDeps - Add a register data dependency if the instruction that 413234353Sdim/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a 414234353Sdim/// register antidependency from this SUnit to instructions that occur later in 415234353Sdim/// the same scheduling region if they write the virtual register. 416234353Sdim/// 417234353Sdim/// TODO: Handle ExitSU "uses" properly. 418234353Sdimvoid ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { 419234353Sdim MachineInstr *MI = SU->getInstr(); 420234353Sdim unsigned Reg = MI->getOperand(OperIdx).getReg(); 421234353Sdim 422261991Sdim // Record this local VReg use. 
423261991Sdim VReg2UseMap::iterator UI = VRegUses.find(Reg); 424261991Sdim for (; UI != VRegUses.end(); ++UI) { 425261991Sdim if (UI->SU == SU) 426261991Sdim break; 427261991Sdim } 428261991Sdim if (UI == VRegUses.end()) 429261991Sdim VRegUses.insert(VReg2SUnit(Reg, SU)); 430261991Sdim 431234353Sdim // Lookup this operand's reaching definition. 432234353Sdim assert(LIS && "vreg dependencies requires LiveIntervals"); 433261991Sdim LiveQueryResult LRQ 434261991Sdim = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); 435239462Sdim VNInfo *VNI = LRQ.valueIn(); 436239462Sdim 437234353Sdim // VNI will be valid because MachineOperand::readsReg() is checked by caller. 438239462Sdim assert(VNI && "No value to read by operand"); 439234353Sdim MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); 440234353Sdim // Phis and other noninstructions (after coalescing) have a NULL Def. 441234353Sdim if (Def) { 442234353Sdim SUnit *DefSU = getSUnit(Def); 443234353Sdim if (DefSU) { 444234353Sdim // The reaching Def lives within this scheduling region. 445234353Sdim // Create a data dependence. 446243830Sdim SDep dep(DefSU, SDep::Data, Reg); 447243830Sdim // Adjust the dependence latency using operand def/use information, then 448243830Sdim // allow the target to perform its own adjustments. 449243830Sdim int DefOp = Def->findRegisterDefOperandIdx(Reg); 450261991Sdim dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); 451239462Sdim 452288943Sdim const TargetSubtargetInfo &ST = MF.getSubtarget(); 453243830Sdim ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); 454234353Sdim SU->addPred(dep); 455234353Sdim } 456234353Sdim } 457234353Sdim 458234353Sdim // Add antidependence to the following def of the vreg it uses. 
459239462Sdim VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); 460234353Sdim if (DefI != VRegDefs.end() && DefI->SU != SU) 461243830Sdim DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); 462234353Sdim} 463234353Sdim 464239462Sdim/// Return true if MI is an instruction we are unable to reason about 465239462Sdim/// (like a call or something with unmodeled side effects). 466239462Sdimstatic inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { 467239462Sdim if (MI->isCall() || MI->hasUnmodeledSideEffects() || 468243830Sdim (MI->hasOrderedMemoryRef() && 469239462Sdim (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) 470239462Sdim return true; 471239462Sdim return false; 472239462Sdim} 473239462Sdim 474239462Sdim// This MI might have either incomplete info, or known to be unsafe 475239462Sdim// to deal with (i.e. volatile object). 476239462Sdimstatic inline bool isUnsafeMemoryObject(MachineInstr *MI, 477288943Sdim const MachineFrameInfo *MFI, 478288943Sdim const DataLayout &DL) { 479239462Sdim if (!MI || MI->memoperands_empty()) 480239462Sdim return true; 481239462Sdim // We purposefully do no check for hasOneMemOperand() here 482239462Sdim // in hope to trigger an assert downstream in order to 483239462Sdim // finish implementation. 484239462Sdim if ((*MI->memoperands_begin())->isVolatile() || 485239462Sdim MI->hasUnmodeledSideEffects()) 486239462Sdim return true; 487276479Sdim 488276479Sdim if ((*MI->memoperands_begin())->getPseudoValue()) { 489276479Sdim // Similarly to getUnderlyingObjectForInstr: 490276479Sdim // For now, ignore PseudoSourceValues which may alias LLVM IR values 491276479Sdim // because the code that uses this function has no way to cope with 492276479Sdim // such aliases. 
493276479Sdim return true; 494276479Sdim } 495276479Sdim 496239462Sdim const Value *V = (*MI->memoperands_begin())->getValue(); 497239462Sdim if (!V) 498239462Sdim return true; 499239462Sdim 500249423Sdim SmallVector<Value *, 4> Objs; 501288943Sdim getUnderlyingObjects(V, Objs, DL); 502288943Sdim for (Value *V : Objs) { 503249423Sdim // Does this pointer refer to a distinct and identifiable object? 504288943Sdim if (!isIdentifiedObject(V)) 505239462Sdim return true; 506239462Sdim } 507239462Sdim 508239462Sdim return false; 509239462Sdim} 510239462Sdim 511239462Sdim/// This returns true if the two MIs need a chain edge betwee them. 512239462Sdim/// If these are not even memory operations, we still may need 513239462Sdim/// chain deps between them. The question really is - could 514239462Sdim/// these two MIs be reordered during scheduling from memory dependency 515239462Sdim/// point of view. 516239462Sdimstatic bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, 517288943Sdim const DataLayout &DL, MachineInstr *MIa, 518239462Sdim MachineInstr *MIb) { 519280031Sdim const MachineFunction *MF = MIa->getParent()->getParent(); 520280031Sdim const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 521280031Sdim 522239462Sdim // Cover a trivial case - no edge is need to itself. 523239462Sdim if (MIa == MIb) 524239462Sdim return false; 525280031Sdim 526280031Sdim // Let the target decide if memory accesses cannot possibly overlap. 527280031Sdim if ((MIa->mayLoad() || MIa->mayStore()) && 528280031Sdim (MIb->mayLoad() || MIb->mayStore())) 529280031Sdim if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA)) 530280031Sdim return false; 531239462Sdim 532276479Sdim // FIXME: Need to handle multiple memory operands to support all targets. 
533276479Sdim if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) 534276479Sdim return true; 535276479Sdim 536288943Sdim if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) 537239462Sdim return true; 538239462Sdim 539239462Sdim // If we are dealing with two "normal" loads, we do not need an edge 540239462Sdim // between them - they could be reordered. 541239462Sdim if (!MIa->mayStore() && !MIb->mayStore()) 542239462Sdim return false; 543239462Sdim 544239462Sdim // To this point analysis is generic. From here on we do need AA. 545239462Sdim if (!AA) 546239462Sdim return true; 547239462Sdim 548239462Sdim MachineMemOperand *MMOa = *MIa->memoperands_begin(); 549239462Sdim MachineMemOperand *MMOb = *MIb->memoperands_begin(); 550239462Sdim 551276479Sdim if (!MMOa->getValue() || !MMOb->getValue()) 552276479Sdim return true; 553239462Sdim 554239462Sdim // The following interface to AA is fashioned after DAGCombiner::isAlias 555239462Sdim // and operates with MachineMemOperand offset with some important 556239462Sdim // assumptions: 557239462Sdim // - LLVM fundamentally assumes flat address spaces. 558239462Sdim // - MachineOperand offset can *only* result from legalization and 559239462Sdim // cannot affect queries other than the trivial case of overlap 560239462Sdim // checking. 561239462Sdim // - These offsets never wrap and never step outside 562239462Sdim // of allocated objects. 563239462Sdim // - There should never be any negative offsets here. 564239462Sdim // 565239462Sdim // FIXME: Modify API to hide this math from "user" 566239462Sdim // FIXME: Even before we go to AA we can reason locally about some 567239462Sdim // memory objects. It can save compile time, and possibly catch some 568239462Sdim // corner cases not currently covered. 
569239462Sdim 570239462Sdim assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); 571239462Sdim assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); 572239462Sdim 573239462Sdim int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); 574239462Sdim int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; 575239462Sdim int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; 576239462Sdim 577288943Sdim AliasResult AAResult = 578288943Sdim AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, 579288943Sdim UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), 580288943Sdim MemoryLocation(MMOb->getValue(), Overlapb, 581288943Sdim UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); 582239462Sdim 583288943Sdim return (AAResult != NoAlias); 584239462Sdim} 585239462Sdim 586239462Sdim/// This recursive function iterates over chain deps of SUb looking for 587239462Sdim/// "latest" node that needs a chain edge to SUa. 588288943Sdimstatic unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, 589288943Sdim const DataLayout &DL, SUnit *SUa, SUnit *SUb, 590288943Sdim SUnit *ExitSU, unsigned *Depth, 591288943Sdim SmallPtrSetImpl<const SUnit *> &Visited) { 592239462Sdim if (!SUa || !SUb || SUb == ExitSU) 593239462Sdim return *Depth; 594239462Sdim 595239462Sdim // Remember visited nodes. 596280031Sdim if (!Visited.insert(SUb).second) 597239462Sdim return *Depth; 598239462Sdim // If there is _some_ dependency already in place, do not 599239462Sdim // descend any further. 600239462Sdim // TODO: Need to make sure that if that dependency got eliminated or ignored 601239462Sdim // for any reason in the future, we would not violate DAG topology. 602239462Sdim // Currently it does not happen, but makes an implicit assumption about 603239462Sdim // future implementation. 
604239462Sdim // 605239462Sdim // Independently, if we encounter node that is some sort of global 606239462Sdim // object (like a call) we already have full set of dependencies to it 607239462Sdim // and we can stop descending. 608239462Sdim if (SUa->isSucc(SUb) || 609239462Sdim isGlobalMemoryObject(AA, SUb->getInstr())) 610239462Sdim return *Depth; 611239462Sdim 612239462Sdim // If we do need an edge, or we have exceeded depth budget, 613239462Sdim // add that edge to the predecessors chain of SUb, 614239462Sdim // and stop descending. 615239462Sdim if (*Depth > 200 || 616288943Sdim MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { 617243830Sdim SUb->addPred(SDep(SUa, SDep::MayAliasMem)); 618239462Sdim return *Depth; 619239462Sdim } 620239462Sdim // Track current depth. 621239462Sdim (*Depth)++; 622280031Sdim // Iterate over memory dependencies only. 623239462Sdim for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); 624239462Sdim I != E; ++I) 625280031Sdim if (I->isNormalMemoryOrBarrier()) 626288943Sdim iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); 627239462Sdim return *Depth; 628239462Sdim} 629239462Sdim 630239462Sdim/// This function assumes that "downward" from SU there exist 631239462Sdim/// tail/leaf of already constructed DAG. It iterates downward and 632239462Sdim/// checks whether SU can be aliasing any node dominated 633239462Sdim/// by it. 
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                            const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
                            std::set<SUnit *> &CheckList,
                            unsigned LatencyToLoad) {
  if (!SU)
    return;

  // Shared traversal state for all iterateChainSucc walks started below:
  // Visited prevents re-walking nodes, Depth bounds total work.
  SmallPtrSet<const SUnit*, 16> Visited;
  unsigned Depth = 0;

  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
       I != IE; ++I) {
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
      SDep Dep(SU, SDep::MayAliasMem);
      // A store feeding an aliased load is modeled with LatencyToLoad cycles
      // (hardware store->load bypass estimate); other orderings cost 0.
      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
      (*I)->addPred(Dep);
    }

    // Iterate recursively over all previously added memory chain
    // successors. Keep track of visited nodes.
    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
         JE = (*I)->Succs.end(); J != JE; ++J)
      if (J->isNormalMemoryOrBarrier())
        iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
                        Visited);
  }
}

/// Check whether two objects need a chain edge. If the dependence is real,
/// add the edge; otherwise remember the rejected SU in \p RejectList so a
/// later adjustChainDeps pass can revisit it.
static inline void addChainDependency(AliasAnalysis *AA,
                                      const MachineFrameInfo *MFI,
                                      const DataLayout &DL, SUnit *SUa,
                                      SUnit *SUb, std::set<SUnit *> &RejectList,
                                      unsigned TrueMemOrderLatency = 0,
                                      bool isNormalMemory = false) {
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
    Dep.setLatency(TrueMemOrderLatency);
    SUb->addPred(Dep);
  }
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
    DEBUG(dbgs() << "\tReject chain dep between SU("
          << SUa->NodeNum << ") and SU("
          << SUb->NodeNum << ")\n");
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B, implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    // Debug values are not scheduled; they get no SUnit.
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (TargetSchedModel::ProcResIter
             PI = SchedModel.getWriteProcResBegin(SC),
             PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
        switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

/// Build the dependence graph for the current scheduling region.
///
/// \param AA       alias analysis used for precise memory dependencies; may be
///                 null, in which case all memory references are chained
///                 conservatively.
/// \param RPTracker if non-null, compute register pressure as a side effect.
///                 The DAG builder is an efficient place to do it because it
///                 already visits operands.
/// \param PDiffs   if non-null, filled with per-SUnit pressure differences.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  // The command-line flag, when given, overrides the subtarget's preference
  // for using alias analysis during scheduling.
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;

  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
  std::set<SUnit*> RejectMemNodes;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  VRegUses.clear();
  VRegDefs.setUniverse(MRI.getNumVirtRegs());
  VRegUses.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = std::prev(MII);
    // Pair each DBG_VALUE with the first real instruction preceding it so it
    // can be re-emitted in the right place after scheduling.
    if (MI && DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, MI));
      DbgMI = nullptr;
    }

    if (MI->isDebugValue()) {
      DbgMI = MI;
      continue;
    }
    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
      RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
      assert(RPTracker->getPos() == std::prev(MII) &&
             "RPTracker can't find MI");
    }

    assert(
        (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef()) {
          HasVRegDef = true;
          addVRegDefDeps(SU, j);
        }
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }
    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1
        && (HasVRegDef || MI->mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
    if (isGlobalMemoryObject(AA, MI)) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
          I->second[i]->addPred(SDep(SU, SDep::Barrier));
        }
      }
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
          SDep Dep(SU, SDep::Barrier);
          Dep.setLatency(TrueMemOrderLatency);
          I->second[i]->addPred(Dep);
        }
      }
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Barrier));
      BarrierChain = SU;
      // This is a barrier event that acts as a pivotal node in the DAG,
      // so it is safe to clear list of exposed nodes.
      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      RejectMemNodes.clear();
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      // NOTE: also reached via goto from the mayStore() path below when a
      // store has no identifiable underlying objects.
      if (AliasChain) {
        unsigned ChainLatency = 0;
        if (AliasChain->getInstr()->mayLoad())
          ChainLatency = TrueMemOrderLatency;
        addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
                           RejectMemNodes, ChainLatency);
      }
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                           PendingLoads[k], RejectMemNodes,
                           TrueMemOrderLatency);
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                             I->second[i], RejectMemNodes);
      }
      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                             I->second[i], RejectMemNodes, TrueMemOrderLatency);
      }
      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      // Add dependence on barrier chain, if needed.
      // There is no point to check aliasing on barrier event. Even if
      // SU and barrier _could_ be reordered, they should not. In addition,
      // we have lost all RejectMemNodes below barrier.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Barrier));

      UnderlyingObjectsVector Objs;
      getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());

      if (Objs.empty()) {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      bool MayAlias = false;
      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
           K != KE; ++K) {
        ValueType V = K->getPointer();
        bool ThisMayAlias = K->getInt();
        if (ThisMayAlias)
          MayAlias = true;

        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        MapVector<ValueType, std::vector<SUnit *> >::iterator I =
          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
            addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                               I->second[i], RejectMemNodes, 0, true);

          // If we're not using AA, then we only need one store per object.
          if (!AAForDep)
            I->second.clear();
          I->second.push_back(SU);
        } else {
          if (ThisMayAlias) {
            if (!AAForDep)
              AliasMemDefs[V].clear();
            AliasMemDefs[V].push_back(SU);
          } else {
            if (!AAForDep)
              NonAliasMemDefs[V].clear();
            NonAliasMemDefs[V].push_back(SU);
          }
        }
        // Handle the uses in MemUses, if there are any.
        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                               J->second[i], RejectMemNodes,
                               TrueMemOrderLatency, true);
          // This store supersedes earlier uses of the same object.
          J->second.clear();
        }
      }
      if (MayAlias) {
        // Add dependencies from all the PendingLoads, i.e. loads
        // with no underlying object.
        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                             PendingLoads[k], RejectMemNodes,
                             TrueMemOrderLatency);
        // Add dependence on alias chain, if needed.
        if (AliasChain)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
                             RejectMemNodes);
      }
      adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        UnderlyingObjectsVector Objs;
        getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());

        if (Objs.empty()) {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
              addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                                 I->second[i], RejectMemNodes);

          PendingLoads.push_back(SU);
          MayAlias = true;
        } else {
          MayAlias = false;
        }

        for (UnderlyingObjectsVector::iterator
               J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
          ValueType V = J->getPointer();
          bool ThisMayAlias = J->getInt();

          if (ThisMayAlias)
            MayAlias = true;

          // A load from a specific PseudoSourceValue. Add precise dependencies.
          MapVector<ValueType, std::vector<SUnit *> >::iterator I =
            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
              addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
                                 I->second[i], RejectMemNodes, 0, true);
          if (ThisMayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        }
        if (MayAlias)
          adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
                          RejectMemNodes, /*Latency=*/0);
        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
                             RejectMemNodes);
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Barrier));
      }
    }
  }
  // A leading DBG_VALUE (with no real instruction before it) is re-emitted
  // first after scheduling.
  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}

/// \brief Initialize register live-range state for updating kills.
void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
  // Start with no live registers.
  LiveRegs.reset();

  // Examine the live-in regs of all successors.
  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
       SE = BB->succ_end(); SI != SE; ++SI) {
    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
         E = (*SI)->livein_end(); I != E; ++I) {
      unsigned Reg = *I;
      // Repeat, for reg and all subregs.
      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }
  }
}

/// \brief If we change a kill flag on the bundle instruction implicit register
/// operands, then we also need to propagate that to any instructions inside
/// the bundle which had the same kill state.
static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
                                 bool NewKillState) {
  if (MI->getOpcode() != TargetOpcode::BUNDLE)
    return;

  // Walk backwards from the last instruction in the bundle to the first.
  // Once we set a kill flag on an instruction, we bail out, as otherwise we
  // might set it on too many operands.  We will clear as many flags as we
  // can though.
  MachineBasicBlock::instr_iterator Begin = MI;
  MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
  while (Begin != End) {
    for (MachineOperand &MO : (--End)->operands()) {
      if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
        continue;

      // DEBUG_VALUE nodes do not contribute to code generation and should
      // always be ignored. Failure to do so may result in trying to modify
      // KILL flags on DEBUG_VALUE nodes, which is distressing.
      if (MO.isDebug())
        continue;

      // If the register has the internal flag then it could be killing an
      // internal def of the register.  In this case, just skip.  We only want
      // to toggle the flag on operands visible outside the bundle.
      if (MO.isInternalRead())
        continue;

      if (MO.isKill() == NewKillState)
        continue;
      MO.setIsKill(NewKillState);
      if (NewKillState)
        return;
    }
  }
}

/// Toggle the kill flag on \p MO to match liveness in LiveRegs, inserting
/// implicit-def operands for live subregisters when the superregister dies.
/// Always returns false. Warning: may add operands to \p MI and thereby
/// invalidate \p MO.
bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
  // Setting kill flag...
  if (!MO.isKill()) {
    MO.setIsKill(true);
    toggleBundleKillFlag(MI, MO.getReg(), true);
    return false;
  }

  // If MO itself is live, clear the kill flag...
  if (LiveRegs.test(MO.getReg())) {
    MO.setIsKill(false);
    toggleBundleKillFlag(MI, MO.getReg(), false);
    return false;
  }

  // If any subreg of MO is live, then create an imp-def for that
  // subreg and keep MO marked as killed.
  MO.setIsKill(false);
  toggleBundleKillFlag(MI, MO.getReg(), false);
  bool AllDead = true;
  const unsigned SuperReg = MO.getReg();
  MachineInstrBuilder MIB(MF, MI);
  for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
    if (LiveRegs.test(*SubRegs)) {
      MIB.addReg(*SubRegs, RegState::ImplicitDefine);
      AllDead = false;
    }
  }

  // No live subregister: the whole register really does die here.
  if (AllDead) {
    MO.setIsKill(true);
    toggleBundleKillFlag(MI, MO.getReg(), true);
  }
  return false;
}

/// Recompute kill flags for every register use in \p MBB by walking the block
/// bottom-up while tracking liveness in LiveRegs.
// FIXME: Reuse the LivePhysRegs utility for this.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');

  LiveRegs.resize(TRI->getNumRegs());
  BitVector killedRegs(TRI->getNumRegs());

  startBlockForKills(MBB);

  // Examine block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr *MI = --I;
    if (MI->isDebugValue())
      continue;

    // Update liveness.  Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        LiveRegs.clearBitsNotInMask(MO.getRegMask());
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegTiedToUseOperand(i)) continue;

      // Repeat for reg and all subregs.
      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.reset(*SubRegs);
    }

    // Examine all used registers and set/clear kill flag. When a
    // register is used multiple times we only set the kill flag on
    // the first use. Don't set kill flags on undef operands.
    killedRegs.reset();
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || MRI.isReserved(Reg)) continue;

      bool kill = false;
      if (!killedRegs.test(Reg)) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
          if (LiveRegs.test(*SubRegs)) {
            kill = false;
            break;
          }
        }

        // If subreg is not live, then register is killed if it became
        // live in this instruction
        if (kill)
          kill = !LiveRegs.test(Reg);
      }

      if (MO.isKill() != kill) {
        DEBUG(dbgs() << "Fixing " << MO << " in ");
        // Warning: toggleKillFlag may invalidate MO.
        toggleKillFlag(MI, MO);
        DEBUG(MI->dump());
        DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
          MachineBasicBlock::instr_iterator Begin = MI;
          MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
          while (++Begin != End)
            DEBUG(Begin->dump());
        });
      }

      killedRegs.set(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || MRI.isReserved(Reg)) continue;

      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }
  }
}

/// Dump this SUnit's instruction (debug/LLVM_ENABLE_DUMP builds only).
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}

/// Render a label for \p SU suitable for GraphViz output.
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {
/// \brief Internal state used to compute SchedDFSResult.
1295249423Sdimclass SchedDFSImpl { 1296249423Sdim SchedDFSResult &R; 1297249423Sdim 1298249423Sdim /// Join DAG nodes into equivalence classes by their subtree. 1299249423Sdim IntEqClasses SubtreeClasses; 1300249423Sdim /// List PredSU, SuccSU pairs that represent data edges between subtrees. 1301249423Sdim std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; 1302249423Sdim 1303249423Sdim struct RootData { 1304249423Sdim unsigned NodeID; 1305249423Sdim unsigned ParentNodeID; // Parent node (member of the parent subtree). 1306249423Sdim unsigned SubInstrCount; // Instr count in this tree only, not children. 1307249423Sdim 1308249423Sdim RootData(unsigned id): NodeID(id), 1309249423Sdim ParentNodeID(SchedDFSResult::InvalidSubtreeID), 1310249423Sdim SubInstrCount(0) {} 1311249423Sdim 1312249423Sdim unsigned getSparseSetIndex() const { return NodeID; } 1313249423Sdim }; 1314249423Sdim 1315249423Sdim SparseSet<RootData> RootSet; 1316249423Sdim 1317249423Sdimpublic: 1318249423Sdim SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { 1319249423Sdim RootSet.setUniverse(R.DFSNodeData.size()); 1320249423Sdim } 1321249423Sdim 1322249423Sdim /// Return true if this node been visited by the DFS traversal. 1323249423Sdim /// 1324249423Sdim /// During visitPostorderNode the Node's SubtreeID is assigned to the Node 1325249423Sdim /// ID. Later, SubtreeID is updated but remains valid. 1326249423Sdim bool isVisited(const SUnit *SU) const { 1327249423Sdim return R.DFSNodeData[SU->NodeNum].SubtreeID 1328249423Sdim != SchedDFSResult::InvalidSubtreeID; 1329249423Sdim } 1330249423Sdim 1331249423Sdim /// Initialize this node's instruction count. We don't need to flag the node 1332249423Sdim /// visited until visitPostorder because the DAG cannot have cycles. 1333249423Sdim void visitPreorder(const SUnit *SU) { 1334249423Sdim R.DFSNodeData[SU->NodeNum].InstrCount = 1335249423Sdim SU->getInstr()->isTransient() ? 
0 : 1; 1336249423Sdim } 1337249423Sdim 1338249423Sdim /// Called once for each node after all predecessors are visited. Revisit this 1339249423Sdim /// node's predecessors and potentially join them now that we know the ILP of 1340249423Sdim /// the other predecessors. 1341249423Sdim void visitPostorderNode(const SUnit *SU) { 1342249423Sdim // Mark this node as the root of a subtree. It may be joined with its 1343249423Sdim // successors later. 1344249423Sdim R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; 1345249423Sdim RootData RData(SU->NodeNum); 1346249423Sdim RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; 1347249423Sdim 1348249423Sdim // If any predecessors are still in their own subtree, they either cannot be 1349249423Sdim // joined or are large enough to remain separate. If this parent node's 1350249423Sdim // total instruction count is not greater than a child subtree by at least 1351249423Sdim // the subtree limit, then try to join it now since splitting subtrees is 1352249423Sdim // only useful if multiple high-pressure paths are possible. 1353249423Sdim unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; 1354249423Sdim for (SUnit::const_pred_iterator 1355249423Sdim PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 1356249423Sdim if (PI->getKind() != SDep::Data) 1357249423Sdim continue; 1358249423Sdim unsigned PredNum = PI->getSUnit()->NodeNum; 1359249423Sdim if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) 1360249423Sdim joinPredSubtree(*PI, SU, /*CheckLimit=*/false); 1361249423Sdim 1362249423Sdim // Either link or merge the TreeData entry from the child to the parent. 1363249423Sdim if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { 1364249423Sdim // If the predecessor's parent is invalid, this is a tree edge and the 1365249423Sdim // current node is the parent. 
1366249423Sdim if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) 1367249423Sdim RootSet[PredNum].ParentNodeID = SU->NodeNum; 1368249423Sdim } 1369249423Sdim else if (RootSet.count(PredNum)) { 1370249423Sdim // The predecessor is not a root, but is still in the root set. This 1371249423Sdim // must be the new parent that it was just joined to. Note that 1372249423Sdim // RootSet[PredNum].ParentNodeID may either be invalid or may still be 1373249423Sdim // set to the original parent. 1374249423Sdim RData.SubInstrCount += RootSet[PredNum].SubInstrCount; 1375249423Sdim RootSet.erase(PredNum); 1376249423Sdim } 1377249423Sdim } 1378249423Sdim RootSet[SU->NodeNum] = RData; 1379249423Sdim } 1380249423Sdim 1381249423Sdim /// Called once for each tree edge after calling visitPostOrderNode on the 1382249423Sdim /// predecessor. Increment the parent node's instruction count and 1383249423Sdim /// preemptively join this subtree to its parent's if it is small enough. 1384249423Sdim void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { 1385249423Sdim R.DFSNodeData[Succ->NodeNum].InstrCount 1386249423Sdim += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; 1387249423Sdim joinPredSubtree(PredDep, Succ); 1388249423Sdim } 1389249423Sdim 1390249423Sdim /// Add a connection for cross edges. 1391249423Sdim void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { 1392249423Sdim ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); 1393249423Sdim } 1394249423Sdim 1395249423Sdim /// Set each node's subtree ID to the representative ID and record connections 1396249423Sdim /// between trees. 
1397249423Sdim void finalize() { 1398249423Sdim SubtreeClasses.compress(); 1399249423Sdim R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); 1400249423Sdim assert(SubtreeClasses.getNumClasses() == RootSet.size() 1401249423Sdim && "number of roots should match trees"); 1402249423Sdim for (SparseSet<RootData>::const_iterator 1403249423Sdim RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { 1404249423Sdim unsigned TreeID = SubtreeClasses[RI->NodeID]; 1405249423Sdim if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) 1406249423Sdim R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; 1407249423Sdim R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; 1408249423Sdim // Note that SubInstrCount may be greater than InstrCount if we joined 1409249423Sdim // subtrees across a cross edge. InstrCount will be attributed to the 1410249423Sdim // original parent, while SubInstrCount will be attributed to the joined 1411249423Sdim // parent. 1412249423Sdim } 1413249423Sdim R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); 1414249423Sdim R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); 1415249423Sdim DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); 1416249423Sdim for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { 1417249423Sdim R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; 1418249423Sdim DEBUG(dbgs() << " SU(" << Idx << ") in tree " 1419249423Sdim << R.DFSNodeData[Idx].SubtreeID << '\n'); 1420249423Sdim } 1421249423Sdim for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator 1422249423Sdim I = ConnectionPairs.begin(), E = ConnectionPairs.end(); 1423249423Sdim I != E; ++I) { 1424249423Sdim unsigned PredTree = SubtreeClasses[I->first->NodeNum]; 1425249423Sdim unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; 1426249423Sdim if (PredTree == SuccTree) 1427249423Sdim continue; 1428249423Sdim unsigned Depth = I->first->getDepth(); 1429249423Sdim addConnection(PredTree, 
SuccTree, Depth); 1430249423Sdim addConnection(SuccTree, PredTree, Depth); 1431249423Sdim } 1432249423Sdim } 1433249423Sdim 1434249423Sdimprotected: 1435249423Sdim /// Join the predecessor subtree with the successor that is its DFS 1436249423Sdim /// parent. Apply some heuristics before joining. 1437249423Sdim bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, 1438249423Sdim bool CheckLimit = true) { 1439249423Sdim assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); 1440249423Sdim 1441249423Sdim // Check if the predecessor is already joined. 1442249423Sdim const SUnit *PredSU = PredDep.getSUnit(); 1443249423Sdim unsigned PredNum = PredSU->NodeNum; 1444249423Sdim if (R.DFSNodeData[PredNum].SubtreeID != PredNum) 1445249423Sdim return false; 1446249423Sdim 1447249423Sdim // Four is the magic number of successors before a node is considered a 1448249423Sdim // pinch point. 1449249423Sdim unsigned NumDataSucs = 0; 1450249423Sdim for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), 1451249423Sdim SE = PredSU->Succs.end(); SI != SE; ++SI) { 1452249423Sdim if (SI->getKind() == SDep::Data) { 1453249423Sdim if (++NumDataSucs >= 4) 1454249423Sdim return false; 1455249423Sdim } 1456249423Sdim } 1457249423Sdim if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) 1458249423Sdim return false; 1459249423Sdim R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; 1460249423Sdim SubtreeClasses.join(Succ->NodeNum, PredNum); 1461249423Sdim return true; 1462249423Sdim } 1463249423Sdim 1464249423Sdim /// Called by finalize() to record a connection between trees. 
1465249423Sdim void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { 1466249423Sdim if (!Depth) 1467249423Sdim return; 1468249423Sdim 1469249423Sdim do { 1470249423Sdim SmallVectorImpl<SchedDFSResult::Connection> &Connections = 1471249423Sdim R.SubtreeConnections[FromTree]; 1472249423Sdim for (SmallVectorImpl<SchedDFSResult::Connection>::iterator 1473249423Sdim I = Connections.begin(), E = Connections.end(); I != E; ++I) { 1474249423Sdim if (I->TreeID == ToTree) { 1475249423Sdim I->Level = std::max(I->Level, Depth); 1476249423Sdim return; 1477249423Sdim } 1478249423Sdim } 1479249423Sdim Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); 1480249423Sdim FromTree = R.DFSTreeData[FromTree].ParentTreeID; 1481249423Sdim } while (FromTree != SchedDFSResult::InvalidSubtreeID); 1482249423Sdim } 1483249423Sdim}; 1484249423Sdim} // namespace llvm 1485249423Sdim 1486243830Sdimnamespace { 1487243830Sdim/// \brief Manage the stack used by a reverse depth-first search over the DAG. 1488243830Sdimclass SchedDAGReverseDFS { 1489243830Sdim std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; 1490243830Sdimpublic: 1491243830Sdim bool isComplete() const { return DFSStack.empty(); } 1492243830Sdim 1493243830Sdim void follow(const SUnit *SU) { 1494243830Sdim DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); 1495243830Sdim } 1496243830Sdim void advance() { ++DFSStack.back().second; } 1497243830Sdim 1498249423Sdim const SDep *backtrack() { 1499249423Sdim DFSStack.pop_back(); 1500276479Sdim return DFSStack.empty() ? 
nullptr : std::prev(DFSStack.back().second); 1501249423Sdim } 1502243830Sdim 1503243830Sdim const SUnit *getCurr() const { return DFSStack.back().first; } 1504243830Sdim 1505243830Sdim SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } 1506243830Sdim 1507243830Sdim SUnit::const_pred_iterator getPredEnd() const { 1508243830Sdim return getCurr()->Preds.end(); 1509243830Sdim } 1510243830Sdim}; 1511243830Sdim} // anonymous 1512243830Sdim 1513249423Sdimstatic bool hasDataSucc(const SUnit *SU) { 1514249423Sdim for (SUnit::const_succ_iterator 1515249423Sdim SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { 1516249423Sdim if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) 1517249423Sdim return true; 1518249423Sdim } 1519249423Sdim return false; 1520243830Sdim} 1521243830Sdim 1522243830Sdim/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first 1523243830Sdim/// search from this root. 1524249423Sdimvoid SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { 1525243830Sdim if (!IsBottomUp) 1526243830Sdim llvm_unreachable("Top-down ILP metric is unimplemnted"); 1527243830Sdim 1528249423Sdim SchedDFSImpl Impl(*this); 1529249423Sdim for (ArrayRef<SUnit>::const_iterator 1530249423Sdim SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { 1531249423Sdim const SUnit *SU = &*SI; 1532249423Sdim if (Impl.isVisited(SU) || hasDataSucc(SU)) 1533249423Sdim continue; 1534249423Sdim 1535249423Sdim SchedDAGReverseDFS DFS; 1536249423Sdim Impl.visitPreorder(SU); 1537249423Sdim DFS.follow(SU); 1538249423Sdim for (;;) { 1539249423Sdim // Traverse the leftmost path as far as possible. 1540249423Sdim while (DFS.getPred() != DFS.getPredEnd()) { 1541249423Sdim const SDep &PredDep = *DFS.getPred(); 1542249423Sdim DFS.advance(); 1543249423Sdim // Ignore non-data edges. 
1544249423Sdim if (PredDep.getKind() != SDep::Data 1545249423Sdim || PredDep.getSUnit()->isBoundaryNode()) { 1546249423Sdim continue; 1547249423Sdim } 1548249423Sdim // An already visited edge is a cross edge, assuming an acyclic DAG. 1549249423Sdim if (Impl.isVisited(PredDep.getSUnit())) { 1550249423Sdim Impl.visitCrossEdge(PredDep, DFS.getCurr()); 1551249423Sdim continue; 1552249423Sdim } 1553249423Sdim Impl.visitPreorder(PredDep.getSUnit()); 1554249423Sdim DFS.follow(PredDep.getSUnit()); 1555249423Sdim } 1556249423Sdim // Visit the top of the stack in postorder and backtrack. 1557249423Sdim const SUnit *Child = DFS.getCurr(); 1558249423Sdim const SDep *PredDep = DFS.backtrack(); 1559249423Sdim Impl.visitPostorderNode(Child); 1560249423Sdim if (PredDep) 1561249423Sdim Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); 1562249423Sdim if (DFS.isComplete()) 1563249423Sdim break; 1564243830Sdim } 1565243830Sdim } 1566249423Sdim Impl.finalize(); 1567243830Sdim} 1568243830Sdim 1569249423Sdim/// The root of the given SubtreeID was just scheduled. For all subtrees 1570249423Sdim/// connected to this tree, record the depth of the connection so that the 1571249423Sdim/// nearest connected subtrees can be prioritized. 
1572249423Sdimvoid SchedDFSResult::scheduleTree(unsigned SubtreeID) { 1573249423Sdim for (SmallVectorImpl<Connection>::const_iterator 1574249423Sdim I = SubtreeConnections[SubtreeID].begin(), 1575249423Sdim E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { 1576249423Sdim SubtreeConnectLevels[I->TreeID] = 1577249423Sdim std::max(SubtreeConnectLevels[I->TreeID], I->Level); 1578249423Sdim DEBUG(dbgs() << " Tree: " << I->TreeID 1579249423Sdim << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); 1580249423Sdim } 1581249423Sdim} 1582249423Sdim 1583276479SdimLLVM_DUMP_METHOD 1584243830Sdimvoid ILPValue::print(raw_ostream &OS) const { 1585249423Sdim OS << InstrCount << " / " << Length << " = "; 1586249423Sdim if (!Length) 1587243830Sdim OS << "BADILP"; 1588249423Sdim else 1589249423Sdim OS << format("%g", ((double)InstrCount / Length)); 1590243830Sdim} 1591243830Sdim 1592276479SdimLLVM_DUMP_METHOD 1593243830Sdimvoid ILPValue::dump() const { 1594243830Sdim dbgs() << *this << '\n'; 1595243830Sdim} 1596243830Sdim 1597243830Sdimnamespace llvm { 1598243830Sdim 1599276479SdimLLVM_DUMP_METHOD 1600243830Sdimraw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { 1601243830Sdim Val.print(OS); 1602243830Sdim return OS; 1603243830Sdim} 1604243830Sdim 1605243830Sdim} // namespace llvm 1606