// ScheduleDAGInstrs.cpp — LLVM revision 234353 (annotated source listing)
1193323Sed//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This implements the ScheduleDAGInstrs class, which implements re-scheduling 11193323Sed// of MachineInstrs. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "sched-instrs" 16198090Srdivacky#include "llvm/Operator.h" 17193323Sed#include "llvm/Analysis/AliasAnalysis.h" 18218893Sdim#include "llvm/Analysis/ValueTracking.h" 19234353Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h" 20193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 21198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 22193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 23193323Sed#include "llvm/CodeGen/PseudoSourceValue.h" 24234353Sdim#include "llvm/CodeGen/ScheduleDAGInstrs.h" 25224145Sdim#include "llvm/MC/MCInstrItineraries.h" 26193323Sed#include "llvm/Target/TargetMachine.h" 27193323Sed#include "llvm/Target/TargetInstrInfo.h" 28193323Sed#include "llvm/Target/TargetRegisterInfo.h" 29224145Sdim#include "llvm/Target/TargetSubtargetInfo.h" 30193323Sed#include "llvm/Support/Debug.h" 31193323Sed#include "llvm/Support/raw_ostream.h" 32193323Sed#include "llvm/ADT/SmallSet.h" 33193323Sedusing namespace llvm; 34193323Sed 35193323SedScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, 36193323Sed const MachineLoopInfo &mli, 37234353Sdim const MachineDominatorTree &mdt, 38234353Sdim bool IsPostRAFlag, 39234353Sdim LiveIntervals *lis) 40218893Sdim : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), 41234353Sdim InstrItins(mf.getTarget().getInstrItineraryData()), 
LIS(lis), 42234353Sdim IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), 43234353Sdim FirstDbgValue(0) { 44234353Sdim assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); 45223017Sdim DbgValues.clear(); 46234353Sdim assert(!(IsPostRA && MRI.getNumVirtRegs()) && 47234353Sdim "Virtual registers must be removed prior to PostRA scheduling"); 48198396Srdivacky} 49193323Sed 50193323Sed/// getUnderlyingObjectFromInt - This is the function that does the work of 51193323Sed/// looking through basic ptrtoint+arithmetic+inttoptr sequences. 52193323Sedstatic const Value *getUnderlyingObjectFromInt(const Value *V) { 53193323Sed do { 54198090Srdivacky if (const Operator *U = dyn_cast<Operator>(V)) { 55193323Sed // If we find a ptrtoint, we can transfer control back to the 56193323Sed // regular getUnderlyingObjectFromInt. 57198090Srdivacky if (U->getOpcode() == Instruction::PtrToInt) 58193323Sed return U->getOperand(0); 59193323Sed // If we find an add of a constant or a multiplied value, it's 60193323Sed // likely that the other operand will lead us to the base 61193323Sed // object. We don't have to worry about the case where the 62198090Srdivacky // object address is somehow being computed by the multiply, 63193323Sed // because our callers only care when the result is an 64193323Sed // identifibale object. 65198090Srdivacky if (U->getOpcode() != Instruction::Add || 66193323Sed (!isa<ConstantInt>(U->getOperand(1)) && 67198090Srdivacky Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) 68193323Sed return V; 69193323Sed V = U->getOperand(0); 70193323Sed } else { 71193323Sed return V; 72193323Sed } 73204642Srdivacky assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 74193323Sed } while (1); 75193323Sed} 76193323Sed 77218893Sdim/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject 78193323Sed/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. 
79193323Sedstatic const Value *getUnderlyingObject(const Value *V) { 80193323Sed // First just call Value::getUnderlyingObject to let it do what it does. 81193323Sed do { 82218893Sdim V = GetUnderlyingObject(V); 83193323Sed // If it found an inttoptr, use special code to continue climing. 84198090Srdivacky if (Operator::getOpcode(V) != Instruction::IntToPtr) 85193323Sed break; 86193323Sed const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 87193323Sed // If that succeeded in finding a pointer, continue the search. 88204642Srdivacky if (!O->getType()->isPointerTy()) 89193323Sed break; 90193323Sed V = O; 91193323Sed } while (1); 92193323Sed return V; 93193323Sed} 94193323Sed 95193323Sed/// getUnderlyingObjectForInstr - If this machine instr has memory reference 96193323Sed/// information and it can be tracked to a normal reference to a known 97193323Sed/// object, return the Value for that object. Otherwise return null. 98198396Srdivackystatic const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, 99198892Srdivacky const MachineFrameInfo *MFI, 100198892Srdivacky bool &MayAlias) { 101198892Srdivacky MayAlias = true; 102193323Sed if (!MI->hasOneMemOperand() || 103198090Srdivacky !(*MI->memoperands_begin())->getValue() || 104198090Srdivacky (*MI->memoperands_begin())->isVolatile()) 105193323Sed return 0; 106193323Sed 107198090Srdivacky const Value *V = (*MI->memoperands_begin())->getValue(); 108193323Sed if (!V) 109193323Sed return 0; 110193323Sed 111193323Sed V = getUnderlyingObject(V); 112198396Srdivacky if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { 113198396Srdivacky // For now, ignore PseudoSourceValues which may alias LLVM IR values 114198396Srdivacky // because the code that uses this function has no way to cope with 115198396Srdivacky // such aliases. 
116198396Srdivacky if (PSV->isAliased(MFI)) 117198396Srdivacky return 0; 118223017Sdim 119199481Srdivacky MayAlias = PSV->mayAlias(MFI); 120198396Srdivacky return V; 121198396Srdivacky } 122193323Sed 123198396Srdivacky if (isIdentifiedObject(V)) 124198396Srdivacky return V; 125198396Srdivacky 126198396Srdivacky return 0; 127193323Sed} 128193323Sed 129234353Sdimvoid ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { 130226633Sdim LoopRegs.Deps.clear(); 131193323Sed if (MachineLoop *ML = MLI.getLoopFor(BB)) 132234353Sdim if (BB == ML->getLoopLatch()) 133193323Sed LoopRegs.VisitLoop(ML); 134193323Sed} 135193323Sed 136234353Sdimvoid ScheduleDAGInstrs::finishBlock() { 137234353Sdim // Nothing to do. 138234353Sdim} 139234353Sdim 140234353Sdim/// Initialize the map with the number of registers. 141234353Sdimvoid Reg2SUnitsMap::setRegLimit(unsigned Limit) { 142234353Sdim PhysRegSet.setUniverse(Limit); 143234353Sdim SUnits.resize(Limit); 144234353Sdim} 145234353Sdim 146234353Sdim/// Clear the map without deallocating storage. 147234353Sdimvoid Reg2SUnitsMap::clear() { 148234353Sdim for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { 149234353Sdim SUnits[*I].clear(); 150234353Sdim } 151234353Sdim PhysRegSet.clear(); 152234353Sdim} 153234353Sdim 154234353Sdim/// Initialize the DAG and common scheduler state for the current scheduling 155234353Sdim/// region. This does not actually create the DAG, only clears it. The 156234353Sdim/// scheduling driver may call BuildSchedGraph multiple times per scheduling 157234353Sdim/// region. 
158234353Sdimvoid ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, 159234353Sdim MachineBasicBlock::iterator begin, 160234353Sdim MachineBasicBlock::iterator end, 161234353Sdim unsigned endcount) { 162234353Sdim BB = bb; 163234353Sdim RegionBegin = begin; 164234353Sdim RegionEnd = end; 165234353Sdim EndIndex = endcount; 166234353Sdim MISUnitMap.clear(); 167234353Sdim 168234353Sdim // Check to see if the scheduler cares about latencies. 169234353Sdim UnitLatencies = forceUnitLatencies(); 170234353Sdim 171234353Sdim ScheduleDAG::clearDAG(); 172234353Sdim} 173234353Sdim 174234353Sdim/// Close the current scheduling region. Don't clear any state in case the 175234353Sdim/// driver wants to refer to the previous scheduling region. 176234353Sdimvoid ScheduleDAGInstrs::exitRegion() { 177234353Sdim // Nothing to do. 178234353Sdim} 179234353Sdim 180234353Sdim/// addSchedBarrierDeps - Add dependencies from instructions in the current 181218893Sdim/// list of instructions being scheduled to scheduling barrier by adding 182218893Sdim/// the exit SU to the register defs and use list. This is because we want to 183218893Sdim/// make sure instructions which define registers that are either used by 184218893Sdim/// the terminator or are live-out are properly scheduled. This is 185218893Sdim/// especially important when the definition latency of the return value(s) 186218893Sdim/// are too high to be hidden by the branch or when the liveout registers 187218893Sdim/// used by instructions in the fallthrough block. 188234353Sdimvoid ScheduleDAGInstrs::addSchedBarrierDeps() { 189234353Sdim MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; 190218893Sdim ExitSU.setInstr(ExitMI); 191218893Sdim bool AllDepKnown = ExitMI && 192234353Sdim (ExitMI->isCall() || ExitMI->isBarrier()); 193218893Sdim if (ExitMI && AllDepKnown) { 194218893Sdim // If it's a call or a barrier, add dependencies on the defs and uses of 195218893Sdim // instruction. 
196218893Sdim for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { 197218893Sdim const MachineOperand &MO = ExitMI->getOperand(i); 198218893Sdim if (!MO.isReg() || MO.isDef()) continue; 199218893Sdim unsigned Reg = MO.getReg(); 200218893Sdim if (Reg == 0) continue; 201218893Sdim 202234353Sdim if (TRI->isPhysicalRegister(Reg)) 203234353Sdim Uses[Reg].push_back(&ExitSU); 204234353Sdim else { 205234353Sdim assert(!IsPostRA && "Virtual register encountered after regalloc."); 206234353Sdim addVRegUseDeps(&ExitSU, i); 207234353Sdim } 208218893Sdim } 209218893Sdim } else { 210218893Sdim // For others, e.g. fallthrough, conditional branch, assume the exit 211218893Sdim // uses all the registers that are livein to the successor blocks. 212234353Sdim assert(Uses.empty() && "Uses in set before adding deps?"); 213218893Sdim for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), 214218893Sdim SE = BB->succ_end(); SI != SE; ++SI) 215218893Sdim for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), 216223017Sdim E = (*SI)->livein_end(); I != E; ++I) { 217218893Sdim unsigned Reg = *I; 218234353Sdim if (!Uses.contains(Reg)) 219218893Sdim Uses[Reg].push_back(&ExitSU); 220218893Sdim } 221218893Sdim } 222218893Sdim} 223218893Sdim 224234353Sdim/// MO is an operand of SU's instruction that defines a physical register. Add 225234353Sdim/// data dependencies from SU to any uses of the physical register. 226234353Sdimvoid ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, 227234353Sdim const MachineOperand &MO) { 228234353Sdim assert(MO.isDef() && "expect physreg def"); 229234353Sdim 230234353Sdim // Ask the target if address-backscheduling is desirable, and if so how much. 
231234353Sdim const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); 232234353Sdim unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); 233234353Sdim unsigned DataLatency = SU->Latency; 234234353Sdim 235234353Sdim for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { 236234353Sdim if (!Uses.contains(*Alias)) 237234353Sdim continue; 238234353Sdim std::vector<SUnit*> &UseList = Uses[*Alias]; 239234353Sdim for (unsigned i = 0, e = UseList.size(); i != e; ++i) { 240234353Sdim SUnit *UseSU = UseList[i]; 241234353Sdim if (UseSU == SU) 242234353Sdim continue; 243234353Sdim unsigned LDataLatency = DataLatency; 244234353Sdim // Optionally add in a special extra latency for nodes that 245234353Sdim // feed addresses. 246234353Sdim // TODO: Perhaps we should get rid of 247234353Sdim // SpecialAddressLatency and just move this into 248234353Sdim // adjustSchedDependency for the targets that care about it. 249234353Sdim if (SpecialAddressLatency != 0 && !UnitLatencies && 250234353Sdim UseSU != &ExitSU) { 251234353Sdim MachineInstr *UseMI = UseSU->getInstr(); 252234353Sdim const MCInstrDesc &UseMCID = UseMI->getDesc(); 253234353Sdim int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); 254234353Sdim assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); 255234353Sdim if (RegUseIndex >= 0 && 256234353Sdim (UseMI->mayLoad() || UseMI->mayStore()) && 257234353Sdim (unsigned)RegUseIndex < UseMCID.getNumOperands() && 258234353Sdim UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) 259234353Sdim LDataLatency += SpecialAddressLatency; 260234353Sdim } 261234353Sdim // Adjust the dependence latency using operand def/use 262234353Sdim // information (if any), and then allow the target to 263234353Sdim // perform its own adjustments. 
264234353Sdim const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); 265234353Sdim if (!UnitLatencies) { 266234353Sdim computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); 267234353Sdim ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); 268234353Sdim } 269234353Sdim UseSU->addPred(dep); 270234353Sdim } 271234353Sdim } 272234353Sdim} 273234353Sdim 274234353Sdim/// addPhysRegDeps - Add register dependencies (data, anti, and output) from 275234353Sdim/// this SUnit to following instructions in the same scheduling region that 276234353Sdim/// depend the physical register referenced at OperIdx. 277234353Sdimvoid ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { 278234353Sdim const MachineInstr *MI = SU->getInstr(); 279234353Sdim const MachineOperand &MO = MI->getOperand(OperIdx); 280234353Sdim 281234353Sdim // Optionally add output and anti dependencies. For anti 282234353Sdim // dependencies we use a latency of 0 because for a multi-issue 283234353Sdim // target we want to allow the defining instruction to issue 284234353Sdim // in the same cycle as the using instruction. 285234353Sdim // TODO: Using a latency of 1 here for output dependencies assumes 286234353Sdim // there's no cost for reusing registers. 287234353Sdim SDep::Kind Kind = MO.isUse() ? 
SDep::Anti : SDep::Output; 288234353Sdim for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { 289234353Sdim if (!Defs.contains(*Alias)) 290234353Sdim continue; 291234353Sdim std::vector<SUnit *> &DefList = Defs[*Alias]; 292234353Sdim for (unsigned i = 0, e = DefList.size(); i != e; ++i) { 293234353Sdim SUnit *DefSU = DefList[i]; 294234353Sdim if (DefSU == &ExitSU) 295234353Sdim continue; 296234353Sdim if (DefSU != SU && 297234353Sdim (Kind != SDep::Output || !MO.isDead() || 298234353Sdim !DefSU->getInstr()->registerDefIsDead(*Alias))) { 299234353Sdim if (Kind == SDep::Anti) 300234353Sdim DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); 301234353Sdim else { 302234353Sdim unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, 303234353Sdim DefSU->getInstr()); 304234353Sdim DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); 305234353Sdim } 306234353Sdim } 307234353Sdim } 308234353Sdim } 309234353Sdim 310234353Sdim if (!MO.isDef()) { 311234353Sdim // Either insert a new Reg2SUnits entry with an empty SUnits list, or 312234353Sdim // retrieve the existing SUnits list for this register's uses. 313234353Sdim // Push this SUnit on the use list. 314234353Sdim Uses[MO.getReg()].push_back(SU); 315234353Sdim } 316234353Sdim else { 317234353Sdim addPhysRegDataDeps(SU, MO); 318234353Sdim 319234353Sdim // Either insert a new Reg2SUnits entry with an empty SUnits list, or 320234353Sdim // retrieve the existing SUnits list for this register's defs. 321234353Sdim std::vector<SUnit *> &DefList = Defs[MO.getReg()]; 322234353Sdim 323234353Sdim // If a def is going to wrap back around to the top of the loop, 324234353Sdim // backschedule it. 
325234353Sdim if (!UnitLatencies && DefList.empty()) { 326234353Sdim LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); 327234353Sdim if (I != LoopRegs.Deps.end()) { 328234353Sdim const MachineOperand *UseMO = I->second.first; 329234353Sdim unsigned Count = I->second.second; 330234353Sdim const MachineInstr *UseMI = UseMO->getParent(); 331234353Sdim unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); 332234353Sdim const MCInstrDesc &UseMCID = UseMI->getDesc(); 333234353Sdim const TargetSubtargetInfo &ST = 334234353Sdim TM.getSubtarget<TargetSubtargetInfo>(); 335234353Sdim unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); 336234353Sdim // TODO: If we knew the total depth of the region here, we could 337234353Sdim // handle the case where the whole loop is inside the region but 338234353Sdim // is large enough that the isScheduleHigh trick isn't needed. 339234353Sdim if (UseMOIdx < UseMCID.getNumOperands()) { 340234353Sdim // Currently, we only support scheduling regions consisting of 341234353Sdim // single basic blocks. Check to see if the instruction is in 342234353Sdim // the same region by checking to see if it has the same parent. 343234353Sdim if (UseMI->getParent() != MI->getParent()) { 344234353Sdim unsigned Latency = SU->Latency; 345234353Sdim if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) 346234353Sdim Latency += SpecialAddressLatency; 347234353Sdim // This is a wild guess as to the portion of the latency which 348234353Sdim // will be overlapped by work done outside the current 349234353Sdim // scheduling region. 350234353Sdim Latency -= std::min(Latency, Count); 351234353Sdim // Add the artificial edge. 
352234353Sdim ExitSU.addPred(SDep(SU, SDep::Order, Latency, 353234353Sdim /*Reg=*/0, /*isNormalMemory=*/false, 354234353Sdim /*isMustAlias=*/false, 355234353Sdim /*isArtificial=*/true)); 356234353Sdim } else if (SpecialAddressLatency > 0 && 357234353Sdim UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { 358234353Sdim // The entire loop body is within the current scheduling region 359234353Sdim // and the latency of this operation is assumed to be greater 360234353Sdim // than the latency of the loop. 361234353Sdim // TODO: Recursively mark data-edge predecessors as 362234353Sdim // isScheduleHigh too. 363234353Sdim SU->isScheduleHigh = true; 364234353Sdim } 365234353Sdim } 366234353Sdim LoopRegs.Deps.erase(I); 367234353Sdim } 368234353Sdim } 369234353Sdim 370234353Sdim // clear this register's use list 371234353Sdim if (Uses.contains(MO.getReg())) 372234353Sdim Uses[MO.getReg()].clear(); 373234353Sdim 374234353Sdim if (!MO.isDead()) 375234353Sdim DefList.clear(); 376234353Sdim 377234353Sdim // Calls will not be reordered because of chain dependencies (see 378234353Sdim // below). Since call operands are dead, calls may continue to be added 379234353Sdim // to the DefList making dependence checking quadratic in the size of 380234353Sdim // the block. Instead, we leave only one call at the back of the 381234353Sdim // DefList. 382234353Sdim if (SU->isCall) { 383234353Sdim while (!DefList.empty() && DefList.back()->isCall) 384234353Sdim DefList.pop_back(); 385234353Sdim } 386234353Sdim // Defs are pushed in the order they are visited and never reordered. 387234353Sdim DefList.push_back(SU); 388234353Sdim } 389234353Sdim} 390234353Sdim 391234353Sdim/// addVRegDefDeps - Add register output and data dependencies from this SUnit 392234353Sdim/// to instructions that occur later in the same scheduling region if they read 393234353Sdim/// from or write to the virtual register defined at OperIdx. 
394234353Sdim/// 395234353Sdim/// TODO: Hoist loop induction variable increments. This has to be 396234353Sdim/// reevaluated. Generally, IV scheduling should be done before coalescing. 397234353Sdimvoid ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { 398234353Sdim const MachineInstr *MI = SU->getInstr(); 399234353Sdim unsigned Reg = MI->getOperand(OperIdx).getReg(); 400234353Sdim 401234353Sdim // SSA defs do not have output/anti dependencies. 402234353Sdim // The current operand is a def, so we have at least one. 403234353Sdim if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) 404234353Sdim return; 405234353Sdim 406234353Sdim // Add output dependence to the next nearest def of this vreg. 407234353Sdim // 408234353Sdim // Unless this definition is dead, the output dependence should be 409234353Sdim // transitively redundant with antidependencies from this definition's 410234353Sdim // uses. We're conservative for now until we have a way to guarantee the uses 411234353Sdim // are not eliminated sometime during scheduling. The output dependence edge 412234353Sdim // is also useful if output latency exceeds def-use latency. 413234353Sdim VReg2SUnitMap::iterator DefI = findVRegDef(Reg); 414234353Sdim if (DefI == VRegDefs.end()) 415234353Sdim VRegDefs.insert(VReg2SUnit(Reg, SU)); 416234353Sdim else { 417234353Sdim SUnit *DefSU = DefI->SU; 418234353Sdim if (DefSU != SU && DefSU != &ExitSU) { 419234353Sdim unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, 420234353Sdim DefSU->getInstr()); 421234353Sdim DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); 422234353Sdim } 423234353Sdim DefI->SU = SU; 424234353Sdim } 425234353Sdim} 426234353Sdim 427234353Sdim/// addVRegUseDeps - Add a register data dependency if the instruction that 428234353Sdim/// defines the virtual register used at OperIdx is mapped to an SUnit. 
Add a 429234353Sdim/// register antidependency from this SUnit to instructions that occur later in 430234353Sdim/// the same scheduling region if they write the virtual register. 431234353Sdim/// 432234353Sdim/// TODO: Handle ExitSU "uses" properly. 433234353Sdimvoid ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { 434234353Sdim MachineInstr *MI = SU->getInstr(); 435234353Sdim unsigned Reg = MI->getOperand(OperIdx).getReg(); 436234353Sdim 437234353Sdim // Lookup this operand's reaching definition. 438234353Sdim assert(LIS && "vreg dependencies requires LiveIntervals"); 439234353Sdim SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); 440234353Sdim LiveInterval *LI = &LIS->getInterval(Reg); 441234353Sdim VNInfo *VNI = LI->getVNInfoBefore(UseIdx); 442234353Sdim // VNI will be valid because MachineOperand::readsReg() is checked by caller. 443234353Sdim MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); 444234353Sdim // Phis and other noninstructions (after coalescing) have a NULL Def. 445234353Sdim if (Def) { 446234353Sdim SUnit *DefSU = getSUnit(Def); 447234353Sdim if (DefSU) { 448234353Sdim // The reaching Def lives within this scheduling region. 449234353Sdim // Create a data dependence. 450234353Sdim // 451234353Sdim // TODO: Handle "special" address latencies cleanly. 452234353Sdim const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); 453234353Sdim if (!UnitLatencies) { 454234353Sdim // Adjust the dependence latency using operand def/use information, then 455234353Sdim // allow the target to perform its own adjustments. 456234353Sdim computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); 457234353Sdim const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); 458234353Sdim ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); 459234353Sdim } 460234353Sdim SU->addPred(dep); 461234353Sdim } 462234353Sdim } 463234353Sdim 464234353Sdim // Add antidependence to the following def of the vreg it uses. 
465234353Sdim VReg2SUnitMap::iterator DefI = findVRegDef(Reg); 466234353Sdim if (DefI != VRegDefs.end() && DefI->SU != SU) 467234353Sdim DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); 468234353Sdim} 469234353Sdim 470234353Sdim/// Create an SUnit for each real instruction, numbered in top-down toplological 471234353Sdim/// order. The instruction order A < B, implies that no edge exists from B to A. 472234353Sdim/// 473234353Sdim/// Map each real instruction to its SUnit. 474234353Sdim/// 475234353Sdim/// After initSUnits, the SUnits vector cannot be resized and the scheduler may 476234353Sdim/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs 477234353Sdim/// instead of pointers. 478234353Sdim/// 479234353Sdim/// MachineScheduler relies on initSUnits numbering the nodes by their order in 480234353Sdim/// the original instruction list. 481234353Sdimvoid ScheduleDAGInstrs::initSUnits() { 482234353Sdim // We'll be allocating one SUnit for each real instruction in the region, 483234353Sdim // which is contained within a basic block. 484193323Sed SUnits.reserve(BB->size()); 485193323Sed 486234353Sdim for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { 487234353Sdim MachineInstr *MI = I; 488234353Sdim if (MI->isDebugValue()) 489234353Sdim continue; 490234353Sdim 491234353Sdim SUnit *SU = newSUnit(MI); 492234353Sdim MISUnitMap[MI] = SU; 493234353Sdim 494234353Sdim SU->isCall = MI->isCall(); 495234353Sdim SU->isCommutable = MI->isCommutable(); 496234353Sdim 497234353Sdim // Assign the Latency field of SU using target-provided information. 498234353Sdim if (UnitLatencies) 499234353Sdim SU->Latency = 1; 500234353Sdim else 501234353Sdim computeLatency(SU); 502234353Sdim } 503234353Sdim} 504234353Sdim 505234353Sdimvoid ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { 506234353Sdim // Create an SUnit for each real instruction. 
507234353Sdim initSUnits(); 508234353Sdim 509193323Sed // We build scheduling units by walking a block's instruction list from bottom 510193323Sed // to top. 511193323Sed 512199481Srdivacky // Remember where a generic side-effecting instruction is as we procede. 513199481Srdivacky SUnit *BarrierChain = 0, *AliasChain = 0; 514193323Sed 515199481Srdivacky // Memory references to specific known memory locations are tracked 516199481Srdivacky // so that they can be given more precise dependencies. We track 517199481Srdivacky // separately the known memory locations that may alias and those 518199481Srdivacky // that are known not to alias 519199481Srdivacky std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; 520199481Srdivacky std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; 521193323Sed 522205218Srdivacky // Remove any stale debug info; sometimes BuildSchedGraph is called again 523205218Srdivacky // without emitting the info from the previous call. 524223017Sdim DbgValues.clear(); 525223017Sdim FirstDbgValue = NULL; 526205218Srdivacky 527234353Sdim assert(Defs.empty() && Uses.empty() && 528234353Sdim "Only BuildGraph should update Defs/Uses"); 529234353Sdim Defs.setRegLimit(TRI->getNumRegs()); 530234353Sdim Uses.setRegLimit(TRI->getNumRegs()); 531234353Sdim 532234353Sdim assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); 533234353Sdim // FIXME: Allow SparseSet to reserve space for the creation of virtual 534234353Sdim // registers during scheduling. Don't artificially inflate the Universe 535234353Sdim // because we want to assert that vregs are not created during DAG building. 536234353Sdim VRegDefs.setUniverse(MRI.getNumVirtRegs()); 537234353Sdim 538218893Sdim // Model data dependencies between instructions being scheduled and the 539218893Sdim // ExitSU. 540234353Sdim addSchedBarrierDeps(); 541218893Sdim 542193323Sed // Walk the list of instructions, from bottom moving up. 
543223017Sdim MachineInstr *PrevMI = NULL; 544234353Sdim for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; 545193323Sed MII != MIE; --MII) { 546193323Sed MachineInstr *MI = prior(MII); 547223017Sdim if (MI && PrevMI) { 548223017Sdim DbgValues.push_back(std::make_pair(PrevMI, MI)); 549223017Sdim PrevMI = NULL; 550223017Sdim } 551223017Sdim 552205218Srdivacky if (MI->isDebugValue()) { 553223017Sdim PrevMI = MI; 554205218Srdivacky continue; 555205218Srdivacky } 556223017Sdim 557234353Sdim assert(!MI->isTerminator() && !MI->isLabel() && 558193323Sed "Cannot schedule terminators or labels!"); 559193323Sed 560234353Sdim SUnit *SU = MISUnitMap[MI]; 561234353Sdim assert(SU && "No SUnit mapped to this MI"); 562193323Sed 563193323Sed // Add register-based dependencies (data, anti, and output). 564193323Sed for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { 565193323Sed const MachineOperand &MO = MI->getOperand(j); 566193323Sed if (!MO.isReg()) continue; 567193323Sed unsigned Reg = MO.getReg(); 568193323Sed if (Reg == 0) continue; 569193323Sed 570234353Sdim if (TRI->isPhysicalRegister(Reg)) 571234353Sdim addPhysRegDeps(SU, j); 572234353Sdim else { 573234353Sdim assert(!IsPostRA && "Virtual register encountered!"); 574234353Sdim if (MO.isDef()) 575234353Sdim addVRegDefDeps(SU, j); 576234353Sdim else if (MO.readsReg()) // ignore undef operands 577234353Sdim addVRegUseDeps(SU, j); 578193323Sed } 579193323Sed } 580193323Sed 581193323Sed // Add chain dependencies. 582198892Srdivacky // Chain dependencies used to enforce memory order should have 583198892Srdivacky // latency of 0 (except for true dependency of Store followed by 584198892Srdivacky // aliased Load... we estimate that with a single cycle of latency 585198892Srdivacky // assuming the hardware will bypass) 586193323Sed // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable 587193323Sed // after stack slots are lowered to actual addresses. 
    // Tail of ScheduleDAGInstrs::buildSchedGraph (the function begins
    // earlier in the file): add memory (chain) dependencies for MI.
    //
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
           E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  // NOTE(review): PrevMI appears to hold a leading DBG_VALUE remembered by
  // the loop above (outside this view) -- confirm against the loop body.
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}

/// computeLatency - Compute the latency for SU. With itinerary data this
/// asks the target via TII->getInstrLatency(); without it, every node gets
/// a unit latency, with a small extra charge for loads.
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

/// computeOperandLatency - Refine the latency of the data dependence 'dep'
/// from Def to Use using the target's instruction itineraries. Only register
/// data dependencies are considered; if a non-negative operand latency is
/// found it replaces the latency stored in 'dep', otherwise 'dep' is left
/// unchanged.
void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  // Without itinerary data there is nothing better than the default latency.
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      // Re-search allowing overlapping defs (Overlap=true); since a def of
      // Reg exists (DefIdx != -1), this search finds an operand.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

/// dumpNode - Dump the MachineInstr wrapped by this scheduling unit via
/// MachineInstr::dump().
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

/// getGraphNodeLabel - Return the label used for SU when visualizing the
/// scheduling DAG: "<entry>"/"<exit>" for the boundary nodes, otherwise the
/// printed form of the node's MachineInstr.
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}