ScheduleDAGInstrs.cpp revision 199481
1193323Sed//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This implements the ScheduleDAGInstrs class, which implements re-scheduling 11193323Sed// of MachineInstrs. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "sched-instrs" 16193323Sed#include "ScheduleDAGInstrs.h" 17198090Srdivacky#include "llvm/Operator.h" 18193323Sed#include "llvm/Analysis/AliasAnalysis.h" 19193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 20198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 21193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 22193323Sed#include "llvm/CodeGen/PseudoSourceValue.h" 23193323Sed#include "llvm/Target/TargetMachine.h" 24193323Sed#include "llvm/Target/TargetInstrInfo.h" 25193323Sed#include "llvm/Target/TargetRegisterInfo.h" 26193323Sed#include "llvm/Target/TargetSubtarget.h" 27193323Sed#include "llvm/Support/Debug.h" 28193323Sed#include "llvm/Support/raw_ostream.h" 29193323Sed#include "llvm/ADT/SmallSet.h" 30193323Sedusing namespace llvm; 31193323Sed 32193323SedScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, 33193323Sed const MachineLoopInfo &mli, 34193323Sed const MachineDominatorTree &mdt) 35198396Srdivacky : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) { 36198396Srdivacky MFI = mf.getFrameInfo(); 37198396Srdivacky} 38193323Sed 39193323Sed/// Run - perform scheduling. 40193323Sed/// 41193323Sedvoid ScheduleDAGInstrs::Run(MachineBasicBlock *bb, 42193323Sed MachineBasicBlock::iterator begin, 43193323Sed MachineBasicBlock::iterator end, 44193323Sed unsigned endcount) { 45193323Sed BB = bb; 46193323Sed Begin = begin; 47193323Sed InsertPosIndex = endcount; 48193323Sed 49193323Sed ScheduleDAG::Run(bb, end); 50193323Sed} 51193323Sed 52193323Sed/// getUnderlyingObjectFromInt - This is the function that does the work of 53193323Sed/// looking through basic ptrtoint+arithmetic+inttoptr sequences. 54193323Sedstatic const Value *getUnderlyingObjectFromInt(const Value *V) { 55193323Sed do { 56198090Srdivacky if (const Operator *U = dyn_cast<Operator>(V)) { 57193323Sed // If we find a ptrtoint, we can transfer control back to the 58193323Sed // regular getUnderlyingObjectFromInt. 59198090Srdivacky if (U->getOpcode() == Instruction::PtrToInt) 60193323Sed return U->getOperand(0); 61193323Sed // If we find an add of a constant or a multiplied value, it's 62193323Sed // likely that the other operand will lead us to the base 63193323Sed // object. We don't have to worry about the case where the 64198090Srdivacky // object address is somehow being computed by the multiply, 65193323Sed // because our callers only care when the result is an 66193323Sed // identifibale object. 67198090Srdivacky if (U->getOpcode() != Instruction::Add || 68193323Sed (!isa<ConstantInt>(U->getOperand(1)) && 69198090Srdivacky Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) 70193323Sed return V; 71193323Sed V = U->getOperand(0); 72193323Sed } else { 73193323Sed return V; 74193323Sed } 75193323Sed assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!"); 76193323Sed } while (1); 77193323Sed} 78193323Sed 79193323Sed/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject 80193323Sed/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. 81193323Sedstatic const Value *getUnderlyingObject(const Value *V) { 82193323Sed // First just call Value::getUnderlyingObject to let it do what it does. 83193323Sed do { 84193323Sed V = V->getUnderlyingObject(); 85193323Sed // If it found an inttoptr, use special code to continue climing. 86198090Srdivacky if (Operator::getOpcode(V) != Instruction::IntToPtr) 87193323Sed break; 88193323Sed const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 89193323Sed // If that succeeded in finding a pointer, continue the search. 90193323Sed if (!isa<PointerType>(O->getType())) 91193323Sed break; 92193323Sed V = O; 93193323Sed } while (1); 94193323Sed return V; 95193323Sed} 96193323Sed 97193323Sed/// getUnderlyingObjectForInstr - If this machine instr has memory reference 98193323Sed/// information and it can be tracked to a normal reference to a known 99193323Sed/// object, return the Value for that object. Otherwise return null. 100198396Srdivackystatic const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, 101198892Srdivacky const MachineFrameInfo *MFI, 102198892Srdivacky bool &MayAlias) { 103198892Srdivacky MayAlias = true; 104193323Sed if (!MI->hasOneMemOperand() || 105198090Srdivacky !(*MI->memoperands_begin())->getValue() || 106198090Srdivacky (*MI->memoperands_begin())->isVolatile()) 107193323Sed return 0; 108193323Sed 109198090Srdivacky const Value *V = (*MI->memoperands_begin())->getValue(); 110193323Sed if (!V) 111193323Sed return 0; 112193323Sed 113193323Sed V = getUnderlyingObject(V); 114198396Srdivacky if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { 115198396Srdivacky // For now, ignore PseudoSourceValues which may alias LLVM IR values 116198396Srdivacky // because the code that uses this function has no way to cope with 117198396Srdivacky // such aliases. 118198396Srdivacky if (PSV->isAliased(MFI)) 119198396Srdivacky return 0; 120199481Srdivacky 121199481Srdivacky MayAlias = PSV->mayAlias(MFI); 122198396Srdivacky return V; 123198396Srdivacky } 124193323Sed 125198396Srdivacky if (isIdentifiedObject(V)) 126198396Srdivacky return V; 127198396Srdivacky 128198396Srdivacky return 0; 129193323Sed} 130193323Sed 131193323Sedvoid ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { 132193323Sed if (MachineLoop *ML = MLI.getLoopFor(BB)) 133193323Sed if (BB == ML->getLoopLatch()) { 134193323Sed MachineBasicBlock *Header = ML->getHeader(); 135193323Sed for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), 136193323Sed E = Header->livein_end(); I != E; ++I) 137193323Sed LoopLiveInRegs.insert(*I); 138193323Sed LoopRegs.VisitLoop(ML); 139193323Sed } 140193323Sed} 141193323Sed 142198090Srdivackyvoid ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { 143193323Sed // We'll be allocating one SUnit for each instruction, plus one for 144193323Sed // the region exit node. 145193323Sed SUnits.reserve(BB->size()); 146193323Sed 147193323Sed // We build scheduling units by walking a block's instruction list from bottom 148193323Sed // to top. 149193323Sed 150199481Srdivacky // Remember where a generic side-effecting instruction is as we procede. 151199481Srdivacky SUnit *BarrierChain = 0, *AliasChain = 0; 152193323Sed 153199481Srdivacky // Memory references to specific known memory locations are tracked 154199481Srdivacky // so that they can be given more precise dependencies. We track 155199481Srdivacky // separately the known memory locations that may alias and those 156199481Srdivacky // that are known not to alias 157199481Srdivacky std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; 158199481Srdivacky std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; 159193323Sed 160193323Sed // Check to see if the scheduler cares about latencies. 161193323Sed bool UnitLatencies = ForceUnitLatencies(); 162193323Sed 163193323Sed // Ask the target if address-backscheduling is desirable, and if so how much. 164198090Srdivacky const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); 165198090Srdivacky unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); 166193323Sed 167193323Sed // Walk the list of instructions, from bottom moving up. 168193323Sed for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; 169193323Sed MII != MIE; --MII) { 170193323Sed MachineInstr *MI = prior(MII); 171193323Sed const TargetInstrDesc &TID = MI->getDesc(); 172193323Sed assert(!TID.isTerminator() && !MI->isLabel() && 173193323Sed "Cannot schedule terminators or labels!"); 174193323Sed // Create the SUnit for this MI. 175193323Sed SUnit *SU = NewSUnit(MI); 176193323Sed 177193323Sed // Assign the Latency field of SU using target-provided information. 178193323Sed if (UnitLatencies) 179193323Sed SU->Latency = 1; 180193323Sed else 181193323Sed ComputeLatency(SU); 182193323Sed 183193323Sed // Add register-based dependencies (data, anti, and output). 184193323Sed for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { 185193323Sed const MachineOperand &MO = MI->getOperand(j); 186193323Sed if (!MO.isReg()) continue; 187193323Sed unsigned Reg = MO.getReg(); 188193323Sed if (Reg == 0) continue; 189193323Sed 190193323Sed assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); 191193323Sed std::vector<SUnit *> &UseList = Uses[Reg]; 192193323Sed std::vector<SUnit *> &DefList = Defs[Reg]; 193198090Srdivacky // Optionally add output and anti dependencies. For anti 194198090Srdivacky // dependencies we use a latency of 0 because for a multi-issue 195198090Srdivacky // target we want to allow the defining instruction to issue 196198090Srdivacky // in the same cycle as the using instruction. 197198090Srdivacky // TODO: Using a latency of 1 here for output dependencies assumes 198198090Srdivacky // there's no cost for reusing registers. 199193323Sed SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; 200198090Srdivacky unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; 201193323Sed for (unsigned i = 0, e = DefList.size(); i != e; ++i) { 202193323Sed SUnit *DefSU = DefList[i]; 203193323Sed if (DefSU != SU && 204193323Sed (Kind != SDep::Output || !MO.isDead() || 205193323Sed !DefSU->getInstr()->registerDefIsDead(Reg))) 206198090Srdivacky DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); 207193323Sed } 208193323Sed for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 209193323Sed std::vector<SUnit *> &DefList = Defs[*Alias]; 210193323Sed for (unsigned i = 0, e = DefList.size(); i != e; ++i) { 211193323Sed SUnit *DefSU = DefList[i]; 212193323Sed if (DefSU != SU && 213193323Sed (Kind != SDep::Output || !MO.isDead() || 214198892Srdivacky !DefSU->getInstr()->registerDefIsDead(*Alias))) 215198090Srdivacky DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); 216193323Sed } 217193323Sed } 218193323Sed 219193323Sed if (MO.isDef()) { 220193323Sed // Add any data dependencies. 221193323Sed unsigned DataLatency = SU->Latency; 222193323Sed for (unsigned i = 0, e = UseList.size(); i != e; ++i) { 223193323Sed SUnit *UseSU = UseList[i]; 224193323Sed if (UseSU != SU) { 225193323Sed unsigned LDataLatency = DataLatency; 226193323Sed // Optionally add in a special extra latency for nodes that 227193323Sed // feed addresses. 228193323Sed // TODO: Do this for register aliases too. 229198090Srdivacky // TODO: Perhaps we should get rid of 230198090Srdivacky // SpecialAddressLatency and just move this into 231198090Srdivacky // adjustSchedDependency for the targets that care about 232198090Srdivacky // it. 233193323Sed if (SpecialAddressLatency != 0 && !UnitLatencies) { 234193323Sed MachineInstr *UseMI = UseSU->getInstr(); 235193323Sed const TargetInstrDesc &UseTID = UseMI->getDesc(); 236193323Sed int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); 237193323Sed assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); 238193323Sed if ((UseTID.mayLoad() || UseTID.mayStore()) && 239193323Sed (unsigned)RegUseIndex < UseTID.getNumOperands() && 240193323Sed UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) 241193323Sed LDataLatency += SpecialAddressLatency; 242193323Sed } 243198090Srdivacky // Adjust the dependence latency using operand def/use 244198090Srdivacky // information (if any), and then allow the target to 245198090Srdivacky // perform its own adjustments. 246198090Srdivacky const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); 247198090Srdivacky if (!UnitLatencies) { 248198090Srdivacky ComputeOperandLatency(SU, UseSU, (SDep &)dep); 249198090Srdivacky ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); 250198090Srdivacky } 251198090Srdivacky UseSU->addPred(dep); 252193323Sed } 253193323Sed } 254193323Sed for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 255193323Sed std::vector<SUnit *> &UseList = Uses[*Alias]; 256193323Sed for (unsigned i = 0, e = UseList.size(); i != e; ++i) { 257193323Sed SUnit *UseSU = UseList[i]; 258198090Srdivacky if (UseSU != SU) { 259198090Srdivacky const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); 260198090Srdivacky if (!UnitLatencies) { 261198090Srdivacky ComputeOperandLatency(SU, UseSU, (SDep &)dep); 262198090Srdivacky ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); 263198090Srdivacky } 264198090Srdivacky UseSU->addPred(dep); 265198090Srdivacky } 266193323Sed } 267193323Sed } 268193323Sed 269193323Sed // If a def is going to wrap back around to the top of the loop, 270193323Sed // backschedule it. 271193323Sed if (!UnitLatencies && DefList.empty()) { 272193323Sed LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); 273193323Sed if (I != LoopRegs.Deps.end()) { 274193323Sed const MachineOperand *UseMO = I->second.first; 275193323Sed unsigned Count = I->second.second; 276193323Sed const MachineInstr *UseMI = UseMO->getParent(); 277193323Sed unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); 278193323Sed const TargetInstrDesc &UseTID = UseMI->getDesc(); 279193323Sed // TODO: If we knew the total depth of the region here, we could 280193323Sed // handle the case where the whole loop is inside the region but 281193323Sed // is large enough that the isScheduleHigh trick isn't needed. 282193323Sed if (UseMOIdx < UseTID.getNumOperands()) { 283193323Sed // Currently, we only support scheduling regions consisting of 284193323Sed // single basic blocks. Check to see if the instruction is in 285193323Sed // the same region by checking to see if it has the same parent. 286193323Sed if (UseMI->getParent() != MI->getParent()) { 287193323Sed unsigned Latency = SU->Latency; 288193323Sed if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) 289193323Sed Latency += SpecialAddressLatency; 290193323Sed // This is a wild guess as to the portion of the latency which 291193323Sed // will be overlapped by work done outside the current 292193323Sed // scheduling region. 293193323Sed Latency -= std::min(Latency, Count); 294193323Sed // Add the artifical edge. 295193323Sed ExitSU.addPred(SDep(SU, SDep::Order, Latency, 296193323Sed /*Reg=*/0, /*isNormalMemory=*/false, 297193323Sed /*isMustAlias=*/false, 298193323Sed /*isArtificial=*/true)); 299193323Sed } else if (SpecialAddressLatency > 0 && 300193323Sed UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { 301193323Sed // The entire loop body is within the current scheduling region 302193323Sed // and the latency of this operation is assumed to be greater 303193323Sed // than the latency of the loop. 304193323Sed // TODO: Recursively mark data-edge predecessors as 305193323Sed // isScheduleHigh too. 306193323Sed SU->isScheduleHigh = true; 307193323Sed } 308193323Sed } 309193323Sed LoopRegs.Deps.erase(I); 310193323Sed } 311193323Sed } 312193323Sed 313193323Sed UseList.clear(); 314193323Sed if (!MO.isDead()) 315193323Sed DefList.clear(); 316193323Sed DefList.push_back(SU); 317193323Sed } else { 318193323Sed UseList.push_back(SU); 319193323Sed } 320193323Sed } 321193323Sed 322193323Sed // Add chain dependencies. 323198892Srdivacky // Chain dependencies used to enforce memory order should have 324198892Srdivacky // latency of 0 (except for true dependency of Store followed by 325198892Srdivacky // aliased Load... we estimate that with a single cycle of latency 326198892Srdivacky // assuming the hardware will bypass) 327193323Sed // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable 328193323Sed // after stack slots are lowered to actual addresses. 329193323Sed // TODO: Use an AliasAnalysis and do real alias-analysis queries, and 330193323Sed // produce more precise dependence information. 331198892Srdivacky#define STORE_LOAD_LATENCY 1 332198892Srdivacky unsigned TrueMemOrderLatency = 0; 333199481Srdivacky if (TID.isCall() || TID.hasUnmodeledSideEffects() || 334199481Srdivacky (MI->hasVolatileMemoryRef() && 335199481Srdivacky (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { 336199481Srdivacky // Be conservative with these and add dependencies on all memory 337199481Srdivacky // references, even those that are known to not alias. 338199481Srdivacky for (std::map<const Value *, SUnit *>::iterator I = 339199481Srdivacky NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { 340199481Srdivacky I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 341199481Srdivacky } 342199481Srdivacky for (std::map<const Value *, std::vector<SUnit *> >::iterator I = 343199481Srdivacky NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { 344199481Srdivacky for (unsigned i = 0, e = I->second.size(); i != e; ++i) 345199481Srdivacky I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); 346199481Srdivacky } 347199481Srdivacky NonAliasMemDefs.clear(); 348199481Srdivacky NonAliasMemUses.clear(); 349199481Srdivacky // Add SU to the barrier chain. 350199481Srdivacky if (BarrierChain) 351199481Srdivacky BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 352199481Srdivacky BarrierChain = SU; 353199481Srdivacky 354199481Srdivacky // fall-through 355199481Srdivacky new_alias_chain: 356199481Srdivacky // Chain all possibly aliasing memory references though SU. 357199481Srdivacky if (AliasChain) 358199481Srdivacky AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 359199481Srdivacky AliasChain = SU; 360193323Sed for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) 361198892Srdivacky PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); 362199481Srdivacky for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), 363199481Srdivacky E = AliasMemDefs.end(); I != E; ++I) { 364198892Srdivacky I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 365193323Sed } 366193323Sed for (std::map<const Value *, std::vector<SUnit *> >::iterator I = 367199481Srdivacky AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { 368193323Sed for (unsigned i = 0, e = I->second.size(); i != e; ++i) 369198892Srdivacky I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); 370193323Sed } 371199481Srdivacky PendingLoads.clear(); 372199481Srdivacky AliasMemDefs.clear(); 373199481Srdivacky AliasMemUses.clear(); 374193323Sed } else if (TID.mayStore()) { 375198892Srdivacky bool MayAlias = true; 376198892Srdivacky TrueMemOrderLatency = STORE_LOAD_LATENCY; 377198892Srdivacky if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { 378193323Sed // A store to a specific PseudoSourceValue. Add precise dependencies. 379199481Srdivacky // Record the def in MemDefs, first adding a dep if there is 380199481Srdivacky // an existing def. 381199481Srdivacky std::map<const Value *, SUnit *>::iterator I = 382199481Srdivacky ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); 383199481Srdivacky std::map<const Value *, SUnit *>::iterator IE = 384199481Srdivacky ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); 385199481Srdivacky if (I != IE) { 386198892Srdivacky I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, 387193323Sed /*isNormalMemory=*/true)); 388193323Sed I->second = SU; 389193323Sed } else { 390199481Srdivacky if (MayAlias) 391199481Srdivacky AliasMemDefs[V] = SU; 392199481Srdivacky else 393199481Srdivacky NonAliasMemDefs[V] = SU; 394193323Sed } 395193323Sed // Handle the uses in MemUses, if there are any. 396193323Sed std::map<const Value *, std::vector<SUnit *> >::iterator J = 397199481Srdivacky ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); 398199481Srdivacky std::map<const Value *, std::vector<SUnit *> >::iterator JE = 399199481Srdivacky ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); 400199481Srdivacky if (J != JE) { 401193323Sed for (unsigned i = 0, e = J->second.size(); i != e; ++i) 402198892Srdivacky J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, 403198892Srdivacky /*Reg=*/0, /*isNormalMemory=*/true)); 404193323Sed J->second.clear(); 405193323Sed } 406198892Srdivacky if (MayAlias) { 407199481Srdivacky // Add dependencies from all the PendingLoads, i.e. loads 408199481Srdivacky // with no underlying object. 409198892Srdivacky for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) 410198892Srdivacky PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); 411199481Srdivacky // Add dependence on alias chain, if needed. 412199481Srdivacky if (AliasChain) 413199481Srdivacky AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 414198892Srdivacky } 415199481Srdivacky // Add dependence on barrier chain, if needed. 416199481Srdivacky if (BarrierChain) 417199481Srdivacky BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 418198953Srdivacky } else { 419193323Sed // Treat all other stores conservatively. 420199481Srdivacky goto new_alias_chain; 421198892Srdivacky } 422193323Sed } else if (TID.mayLoad()) { 423198892Srdivacky bool MayAlias = true; 424198892Srdivacky TrueMemOrderLatency = 0; 425198090Srdivacky if (MI->isInvariantLoad(AA)) { 426193323Sed // Invariant load, no chain dependencies needed! 427198953Srdivacky } else { 428199481Srdivacky if (const Value *V = 429199481Srdivacky getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { 430199481Srdivacky // A load from a specific PseudoSourceValue. Add precise dependencies. 431199481Srdivacky std::map<const Value *, SUnit *>::iterator I = 432199481Srdivacky ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); 433199481Srdivacky std::map<const Value *, SUnit *>::iterator IE = 434199481Srdivacky ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); 435199481Srdivacky if (I != IE) 436199481Srdivacky I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, 437199481Srdivacky /*isNormalMemory=*/true)); 438199481Srdivacky if (MayAlias) 439199481Srdivacky AliasMemUses[V].push_back(SU); 440199481Srdivacky else 441199481Srdivacky NonAliasMemUses[V].push_back(SU); 442199481Srdivacky } else { 443199481Srdivacky // A load with no underlying object. Depend on all 444199481Srdivacky // potentially aliasing stores. 445199481Srdivacky for (std::map<const Value *, SUnit *>::iterator I = 446199481Srdivacky AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) 447199481Srdivacky I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 448199481Srdivacky 449199481Srdivacky PendingLoads.push_back(SU); 450199481Srdivacky MayAlias = true; 451198892Srdivacky } 452199481Srdivacky 453199481Srdivacky // Add dependencies on alias and barrier chains, if needed. 454199481Srdivacky if (MayAlias && AliasChain) 455199481Srdivacky AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 456199481Srdivacky if (BarrierChain) 457199481Srdivacky BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); 458199481Srdivacky } 459193323Sed } 460193323Sed } 461193323Sed 462193323Sed for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { 463193323Sed Defs[i].clear(); 464193323Sed Uses[i].clear(); 465193323Sed } 466193323Sed PendingLoads.clear(); 467193323Sed} 468193323Sed 469193323Sedvoid ScheduleDAGInstrs::FinishBlock() { 470193323Sed // Nothing to do. 471193323Sed} 472193323Sed 473193323Sedvoid ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { 474193323Sed const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 475193323Sed 476198090Srdivacky // Compute the latency for the node. 477193323Sed SU->Latency = 478198090Srdivacky InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); 479193323Sed 480193323Sed // Simplistic target-independent heuristic: assume that loads take 481193323Sed // extra time. 482193323Sed if (InstrItins.isEmpty()) 483193323Sed if (SU->getInstr()->getDesc().mayLoad()) 484193323Sed SU->Latency += 2; 485193323Sed} 486193323Sed 487198090Srdivackyvoid ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 488198090Srdivacky SDep& dep) const { 489198090Srdivacky const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 490198090Srdivacky if (InstrItins.isEmpty()) 491198090Srdivacky return; 492198090Srdivacky 493198090Srdivacky // For a data dependency with a known register... 494198090Srdivacky if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) 495198090Srdivacky return; 496198090Srdivacky 497198090Srdivacky const unsigned Reg = dep.getReg(); 498198090Srdivacky 499198090Srdivacky // ... find the definition of the register in the defining 500198090Srdivacky // instruction 501198090Srdivacky MachineInstr *DefMI = Def->getInstr(); 502198090Srdivacky int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); 503198090Srdivacky if (DefIdx != -1) { 504198090Srdivacky int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); 505198090Srdivacky if (DefCycle >= 0) { 506198090Srdivacky MachineInstr *UseMI = Use->getInstr(); 507198090Srdivacky const unsigned UseClass = UseMI->getDesc().getSchedClass(); 508198090Srdivacky 509198090Srdivacky // For all uses of the register, calculate the maxmimum latency 510198090Srdivacky int Latency = -1; 511198090Srdivacky for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 512198090Srdivacky const MachineOperand &MO = UseMI->getOperand(i); 513198090Srdivacky if (!MO.isReg() || !MO.isUse()) 514198090Srdivacky continue; 515198090Srdivacky unsigned MOReg = MO.getReg(); 516198090Srdivacky if (MOReg != Reg) 517198090Srdivacky continue; 518198090Srdivacky 519198090Srdivacky int UseCycle = InstrItins.getOperandCycle(UseClass, i); 520198090Srdivacky if (UseCycle >= 0) 521198090Srdivacky Latency = std::max(Latency, DefCycle - UseCycle + 1); 522198090Srdivacky } 523198090Srdivacky 524198090Srdivacky // If we found a latency, then replace the existing dependence latency. 525198090Srdivacky if (Latency >= 0) 526198090Srdivacky dep.setLatency(Latency); 527198090Srdivacky } 528198090Srdivacky } 529198090Srdivacky} 530198090Srdivacky 531193323Sedvoid ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { 532193323Sed SU->getInstr()->dump(); 533193323Sed} 534193323Sed 535193323Sedstd::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { 536193323Sed std::string s; 537193323Sed raw_string_ostream oss(s); 538193323Sed if (SU == &EntrySU) 539193323Sed oss << "<entry>"; 540193323Sed else if (SU == &ExitSU) 541193323Sed oss << "<exit>"; 542193323Sed else 543193323Sed SU->getInstr()->print(oss); 544193323Sed return oss.str(); 545193323Sed} 546193323Sed 547193323Sed// EmitSchedule - Emit the machine code in scheduled order. 548198090SrdivackyMachineBasicBlock *ScheduleDAGInstrs:: 549198090SrdivackyEmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { 550193323Sed // For MachineInstr-based scheduling, we're rescheduling the instructions in 551193323Sed // the block, so start by removing them from the block. 552193323Sed while (Begin != InsertPos) { 553193323Sed MachineBasicBlock::iterator I = Begin; 554193323Sed ++Begin; 555193323Sed BB->remove(I); 556193323Sed } 557193323Sed 558193323Sed // Then re-insert them according to the given schedule. 559193323Sed for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 560193323Sed SUnit *SU = Sequence[i]; 561193323Sed if (!SU) { 562193323Sed // Null SUnit* is a noop. 563193323Sed EmitNoop(); 564193323Sed continue; 565193323Sed } 566193323Sed 567193323Sed BB->insert(InsertPos, SU->getInstr()); 568193323Sed } 569193323Sed 570193323Sed // Update the Begin iterator, as the first instruction in the block 571193323Sed // may have been scheduled later. 572193323Sed if (!Sequence.empty()) 573193323Sed Begin = Sequence[0]->getInstr(); 574193323Sed 575193323Sed return BB; 576193323Sed} 577