1243789Sdim//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// 2243789Sdim// 3243789Sdim// The LLVM Compiler Infrastructure 4243789Sdim// 5243789Sdim// This file is distributed under the University of Illinois Open Source 6243789Sdim// License. See LICENSE.TXT for details. 7243789Sdim// 8243789Sdim//===----------------------------------------------------------------------===// 9243789Sdim// 10243789Sdim// This file implements a wrapper around MCSchedModel that allows the interface 11243789Sdim// to benefit from information currently only available in TargetInstrInfo. 12243789Sdim// 13243789Sdim//===----------------------------------------------------------------------===// 14243789Sdim 15243789Sdim#include "llvm/CodeGen/TargetSchedule.h" 16249423Sdim#include "llvm/Support/CommandLine.h" 17249423Sdim#include "llvm/Support/raw_ostream.h" 18243789Sdim#include "llvm/Target/TargetInstrInfo.h" 19243789Sdim#include "llvm/Target/TargetMachine.h" 20243789Sdim#include "llvm/Target/TargetRegisterInfo.h" 21243789Sdim#include "llvm/Target/TargetSubtargetInfo.h" 22243789Sdim 23243789Sdimusing namespace llvm; 24243789Sdim 25243789Sdimstatic cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), 26243789Sdim cl::desc("Use TargetSchedModel for latency lookup")); 27243789Sdim 28243789Sdimstatic cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), 29243789Sdim cl::desc("Use InstrItineraryData for latency lookup")); 30243789Sdim 31243789Sdimbool TargetSchedModel::hasInstrSchedModel() const { 32243789Sdim return EnableSchedModel && SchedModel.hasInstrSchedModel(); 33243789Sdim} 34243789Sdim 35243789Sdimbool TargetSchedModel::hasInstrItineraries() const { 36243789Sdim return EnableSchedItins && !InstrItins.isEmpty(); 37243789Sdim} 38243789Sdim 39243789Sdimstatic unsigned gcd(unsigned Dividend, unsigned Divisor) { 40243789Sdim // Dividend and Divisor will be naturally swapped as needed. 41243789Sdim while(Divisor) { 42243789Sdim unsigned Rem = Dividend % Divisor; 43243789Sdim Dividend = Divisor; 44243789Sdim Divisor = Rem; 45243789Sdim }; 46243789Sdim return Dividend; 47243789Sdim} 48243789Sdimstatic unsigned lcm(unsigned A, unsigned B) { 49243789Sdim unsigned LCM = (uint64_t(A) * B) / gcd(A, B); 50243789Sdim assert((LCM >= A && LCM >= B) && "LCM overflow"); 51243789Sdim return LCM; 52243789Sdim} 53243789Sdim 54243789Sdimvoid TargetSchedModel::init(const MCSchedModel &sm, 55243789Sdim const TargetSubtargetInfo *sti, 56243789Sdim const TargetInstrInfo *tii) { 57243789Sdim SchedModel = sm; 58243789Sdim STI = sti; 59243789Sdim TII = tii; 60243789Sdim STI->initInstrItins(InstrItins); 61243789Sdim 62243789Sdim unsigned NumRes = SchedModel.getNumProcResourceKinds(); 63243789Sdim ResourceFactors.resize(NumRes); 64243789Sdim ResourceLCM = SchedModel.IssueWidth; 65243789Sdim for (unsigned Idx = 0; Idx < NumRes; ++Idx) { 66243789Sdim unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; 67243789Sdim if (NumUnits > 0) 68243789Sdim ResourceLCM = lcm(ResourceLCM, NumUnits); 69243789Sdim } 70243789Sdim MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; 71243789Sdim for (unsigned Idx = 0; Idx < NumRes; ++Idx) { 72243789Sdim unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; 73243789Sdim ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; 74243789Sdim } 75243789Sdim} 76243789Sdim 77243789Sdimunsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, 78243789Sdim const MCSchedClassDesc *SC) const { 79243789Sdim if (hasInstrItineraries()) { 80243789Sdim int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); 81243789Sdim return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); 82243789Sdim } 83243789Sdim if (hasInstrSchedModel()) { 84243789Sdim if (!SC) 85243789Sdim SC = resolveSchedClass(MI); 86243789Sdim if (SC->isValid()) 87243789Sdim return SC->NumMicroOps; 88243789Sdim } 89243789Sdim return MI->isTransient() ? 0 : 1; 90243789Sdim} 91243789Sdim 92243789Sdim// The machine model may explicitly specify an invalid latency, which 93243789Sdim// effectively means infinite latency. Since users of the TargetSchedule API 94243789Sdim// don't know how to handle this, we convert it to a very large latency that is 95243789Sdim// easy to distinguish when debugging the DAG but won't induce overflow. 96263508Sdimstatic unsigned capLatency(int Cycles) { 97243789Sdim return Cycles >= 0 ? Cycles : 1000; 98243789Sdim} 99243789Sdim 100243789Sdim/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require 101243789Sdim/// evaluation of predicates that depend on instruction operands or flags. 102243789Sdimconst MCSchedClassDesc *TargetSchedModel:: 103243789SdimresolveSchedClass(const MachineInstr *MI) const { 104243789Sdim 105243789Sdim // Get the definition's scheduling class descriptor from this machine model. 106243789Sdim unsigned SchedClass = MI->getDesc().getSchedClass(); 107243789Sdim const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); 108251662Sdim if (!SCDesc->isValid()) 109251662Sdim return SCDesc; 110243789Sdim 111243789Sdim#ifndef NDEBUG 112243789Sdim unsigned NIter = 0; 113243789Sdim#endif 114243789Sdim while (SCDesc->isVariant()) { 115243789Sdim assert(++NIter < 6 && "Variants are nested deeper than the magic number"); 116243789Sdim 117243789Sdim SchedClass = STI->resolveSchedClass(SchedClass, MI, this); 118243789Sdim SCDesc = SchedModel.getSchedClassDesc(SchedClass); 119243789Sdim } 120243789Sdim return SCDesc; 121243789Sdim} 122243789Sdim 123243789Sdim/// Find the def index of this operand. This index maps to the machine model and 124243789Sdim/// is independent of use operands. Def operands may be reordered with uses or 125243789Sdim/// merged with uses without affecting the def index (e.g. before/after 126243789Sdim/// regalloc). However, an instruction's def operands must never be reordered 127243789Sdim/// with respect to each other. 128243789Sdimstatic unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { 129243789Sdim unsigned DefIdx = 0; 130243789Sdim for (unsigned i = 0; i != DefOperIdx; ++i) { 131243789Sdim const MachineOperand &MO = MI->getOperand(i); 132243789Sdim if (MO.isReg() && MO.isDef()) 133243789Sdim ++DefIdx; 134243789Sdim } 135243789Sdim return DefIdx; 136243789Sdim} 137243789Sdim 138243789Sdim/// Find the use index of this operand. This is independent of the instruction's 139243789Sdim/// def operands. 140243789Sdim/// 141243789Sdim/// Note that uses are not determined by the operand's isUse property, which 142243789Sdim/// is simply the inverse of isDef. Here we consider any readsReg operand to be 143243789Sdim/// a "use". The machine model allows an operand to be both a Def and Use. 144243789Sdimstatic unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { 145243789Sdim unsigned UseIdx = 0; 146243789Sdim for (unsigned i = 0; i != UseOperIdx; ++i) { 147243789Sdim const MachineOperand &MO = MI->getOperand(i); 148243789Sdim if (MO.isReg() && MO.readsReg()) 149243789Sdim ++UseIdx; 150243789Sdim } 151243789Sdim return UseIdx; 152243789Sdim} 153243789Sdim 154243789Sdim// Top-level API for clients that know the operand indices. 155243789Sdimunsigned TargetSchedModel::computeOperandLatency( 156243789Sdim const MachineInstr *DefMI, unsigned DefOperIdx, 157263508Sdim const MachineInstr *UseMI, unsigned UseOperIdx) const { 158243789Sdim 159263508Sdim if (!hasInstrSchedModel() && !hasInstrItineraries()) 160263508Sdim return TII->defaultDefLatency(&SchedModel, DefMI); 161243789Sdim 162243789Sdim if (hasInstrItineraries()) { 163243789Sdim int OperLatency = 0; 164243789Sdim if (UseMI) { 165263508Sdim OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, 166263508Sdim UseMI, UseOperIdx); 167243789Sdim } 168243789Sdim else { 169243789Sdim unsigned DefClass = DefMI->getDesc().getSchedClass(); 170243789Sdim OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); 171243789Sdim } 172243789Sdim if (OperLatency >= 0) 173243789Sdim return OperLatency; 174243789Sdim 175243789Sdim // No operand latency was found. 176243789Sdim unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); 177243789Sdim 178243789Sdim // Expected latency is the max of the stage latency and itinerary props. 179243789Sdim // Rather than directly querying InstrItins stage latency, we call a TII 180243789Sdim // hook to allow subtargets to specialize latency. This hook is only 181243789Sdim // applicable to the InstrItins model. InstrSchedModel should model all 182243789Sdim // special cases without TII hooks. 183263508Sdim InstrLatency = std::max(InstrLatency, 184263508Sdim TII->defaultDefLatency(&SchedModel, DefMI)); 185243789Sdim return InstrLatency; 186243789Sdim } 187263508Sdim // hasInstrSchedModel() 188243789Sdim const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); 189243789Sdim unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); 190243789Sdim if (DefIdx < SCDesc->NumWriteLatencyEntries) { 191243789Sdim // Lookup the definition's write latency in SubtargetInfo. 192243789Sdim const MCWriteLatencyEntry *WLEntry = 193243789Sdim STI->getWriteLatencyEntry(SCDesc, DefIdx); 194243789Sdim unsigned WriteID = WLEntry->WriteResourceID; 195263508Sdim unsigned Latency = capLatency(WLEntry->Cycles); 196243789Sdim if (!UseMI) 197243789Sdim return Latency; 198243789Sdim 199243789Sdim // Lookup the use's latency adjustment in SubtargetInfo. 200243789Sdim const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); 201243789Sdim if (UseDesc->NumReadAdvanceEntries == 0) 202243789Sdim return Latency; 203243789Sdim unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); 204263508Sdim int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); 205263508Sdim if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap 206263508Sdim return 0; 207263508Sdim return Latency - Advance; 208243789Sdim } 209243789Sdim // If DefIdx does not exist in the model (e.g. implicit defs), then return 210243789Sdim // unit latency (defaultDefLatency may be too conservative). 211243789Sdim#ifndef NDEBUG 212243789Sdim if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() 213263508Sdim && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() 214263508Sdim && SchedModel.isComplete()) { 215243789Sdim std::string Err; 216243789Sdim raw_string_ostream ss(Err); 217243789Sdim ss << "DefIdx " << DefIdx << " exceeds machine model writes for " 218243789Sdim << *DefMI; 219243789Sdim report_fatal_error(ss.str()); 220243789Sdim } 221243789Sdim#endif 222249423Sdim // FIXME: Automatically giving all implicit defs defaultDefLatency is 223249423Sdim // undesirable. We should only do it for defs that are known to the MC 224249423Sdim // desc like flags. Truly implicit defs should get 1 cycle latency. 225249423Sdim return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); 226243789Sdim} 227243789Sdim 228263508Sdimunsigned 229263508SdimTargetSchedModel::computeInstrLatency(const MachineInstr *MI, 230263508Sdim bool UseDefaultDefLatency) const { 231243789Sdim // For the itinerary model, fall back to the old subtarget hook. 232243789Sdim // Allow subtargets to compute Bundle latencies outside the machine model. 233263508Sdim if (hasInstrItineraries() || MI->isBundle() || 234263508Sdim (!hasInstrSchedModel() && !UseDefaultDefLatency)) 235243789Sdim return TII->getInstrLatency(&InstrItins, MI); 236243789Sdim 237243789Sdim if (hasInstrSchedModel()) { 238243789Sdim const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); 239243789Sdim if (SCDesc->isValid()) { 240243789Sdim unsigned Latency = 0; 241243789Sdim for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; 242243789Sdim DefIdx != DefEnd; ++DefIdx) { 243243789Sdim // Lookup the definition's write latency in SubtargetInfo. 244243789Sdim const MCWriteLatencyEntry *WLEntry = 245243789Sdim STI->getWriteLatencyEntry(SCDesc, DefIdx); 246263508Sdim Latency = std::max(Latency, capLatency(WLEntry->Cycles)); 247243789Sdim } 248243789Sdim return Latency; 249243789Sdim } 250243789Sdim } 251243789Sdim return TII->defaultDefLatency(&SchedModel, MI); 252243789Sdim} 253243789Sdim 254243789Sdimunsigned TargetSchedModel:: 255243789SdimcomputeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, 256243789Sdim const MachineInstr *DepMI) const { 257263508Sdim if (SchedModel.MicroOpBufferSize <= 1) 258243789Sdim return 1; 259243789Sdim 260263508Sdim // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch 261243789Sdim // WAW dependencies in the same cycle. 262243789Sdim 263243789Sdim // Treat predication as a data dependency for out-of-order cpus. In-order 264243789Sdim // cpus do not need to treat predicated writes specially. 265243789Sdim // 266243789Sdim // TODO: The following hack exists because predication passes do not 267243789Sdim // correctly append imp-use operands, and readsReg() strangely returns false 268243789Sdim // for predicated defs. 269243789Sdim unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); 270243789Sdim const MachineFunction &MF = *DefMI->getParent()->getParent(); 271243789Sdim const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 272243789Sdim if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) 273243789Sdim return computeInstrLatency(DefMI); 274243789Sdim 275243789Sdim // If we have a per operand scheduling model, check if this def is writing 276243789Sdim // an unbuffered resource. If so, it treated like an in-order cpu. 277243789Sdim if (hasInstrSchedModel()) { 278243789Sdim const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); 279243789Sdim if (SCDesc->isValid()) { 280243789Sdim for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), 281243789Sdim *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { 282263508Sdim if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) 283243789Sdim return 1; 284243789Sdim } 285243789Sdim } 286243789Sdim } 287243789Sdim return 0; 288243789Sdim} 289