1249259Sdim//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim// This file defines the interface for the MachineTraceMetrics analysis pass 11249259Sdim// that estimates CPU resource usage and critical data dependency paths through 12249259Sdim// preferred traces. This is useful for super-scalar CPUs where execution speed 13249259Sdim// can be limited both by data dependencies and by limited execution resources. 14249259Sdim// 15249259Sdim// Out-of-order CPUs will often be executing instructions from multiple basic 16249259Sdim// blocks at the same time. This makes it difficult to estimate the resource 17249259Sdim// usage accurately in a single basic block. Resources can be estimated better 18249259Sdim// by looking at a trace through the current basic block. 19249259Sdim// 20249259Sdim// For every block, the MachineTraceMetrics pass will pick a preferred trace 21249259Sdim// that passes through the block. The trace is chosen based on loop structure, 22249259Sdim// branch probabilities, and resource usage. The intention is to pick likely 23249259Sdim// traces that would be the most affected by code transformations. 24249259Sdim// 25249259Sdim// It is expensive to compute a full arbitrary trace for every block, so to 26249259Sdim// save some computations, traces are chosen to be convergent. This means that 27249259Sdim// if the traces through basic blocks A and B ever cross when moving away from 28249259Sdim// A and B, they never diverge again. This applies in both directions - If the 29249259Sdim// traces meet above A and B, they won't diverge when going further back. 30249259Sdim// 31249259Sdim// Traces tend to align with loops. The trace through a block in an inner loop 32249259Sdim// will begin at the loop entry block and end at a back edge. If there are 33249259Sdim// nested loops, the trace may begin and end at those instead. 34249259Sdim// 35249259Sdim// For each trace, we compute the critical path length, which is the number of 36249259Sdim// cycles required to execute the trace when execution is limited by data 37249259Sdim// dependencies only. We also compute the resource height, which is the number 38249259Sdim// of cycles required to execute all instructions in the trace when ignoring 39249259Sdim// data dependencies. 40249259Sdim// 41249259Sdim// Every instruction in the current block has a slack - the number of cycles 42249259Sdim// execution of the instruction can be delayed without extending the critical 43249259Sdim// path. 44249259Sdim// 45249259Sdim//===----------------------------------------------------------------------===// 46249259Sdim 47249259Sdim#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H 48249259Sdim#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H 49249259Sdim 50249259Sdim#include "llvm/ADT/ArrayRef.h" 51249259Sdim#include "llvm/ADT/DenseMap.h" 52249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 53249259Sdim#include "llvm/CodeGen/TargetSchedule.h" 54249259Sdim 55249259Sdimnamespace llvm { 56249259Sdim 57249259Sdimclass InstrItineraryData; 58249259Sdimclass MachineBasicBlock; 59249259Sdimclass MachineInstr; 60249259Sdimclass MachineLoop; 61249259Sdimclass MachineLoopInfo; 62249259Sdimclass MachineRegisterInfo; 63249259Sdimclass TargetInstrInfo; 64249259Sdimclass TargetRegisterInfo; 65249259Sdimclass raw_ostream; 66249259Sdim 67249259Sdimclass MachineTraceMetrics : public MachineFunctionPass { 68249259Sdim const MachineFunction *MF; 69249259Sdim const TargetInstrInfo *TII; 70249259Sdim const TargetRegisterInfo *TRI; 71249259Sdim const MachineRegisterInfo *MRI; 72249259Sdim const MachineLoopInfo *Loops; 73249259Sdim TargetSchedModel SchedModel; 74249259Sdim 75249259Sdimpublic: 76249259Sdim class Ensemble; 77249259Sdim class Trace; 78249259Sdim static char ID; 79249259Sdim MachineTraceMetrics(); 80249259Sdim void getAnalysisUsage(AnalysisUsage&) const; 81249259Sdim bool runOnMachineFunction(MachineFunction&); 82249259Sdim void releaseMemory(); 83249259Sdim void verifyAnalysis() const; 84249259Sdim 85249259Sdim friend class Ensemble; 86249259Sdim friend class Trace; 87249259Sdim 88249259Sdim /// Per-basic block information that doesn't depend on the trace through the 89249259Sdim /// block. 90249259Sdim struct FixedBlockInfo { 91249259Sdim /// The number of non-trivial instructions in the block. 92249259Sdim /// Doesn't count PHI and COPY instructions that are likely to be removed. 93249259Sdim unsigned InstrCount; 94249259Sdim 95249259Sdim /// True when the block contains calls. 96249259Sdim bool HasCalls; 97249259Sdim 98249259Sdim FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} 99249259Sdim 100249259Sdim /// Returns true when resource information for this block has been computed. 101249259Sdim bool hasResources() const { return InstrCount != ~0u; } 102249259Sdim 103249259Sdim /// Invalidate resource information. 104249259Sdim void invalidate() { InstrCount = ~0u; } 105249259Sdim }; 106249259Sdim 107249259Sdim /// Get the fixed resource information about MBB. Compute it on demand. 108249259Sdim const FixedBlockInfo *getResources(const MachineBasicBlock*); 109249259Sdim 110249259Sdim /// Get the scaled number of cycles used per processor resource in MBB. 111249259Sdim /// This is an array with SchedModel.getNumProcResourceKinds() entries. 112249259Sdim /// The getResources() function above must have been called first. 113249259Sdim /// 114249259Sdim /// These numbers have already been scaled by SchedModel.getResourceFactor(). 115249259Sdim ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const; 116249259Sdim 117249259Sdim /// A virtual register or regunit required by a basic block or its trace 118249259Sdim /// successors. 119249259Sdim struct LiveInReg { 120249259Sdim /// The virtual register required, or a register unit. 121249259Sdim unsigned Reg; 122249259Sdim 123249259Sdim /// For virtual registers: Minimum height of the defining instruction. 124249259Sdim /// For regunits: Height of the highest user in the trace. 125249259Sdim unsigned Height; 126249259Sdim 127249259Sdim LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} 128249259Sdim }; 129249259Sdim 130249259Sdim /// Per-basic block information that relates to a specific trace through the 131249259Sdim /// block. Convergent traces means that only one of these is required per 132249259Sdim /// block in a trace ensemble. 133249259Sdim struct TraceBlockInfo { 134249259Sdim /// Trace predecessor, or NULL for the first block in the trace. 135249259Sdim /// Valid when hasValidDepth(). 136249259Sdim const MachineBasicBlock *Pred; 137249259Sdim 138249259Sdim /// Trace successor, or NULL for the last block in the trace. 139249259Sdim /// Valid when hasValidHeight(). 140249259Sdim const MachineBasicBlock *Succ; 141249259Sdim 142249259Sdim /// The block number of the head of the trace. (When hasValidDepth()). 143249259Sdim unsigned Head; 144249259Sdim 145249259Sdim /// The block number of the tail of the trace. (When hasValidHeight()). 146249259Sdim unsigned Tail; 147249259Sdim 148249259Sdim /// Accumulated number of instructions in the trace above this block. 149249259Sdim /// Does not include instructions in this block. 150249259Sdim unsigned InstrDepth; 151249259Sdim 152249259Sdim /// Accumulated number of instructions in the trace below this block. 153249259Sdim /// Includes instructions in this block. 154249259Sdim unsigned InstrHeight; 155249259Sdim 156249259Sdim TraceBlockInfo() : 157249259Sdim Pred(0), Succ(0), 158249259Sdim InstrDepth(~0u), InstrHeight(~0u), 159249259Sdim HasValidInstrDepths(false), HasValidInstrHeights(false) {} 160249259Sdim 161249259Sdim /// Returns true if the depth resources have been computed from the trace 162249259Sdim /// above this block. 163249259Sdim bool hasValidDepth() const { return InstrDepth != ~0u; } 164249259Sdim 165249259Sdim /// Returns true if the height resources have been computed from the trace 166249259Sdim /// below this block. 167249259Sdim bool hasValidHeight() const { return InstrHeight != ~0u; } 168249259Sdim 169249259Sdim /// Invalidate depth resources when some block above this one has changed. 170249259Sdim void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; } 171249259Sdim 172249259Sdim /// Invalidate height resources when a block below this one has changed. 173249259Sdim void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } 174249259Sdim 175249259Sdim /// Assuming that this is a dominator of TBI, determine if it contains 176249259Sdim /// useful instruction depths. A dominating block can be above the current 177249259Sdim /// trace head, and any dependencies from such a far away dominator are not 178249259Sdim /// expected to affect the critical path. 179249259Sdim /// 180249259Sdim /// Also returns true when TBI == this. 181249259Sdim bool isUsefulDominator(const TraceBlockInfo &TBI) const { 182249259Sdim // The trace for TBI may not even be calculated yet. 183249259Sdim if (!hasValidDepth() || !TBI.hasValidDepth()) 184249259Sdim return false; 185249259Sdim // Instruction depths are only comparable if the traces share a head. 186249259Sdim if (Head != TBI.Head) 187249259Sdim return false; 188249259Sdim // It is almost always the case that TBI belongs to the same trace as 189249259Sdim // this block, but rare convoluted cases involving irreducible control 190249259Sdim // flow, a dominator may share a trace head without actually being on the 191249259Sdim // same trace as TBI. This is not a big problem as long as it doesn't 192249259Sdim // increase the instruction depth. 193249259Sdim return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth; 194249259Sdim } 195249259Sdim 196249259Sdim // Data-dependency-related information. Per-instruction depth and height 197249259Sdim // are computed from data dependencies in the current trace, using 198249259Sdim // itinerary data. 199249259Sdim 200249259Sdim /// Instruction depths have been computed. This implies hasValidDepth(). 201249259Sdim bool HasValidInstrDepths; 202249259Sdim 203249259Sdim /// Instruction heights have been computed. This implies hasValidHeight(). 204249259Sdim bool HasValidInstrHeights; 205249259Sdim 206249259Sdim /// Critical path length. This is the number of cycles in the longest data 207249259Sdim /// dependency chain through the trace. This is only valid when both 208249259Sdim /// HasValidInstrDepths and HasValidInstrHeights are set. 209249259Sdim unsigned CriticalPath; 210249259Sdim 211249259Sdim /// Live-in registers. These registers are defined above the current block 212249259Sdim /// and used by this block or a block below it. 213249259Sdim /// This does not include PHI uses in the current block, but it does 214249259Sdim /// include PHI uses in deeper blocks. 215249259Sdim SmallVector<LiveInReg, 4> LiveIns; 216249259Sdim 217249259Sdim void print(raw_ostream&) const; 218249259Sdim }; 219249259Sdim 220249259Sdim /// InstrCycles represents the cycle height and depth of an instruction in a 221249259Sdim /// trace. 222249259Sdim struct InstrCycles { 223249259Sdim /// Earliest issue cycle as determined by data dependencies and instruction 224249259Sdim /// latencies from the beginning of the trace. Data dependencies from 225249259Sdim /// before the trace are not included. 226249259Sdim unsigned Depth; 227249259Sdim 228249259Sdim /// Minimum number of cycles from this instruction is issued to the of the 229249259Sdim /// trace, as determined by data dependencies and instruction latencies. 230249259Sdim unsigned Height; 231249259Sdim }; 232249259Sdim 233249259Sdim /// A trace represents a plausible sequence of executed basic blocks that 234249259Sdim /// passes through the current basic block one. The Trace class serves as a 235249259Sdim /// handle to internal cached data structures. 236249259Sdim class Trace { 237249259Sdim Ensemble &TE; 238249259Sdim TraceBlockInfo &TBI; 239249259Sdim 240249259Sdim unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; } 241249259Sdim 242249259Sdim public: 243249259Sdim explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {} 244249259Sdim void print(raw_ostream&) const; 245249259Sdim 246249259Sdim /// Compute the total number of instructions in the trace. 247249259Sdim unsigned getInstrCount() const { 248249259Sdim return TBI.InstrDepth + TBI.InstrHeight; 249249259Sdim } 250249259Sdim 251249259Sdim /// Return the resource depth of the top/bottom of the trace center block. 252249259Sdim /// This is the number of cycles required to execute all instructions from 253249259Sdim /// the trace head to the trace center block. The resource depth only 254249259Sdim /// considers execution resources, it ignores data dependencies. 255249259Sdim /// When Bottom is set, instructions in the trace center block are included. 256249259Sdim unsigned getResourceDepth(bool Bottom) const; 257249259Sdim 258249259Sdim /// Return the resource length of the trace. This is the number of cycles 259249259Sdim /// required to execute the instructions in the trace if they were all 260249259Sdim /// independent, exposing the maximum instruction-level parallelism. 261249259Sdim /// 262249259Sdim /// Any blocks in Extrablocks are included as if they were part of the 263252723Sdim /// trace. Likewise, extra resources required by the specified scheduling 264252723Sdim /// classes are included. For the caller to account for extra machine 265252723Sdim /// instructions, it must first resolve each instruction's scheduling class. 266252723Sdim unsigned getResourceLength( 267252723Sdim ArrayRef<const MachineBasicBlock*> Extrablocks = None, 268252723Sdim ArrayRef<const MCSchedClassDesc*> ExtraInstrs = None) const; 269249259Sdim 270249259Sdim /// Return the length of the (data dependency) critical path through the 271249259Sdim /// trace. 272249259Sdim unsigned getCriticalPath() const { return TBI.CriticalPath; } 273249259Sdim 274249259Sdim /// Return the depth and height of MI. The depth is only valid for 275249259Sdim /// instructions in or above the trace center block. The height is only 276249259Sdim /// valid for instructions in or below the trace center block. 277249259Sdim InstrCycles getInstrCycles(const MachineInstr *MI) const { 278249259Sdim return TE.Cycles.lookup(MI); 279249259Sdim } 280249259Sdim 281249259Sdim /// Return the slack of MI. This is the number of cycles MI can be delayed 282249259Sdim /// before the critical path becomes longer. 283249259Sdim /// MI must be an instruction in the trace center block. 284249259Sdim unsigned getInstrSlack(const MachineInstr *MI) const; 285249259Sdim 286249259Sdim /// Return the Depth of a PHI instruction in a trace center block successor. 287249259Sdim /// The PHI does not have to be part of the trace. 288249259Sdim unsigned getPHIDepth(const MachineInstr *PHI) const; 289249259Sdim }; 290249259Sdim 291249259Sdim /// A trace ensemble is a collection of traces selected using the same 292249259Sdim /// strategy, for example 'minimum resource height'. There is one trace for 293249259Sdim /// every block in the function. 294249259Sdim class Ensemble { 295249259Sdim SmallVector<TraceBlockInfo, 4> BlockInfo; 296249259Sdim DenseMap<const MachineInstr*, InstrCycles> Cycles; 297249259Sdim SmallVector<unsigned, 0> ProcResourceDepths; 298249259Sdim SmallVector<unsigned, 0> ProcResourceHeights; 299249259Sdim friend class Trace; 300249259Sdim 301249259Sdim void computeTrace(const MachineBasicBlock*); 302249259Sdim void computeDepthResources(const MachineBasicBlock*); 303249259Sdim void computeHeightResources(const MachineBasicBlock*); 304249259Sdim unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); 305249259Sdim void computeInstrDepths(const MachineBasicBlock*); 306249259Sdim void computeInstrHeights(const MachineBasicBlock*); 307249259Sdim void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, 308249259Sdim ArrayRef<const MachineBasicBlock*> Trace); 309249259Sdim 310249259Sdim protected: 311249259Sdim MachineTraceMetrics &MTM; 312249259Sdim virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; 313249259Sdim virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; 314249259Sdim explicit Ensemble(MachineTraceMetrics*); 315249259Sdim const MachineLoop *getLoopFor(const MachineBasicBlock*) const; 316249259Sdim const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; 317249259Sdim const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; 318249259Sdim ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const; 319249259Sdim ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const; 320249259Sdim 321249259Sdim public: 322249259Sdim virtual ~Ensemble(); 323249259Sdim virtual const char *getName() const =0; 324249259Sdim void print(raw_ostream&) const; 325249259Sdim void invalidate(const MachineBasicBlock *MBB); 326249259Sdim void verify() const; 327249259Sdim 328249259Sdim /// Get the trace that passes through MBB. 329249259Sdim /// The trace is computed on demand. 330249259Sdim Trace getTrace(const MachineBasicBlock *MBB); 331249259Sdim }; 332249259Sdim 333249259Sdim /// Strategies for selecting traces. 334249259Sdim enum Strategy { 335249259Sdim /// Select the trace through a block that has the fewest instructions. 336249259Sdim TS_MinInstrCount, 337249259Sdim 338249259Sdim TS_NumStrategies 339249259Sdim }; 340249259Sdim 341249259Sdim /// Get the trace ensemble representing the given trace selection strategy. 342249259Sdim /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, 343249259Sdim /// and valid for the lifetime of the analysis pass. 344249259Sdim Ensemble *getEnsemble(Strategy); 345249259Sdim 346249259Sdim /// Invalidate cached information about MBB. This must be called *before* MBB 347249259Sdim /// is erased, or the CFG is otherwise changed. 348249259Sdim /// 349249259Sdim /// This invalidates per-block information about resource usage for MBB only, 350249259Sdim /// and it invalidates per-trace information for any trace that passes 351249259Sdim /// through MBB. 352249259Sdim /// 353249259Sdim /// Call Ensemble::getTrace() again to update any trace handles. 354249259Sdim void invalidate(const MachineBasicBlock *MBB); 355249259Sdim 356249259Sdimprivate: 357249259Sdim // One entry per basic block, indexed by block number. 358249259Sdim SmallVector<FixedBlockInfo, 4> BlockInfo; 359249259Sdim 360249259Sdim // Cycles consumed on each processor resource per block. 361249259Sdim // The number of processor resource kinds is constant for a given subtarget, 362249259Sdim // but it is not known at compile time. The number of cycles consumed by 363249259Sdim // block B on processor resource R is at ProcResourceCycles[B*Kinds + R] 364249259Sdim // where Kinds = SchedModel.getNumProcResourceKinds(). 365249259Sdim SmallVector<unsigned, 0> ProcResourceCycles; 366249259Sdim 367249259Sdim // One ensemble per strategy. 368249259Sdim Ensemble* Ensembles[TS_NumStrategies]; 369249259Sdim 370249259Sdim // Convert scaled resource usage to a cycle count that can be compared with 371249259Sdim // latencies. 372249259Sdim unsigned getCycles(unsigned Scaled) { 373249259Sdim unsigned Factor = SchedModel.getLatencyFactor(); 374249259Sdim return (Scaled + Factor - 1) / Factor; 375249259Sdim } 376249259Sdim}; 377249259Sdim 378249259Sdiminline raw_ostream &operator<<(raw_ostream &OS, 379249259Sdim const MachineTraceMetrics::Trace &Tr) { 380249259Sdim Tr.print(OS); 381249259Sdim return OS; 382249259Sdim} 383249259Sdim 384249259Sdiminline raw_ostream &operator<<(raw_ostream &OS, 385249259Sdim const MachineTraceMetrics::Ensemble &En) { 386249259Sdim En.print(OS); 387249259Sdim return OS; 388249259Sdim} 389249259Sdim} // end namespace llvm 390249259Sdim 391249259Sdim#endif 392