1249259Sdim//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//===----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim// This file defines the interface for the MachineTraceMetrics analysis pass
11249259Sdim// that estimates CPU resource usage and critical data dependency paths through
12249259Sdim// preferred traces. This is useful for super-scalar CPUs where execution speed
13249259Sdim// can be limited both by data dependencies and by limited execution resources.
14249259Sdim//
15249259Sdim// Out-of-order CPUs will often be executing instructions from multiple basic
16249259Sdim// blocks at the same time. This makes it difficult to estimate the resource
17249259Sdim// usage accurately in a single basic block. Resources can be estimated better
18249259Sdim// by looking at a trace through the current basic block.
19249259Sdim//
20249259Sdim// For every block, the MachineTraceMetrics pass will pick a preferred trace
21249259Sdim// that passes through the block. The trace is chosen based on loop structure,
22249259Sdim// branch probabilities, and resource usage. The intention is to pick likely
23249259Sdim// traces that would be the most affected by code transformations.
24249259Sdim//
25249259Sdim// It is expensive to compute a full arbitrary trace for every block, so to
26249259Sdim// save some computations, traces are chosen to be convergent. This means that
27249259Sdim// if the traces through basic blocks A and B ever cross when moving away from
28249259Sdim// A and B, they never diverge again. This applies in both directions - If the
29249259Sdim// traces meet above A and B, they won't diverge when going further back.
30249259Sdim//
31249259Sdim// Traces tend to align with loops. The trace through a block in an inner loop
32249259Sdim// will begin at the loop entry block and end at a back edge. If there are
33249259Sdim// nested loops, the trace may begin and end at those instead.
34249259Sdim//
35249259Sdim// For each trace, we compute the critical path length, which is the number of
36249259Sdim// cycles required to execute the trace when execution is limited by data
37249259Sdim// dependencies only. We also compute the resource height, which is the number
38249259Sdim// of cycles required to execute all instructions in the trace when ignoring
39249259Sdim// data dependencies.
40249259Sdim//
41249259Sdim// Every instruction in the current block has a slack - the number of cycles
42249259Sdim// execution of the instruction can be delayed without extending the critical
43249259Sdim// path.
44249259Sdim//
45249259Sdim//===----------------------------------------------------------------------===//
46249259Sdim
47249259Sdim#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
48249259Sdim#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
49249259Sdim
50249259Sdim#include "llvm/ADT/ArrayRef.h"
51249259Sdim#include "llvm/ADT/DenseMap.h"
52249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
53249259Sdim#include "llvm/CodeGen/TargetSchedule.h"
54249259Sdim
55249259Sdimnamespace llvm {
56249259Sdim
57249259Sdimclass InstrItineraryData;
58249259Sdimclass MachineBasicBlock;
59249259Sdimclass MachineInstr;
60249259Sdimclass MachineLoop;
61249259Sdimclass MachineLoopInfo;
62249259Sdimclass MachineRegisterInfo;
63249259Sdimclass TargetInstrInfo;
64249259Sdimclass TargetRegisterInfo;
65249259Sdimclass raw_ostream;
66249259Sdim
67249259Sdimclass MachineTraceMetrics : public MachineFunctionPass {
68249259Sdim  const MachineFunction *MF;
69249259Sdim  const TargetInstrInfo *TII;
70249259Sdim  const TargetRegisterInfo *TRI;
71249259Sdim  const MachineRegisterInfo *MRI;
72249259Sdim  const MachineLoopInfo *Loops;
73249259Sdim  TargetSchedModel SchedModel;
74249259Sdim
75249259Sdimpublic:
76249259Sdim  class Ensemble;
77249259Sdim  class Trace;
78249259Sdim  static char ID;
79249259Sdim  MachineTraceMetrics();
80249259Sdim  void getAnalysisUsage(AnalysisUsage&) const;
81249259Sdim  bool runOnMachineFunction(MachineFunction&);
82249259Sdim  void releaseMemory();
83249259Sdim  void verifyAnalysis() const;
84249259Sdim
85249259Sdim  friend class Ensemble;
86249259Sdim  friend class Trace;
87249259Sdim
88249259Sdim  /// Per-basic block information that doesn't depend on the trace through the
89249259Sdim  /// block.
90249259Sdim  struct FixedBlockInfo {
91249259Sdim    /// The number of non-trivial instructions in the block.
92249259Sdim    /// Doesn't count PHI and COPY instructions that are likely to be removed.
93249259Sdim    unsigned InstrCount;
94249259Sdim
95249259Sdim    /// True when the block contains calls.
96249259Sdim    bool HasCalls;
97249259Sdim
98249259Sdim    FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
99249259Sdim
100249259Sdim    /// Returns true when resource information for this block has been computed.
101249259Sdim    bool hasResources() const { return InstrCount != ~0u; }
102249259Sdim
103249259Sdim    /// Invalidate resource information.
104249259Sdim    void invalidate() { InstrCount = ~0u; }
105249259Sdim  };
106249259Sdim
107249259Sdim  /// Get the fixed resource information about MBB. Compute it on demand.
108249259Sdim  const FixedBlockInfo *getResources(const MachineBasicBlock*);
109249259Sdim
110249259Sdim  /// Get the scaled number of cycles used per processor resource in MBB.
111249259Sdim  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
112249259Sdim  /// The getResources() function above must have been called first.
113249259Sdim  ///
114249259Sdim  /// These numbers have already been scaled by SchedModel.getResourceFactor().
115249259Sdim  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
116249259Sdim
117249259Sdim  /// A virtual register or regunit required by a basic block or its trace
118249259Sdim  /// successors.
119249259Sdim  struct LiveInReg {
120249259Sdim    /// The virtual register required, or a register unit.
121249259Sdim    unsigned Reg;
122249259Sdim
123249259Sdim    /// For virtual registers: Minimum height of the defining instruction.
124249259Sdim    /// For regunits: Height of the highest user in the trace.
125249259Sdim    unsigned Height;
126249259Sdim
127249259Sdim    LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
128249259Sdim  };
129249259Sdim
130249259Sdim  /// Per-basic block information that relates to a specific trace through the
131249259Sdim  /// block. Convergent traces means that only one of these is required per
132249259Sdim  /// block in a trace ensemble.
133249259Sdim  struct TraceBlockInfo {
134249259Sdim    /// Trace predecessor, or NULL for the first block in the trace.
135249259Sdim    /// Valid when hasValidDepth().
136249259Sdim    const MachineBasicBlock *Pred;
137249259Sdim
138249259Sdim    /// Trace successor, or NULL for the last block in the trace.
139249259Sdim    /// Valid when hasValidHeight().
140249259Sdim    const MachineBasicBlock *Succ;
141249259Sdim
142249259Sdim    /// The block number of the head of the trace. (When hasValidDepth()).
143249259Sdim    unsigned Head;
144249259Sdim
145249259Sdim    /// The block number of the tail of the trace. (When hasValidHeight()).
146249259Sdim    unsigned Tail;
147249259Sdim
148249259Sdim    /// Accumulated number of instructions in the trace above this block.
149249259Sdim    /// Does not include instructions in this block.
150249259Sdim    unsigned InstrDepth;
151249259Sdim
152249259Sdim    /// Accumulated number of instructions in the trace below this block.
153249259Sdim    /// Includes instructions in this block.
154249259Sdim    unsigned InstrHeight;
155249259Sdim
156249259Sdim    TraceBlockInfo() :
157249259Sdim      Pred(0), Succ(0),
158249259Sdim      InstrDepth(~0u), InstrHeight(~0u),
159249259Sdim      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
160249259Sdim
161249259Sdim    /// Returns true if the depth resources have been computed from the trace
162249259Sdim    /// above this block.
163249259Sdim    bool hasValidDepth() const { return InstrDepth != ~0u; }
164249259Sdim
165249259Sdim    /// Returns true if the height resources have been computed from the trace
166249259Sdim    /// below this block.
167249259Sdim    bool hasValidHeight() const { return InstrHeight != ~0u; }
168249259Sdim
169249259Sdim    /// Invalidate depth resources when some block above this one has changed.
170249259Sdim    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
171249259Sdim
172249259Sdim    /// Invalidate height resources when a block below this one has changed.
173249259Sdim    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
174249259Sdim
175249259Sdim    /// Assuming that this is a dominator of TBI, determine if it contains
176249259Sdim    /// useful instruction depths. A dominating block can be above the current
177249259Sdim    /// trace head, and any dependencies from such a far away dominator are not
178249259Sdim    /// expected to affect the critical path.
179249259Sdim    ///
180249259Sdim    /// Also returns true when TBI == this.
181249259Sdim    bool isUsefulDominator(const TraceBlockInfo &TBI) const {
182249259Sdim      // The trace for TBI may not even be calculated yet.
183249259Sdim      if (!hasValidDepth() || !TBI.hasValidDepth())
184249259Sdim        return false;
185249259Sdim      // Instruction depths are only comparable if the traces share a head.
186249259Sdim      if (Head != TBI.Head)
187249259Sdim        return false;
188249259Sdim      // It is almost always the case that TBI belongs to the same trace as
189249259Sdim      // this block, but rare convoluted cases involving irreducible control
190249259Sdim      // flow, a dominator may share a trace head without actually being on the
191249259Sdim      // same trace as TBI. This is not a big problem as long as it doesn't
192249259Sdim      // increase the instruction depth.
193249259Sdim      return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
194249259Sdim    }
195249259Sdim
196249259Sdim    // Data-dependency-related information. Per-instruction depth and height
197249259Sdim    // are computed from data dependencies in the current trace, using
198249259Sdim    // itinerary data.
199249259Sdim
200249259Sdim    /// Instruction depths have been computed. This implies hasValidDepth().
201249259Sdim    bool HasValidInstrDepths;
202249259Sdim
203249259Sdim    /// Instruction heights have been computed. This implies hasValidHeight().
204249259Sdim    bool HasValidInstrHeights;
205249259Sdim
206249259Sdim    /// Critical path length. This is the number of cycles in the longest data
207249259Sdim    /// dependency chain through the trace. This is only valid when both
208249259Sdim    /// HasValidInstrDepths and HasValidInstrHeights are set.
209249259Sdim    unsigned CriticalPath;
210249259Sdim
211249259Sdim    /// Live-in registers. These registers are defined above the current block
212249259Sdim    /// and used by this block or a block below it.
213249259Sdim    /// This does not include PHI uses in the current block, but it does
214249259Sdim    /// include PHI uses in deeper blocks.
215249259Sdim    SmallVector<LiveInReg, 4> LiveIns;
216249259Sdim
217249259Sdim    void print(raw_ostream&) const;
218249259Sdim  };
219249259Sdim
220249259Sdim  /// InstrCycles represents the cycle height and depth of an instruction in a
221249259Sdim  /// trace.
222249259Sdim  struct InstrCycles {
223249259Sdim    /// Earliest issue cycle as determined by data dependencies and instruction
224249259Sdim    /// latencies from the beginning of the trace. Data dependencies from
225249259Sdim    /// before the trace are not included.
226249259Sdim    unsigned Depth;
227249259Sdim
228249259Sdim    /// Minimum number of cycles from this instruction is issued to the of the
229249259Sdim    /// trace, as determined by data dependencies and instruction latencies.
230249259Sdim    unsigned Height;
231249259Sdim  };
232249259Sdim
233249259Sdim  /// A trace represents a plausible sequence of executed basic blocks that
234249259Sdim  /// passes through the current basic block one. The Trace class serves as a
235249259Sdim  /// handle to internal cached data structures.
236249259Sdim  class Trace {
237249259Sdim    Ensemble &TE;
238249259Sdim    TraceBlockInfo &TBI;
239249259Sdim
240249259Sdim    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
241249259Sdim
242249259Sdim  public:
243249259Sdim    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
244249259Sdim    void print(raw_ostream&) const;
245249259Sdim
246249259Sdim    /// Compute the total number of instructions in the trace.
247249259Sdim    unsigned getInstrCount() const {
248249259Sdim      return TBI.InstrDepth + TBI.InstrHeight;
249249259Sdim    }
250249259Sdim
251249259Sdim    /// Return the resource depth of the top/bottom of the trace center block.
252249259Sdim    /// This is the number of cycles required to execute all instructions from
253249259Sdim    /// the trace head to the trace center block. The resource depth only
254249259Sdim    /// considers execution resources, it ignores data dependencies.
255249259Sdim    /// When Bottom is set, instructions in the trace center block are included.
256249259Sdim    unsigned getResourceDepth(bool Bottom) const;
257249259Sdim
258249259Sdim    /// Return the resource length of the trace. This is the number of cycles
259249259Sdim    /// required to execute the instructions in the trace if they were all
260249259Sdim    /// independent, exposing the maximum instruction-level parallelism.
261249259Sdim    ///
262249259Sdim    /// Any blocks in Extrablocks are included as if they were part of the
263252723Sdim    /// trace. Likewise, extra resources required by the specified scheduling
264252723Sdim    /// classes are included. For the caller to account for extra machine
265252723Sdim    /// instructions, it must first resolve each instruction's scheduling class.
266252723Sdim    unsigned getResourceLength(
267252723Sdim                ArrayRef<const MachineBasicBlock*> Extrablocks = None,
268252723Sdim                ArrayRef<const MCSchedClassDesc*> ExtraInstrs = None) const;
269249259Sdim
270249259Sdim    /// Return the length of the (data dependency) critical path through the
271249259Sdim    /// trace.
272249259Sdim    unsigned getCriticalPath() const { return TBI.CriticalPath; }
273249259Sdim
274249259Sdim    /// Return the depth and height of MI. The depth is only valid for
275249259Sdim    /// instructions in or above the trace center block. The height is only
276249259Sdim    /// valid for instructions in or below the trace center block.
277249259Sdim    InstrCycles getInstrCycles(const MachineInstr *MI) const {
278249259Sdim      return TE.Cycles.lookup(MI);
279249259Sdim    }
280249259Sdim
281249259Sdim    /// Return the slack of MI. This is the number of cycles MI can be delayed
282249259Sdim    /// before the critical path becomes longer.
283249259Sdim    /// MI must be an instruction in the trace center block.
284249259Sdim    unsigned getInstrSlack(const MachineInstr *MI) const;
285249259Sdim
286249259Sdim    /// Return the Depth of a PHI instruction in a trace center block successor.
287249259Sdim    /// The PHI does not have to be part of the trace.
288249259Sdim    unsigned getPHIDepth(const MachineInstr *PHI) const;
289249259Sdim  };
290249259Sdim
291249259Sdim  /// A trace ensemble is a collection of traces selected using the same
292249259Sdim  /// strategy, for example 'minimum resource height'. There is one trace for
293249259Sdim  /// every block in the function.
294249259Sdim  class Ensemble {
295249259Sdim    SmallVector<TraceBlockInfo, 4> BlockInfo;
296249259Sdim    DenseMap<const MachineInstr*, InstrCycles> Cycles;
297249259Sdim    SmallVector<unsigned, 0> ProcResourceDepths;
298249259Sdim    SmallVector<unsigned, 0> ProcResourceHeights;
299249259Sdim    friend class Trace;
300249259Sdim
301249259Sdim    void computeTrace(const MachineBasicBlock*);
302249259Sdim    void computeDepthResources(const MachineBasicBlock*);
303249259Sdim    void computeHeightResources(const MachineBasicBlock*);
304249259Sdim    unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
305249259Sdim    void computeInstrDepths(const MachineBasicBlock*);
306249259Sdim    void computeInstrHeights(const MachineBasicBlock*);
307249259Sdim    void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
308249259Sdim                    ArrayRef<const MachineBasicBlock*> Trace);
309249259Sdim
310249259Sdim  protected:
311249259Sdim    MachineTraceMetrics &MTM;
312249259Sdim    virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
313249259Sdim    virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
314249259Sdim    explicit Ensemble(MachineTraceMetrics*);
315249259Sdim    const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
316249259Sdim    const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
317249259Sdim    const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
318249259Sdim    ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
319249259Sdim    ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
320249259Sdim
321249259Sdim  public:
322249259Sdim    virtual ~Ensemble();
323249259Sdim    virtual const char *getName() const =0;
324249259Sdim    void print(raw_ostream&) const;
325249259Sdim    void invalidate(const MachineBasicBlock *MBB);
326249259Sdim    void verify() const;
327249259Sdim
328249259Sdim    /// Get the trace that passes through MBB.
329249259Sdim    /// The trace is computed on demand.
330249259Sdim    Trace getTrace(const MachineBasicBlock *MBB);
331249259Sdim  };
332249259Sdim
333249259Sdim  /// Strategies for selecting traces.
334249259Sdim  enum Strategy {
335249259Sdim    /// Select the trace through a block that has the fewest instructions.
336249259Sdim    TS_MinInstrCount,
337249259Sdim
338249259Sdim    TS_NumStrategies
339249259Sdim  };
340249259Sdim
341249259Sdim  /// Get the trace ensemble representing the given trace selection strategy.
342249259Sdim  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
343249259Sdim  /// and valid for the lifetime of the analysis pass.
344249259Sdim  Ensemble *getEnsemble(Strategy);
345249259Sdim
346249259Sdim  /// Invalidate cached information about MBB. This must be called *before* MBB
347249259Sdim  /// is erased, or the CFG is otherwise changed.
348249259Sdim  ///
349249259Sdim  /// This invalidates per-block information about resource usage for MBB only,
350249259Sdim  /// and it invalidates per-trace information for any trace that passes
351249259Sdim  /// through MBB.
352249259Sdim  ///
353249259Sdim  /// Call Ensemble::getTrace() again to update any trace handles.
354249259Sdim  void invalidate(const MachineBasicBlock *MBB);
355249259Sdim
356249259Sdimprivate:
357249259Sdim  // One entry per basic block, indexed by block number.
358249259Sdim  SmallVector<FixedBlockInfo, 4> BlockInfo;
359249259Sdim
360249259Sdim  // Cycles consumed on each processor resource per block.
361249259Sdim  // The number of processor resource kinds is constant for a given subtarget,
362249259Sdim  // but it is not known at compile time. The number of cycles consumed by
363249259Sdim  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
364249259Sdim  // where Kinds = SchedModel.getNumProcResourceKinds().
365249259Sdim  SmallVector<unsigned, 0> ProcResourceCycles;
366249259Sdim
367249259Sdim  // One ensemble per strategy.
368249259Sdim  Ensemble* Ensembles[TS_NumStrategies];
369249259Sdim
370249259Sdim  // Convert scaled resource usage to a cycle count that can be compared with
371249259Sdim  // latencies.
372249259Sdim  unsigned getCycles(unsigned Scaled) {
373249259Sdim    unsigned Factor = SchedModel.getLatencyFactor();
374249259Sdim    return (Scaled + Factor - 1) / Factor;
375249259Sdim  }
376249259Sdim};
377249259Sdim
378249259Sdiminline raw_ostream &operator<<(raw_ostream &OS,
379249259Sdim                               const MachineTraceMetrics::Trace &Tr) {
380249259Sdim  Tr.print(OS);
381249259Sdim  return OS;
382249259Sdim}
383249259Sdim
384249259Sdiminline raw_ostream &operator<<(raw_ostream &OS,
385249259Sdim                               const MachineTraceMetrics::Ensemble &En) {
386249259Sdim  En.print(OS);
387249259Sdim  return OS;
388249259Sdim}
389249259Sdim} // end namespace llvm
390249259Sdim
391249259Sdim#endif
392