SystemZHazardRecognizer.cpp revision 327952
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a hazard recognizer for the SystemZ scheduler.
11//
12// This class is used by the SystemZ scheduling strategy to maintain
13// the state during scheduling, and provide cost functions for
14// scheduling candidates. This includes:
15//
16// * Decoder grouping. A decoder group can maximally hold 3 uops, and
17// instructions that always begin a new group should be scheduled when
18// the current decoder group is empty.
19// * Processor resources usage. It is beneficial to balance the use of
20// resources.
21//
22// A goal is to consider all instructions, also those outside of any
23// scheduling region. Such instructions are "advanced" past and include
24// single instructions before a scheduling region, branches etc.
25//
// A block that has only one predecessor continues scheduling with the state
// of that predecessor (which may have been updated by emitting branches).
28//
29// ===---------------------------------------------------------------------===//
30
31#include "SystemZHazardRecognizer.h"
32#include "llvm/ADT/Statistic.h"
33
34using namespace llvm;
35
36#define DEBUG_TYPE "machine-scheduler"
37
38// This is the limit of processor resource usage at which the
39// scheduler should try to look for other instructions (not using the
40// critical resource).
41static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                   cl::desc("The OOO window for processor "
43                                            "resources during scheduling."),
44                                   cl::init(8));
45
46unsigned SystemZHazardRecognizer::
47getNumDecoderSlots(SUnit *SU) const {
48  const MCSchedClassDesc *SC = getSchedClass(SU);
49  if (!SC->isValid())
50    return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51
52  if (SC->BeginGroup) {
53    if (!SC->EndGroup)
54      return 2; // Cracked instruction
55    else
56      return 3; // Expanded/group-alone instruction
57  }
58
59  return 1; // Normal instruction
60}
61
62unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
63  unsigned Idx = CurrGroupSize;
64  if (GrpCount % 2)
65    Idx += 3;
66  return Idx;
67}
68
69ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
70getHazardType(SUnit *m, int Stalls) {
71  return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
72}
73
74void SystemZHazardRecognizer::Reset() {
75  CurrGroupSize = 0;
76  clearProcResCounters();
77  GrpCount = 0;
78  LastFPdOpCycleIdx = UINT_MAX;
79  LastEmittedMI = nullptr;
80  DEBUG(CurGroupDbg = "";);
81}
82
83bool
84SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
85  const MCSchedClassDesc *SC = getSchedClass(SU);
86  if (!SC->isValid())
87    return true;
88
89  // A cracked instruction only fits into schedule if the current
90  // group is empty.
91  if (SC->BeginGroup)
92    return (CurrGroupSize == 0);
93
94  // Since a full group is handled immediately in EmitInstruction(),
95  // SU should fit into current group. NumSlots should be 1 or 0,
96  // since it is not a cracked or expanded instruction.
97  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
98          "Expected normal instruction to fit in non-full group!");
99
100  return true;
101}
102
103void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
104  if (CurrGroupSize > 0) {
105    DEBUG(dumpCurrGroup("Completed decode group"));
106    DEBUG(CurGroupDbg = "";);
107
108    GrpCount++;
109
110    // Reset counter for next group.
111    CurrGroupSize = 0;
112
113    // Decrease counters for execution units by one.
114    for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
115      if (ProcResourceCounters[i] > 0)
116        ProcResourceCounters[i]--;
117
118    // Clear CriticalResourceIdx if it is now below the threshold.
119    if (CriticalResourceIdx != UINT_MAX &&
120        (ProcResourceCounters[CriticalResourceIdx] <=
121         ProcResCostLim))
122      CriticalResourceIdx = UINT_MAX;
123  }
124
125  DEBUG(if (DbgOutput)
126          dumpProcResourceCounters(););
127}
128
129#ifndef NDEBUG // Debug output
130void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
131  OS << "SU(" << SU->NodeNum << "):";
132  OS << TII->getName(SU->getInstr()->getOpcode());
133
134  const MCSchedClassDesc *SC = getSchedClass(SU);
135  if (!SC->isValid())
136    return;
137
138  for (TargetSchedModel::ProcResIter
139         PI = SchedModel->getWriteProcResBegin(SC),
140         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
141    const MCProcResourceDesc &PRD =
142      *SchedModel->getProcResource(PI->ProcResourceIdx);
143    std::string FU(PRD.Name);
144    // trim e.g. Z13_FXaUnit -> FXa
145    FU = FU.substr(FU.find("_") + 1);
146    FU.resize(FU.find("Unit"));
147    OS << "/" << FU;
148
149    if (PI->Cycles > 1)
150      OS << "(" << PI->Cycles << "cyc)";
151  }
152
153  if (SC->NumMicroOps > 1)
154    OS << "/" << SC->NumMicroOps << "uops";
155  if (SC->BeginGroup && SC->EndGroup)
156    OS << "/GroupsAlone";
157  else if (SC->BeginGroup)
158    OS << "/BeginsGroup";
159  else if (SC->EndGroup)
160    OS << "/EndsGroup";
161  if (SU->isUnbuffered)
162    OS << "/Unbuffered";
163}
164
165void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
166  dbgs() << "+++ " << Msg;
167  dbgs() << ": ";
168
169  if (CurGroupDbg.empty())
170    dbgs() << " <empty>\n";
171  else {
172    dbgs() << "{ " << CurGroupDbg << " }";
173    dbgs() << " (" << CurrGroupSize << " decoder slot"
174           << (CurrGroupSize > 1 ? "s":"")
175           << ")\n";
176  }
177}
178
179void SystemZHazardRecognizer::dumpProcResourceCounters() const {
180  bool any = false;
181
182  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
183    if (ProcResourceCounters[i] > 0) {
184      any = true;
185      break;
186    }
187
188  if (!any)
189    return;
190
191  dbgs() << "+++ Resource counters:\n";
192  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
193    if (ProcResourceCounters[i] > 0) {
194      dbgs() << "+++ Extra schedule for execution unit "
195             << SchedModel->getProcResource(i)->Name
196             << ": " << ProcResourceCounters[i] << "\n";
197      any = true;
198    }
199}
200#endif //NDEBUG
201
202void SystemZHazardRecognizer::clearProcResCounters() {
203  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
204  CriticalResourceIdx = UINT_MAX;
205}
206
207static inline bool isBranchRetTrap(MachineInstr *MI) {
208  return (MI->isBranch() || MI->isReturn() ||
209          MI->getOpcode() == SystemZ::CondTrap);
210}
211
212// Update state with SU as the next scheduled unit.
213void SystemZHazardRecognizer::
214EmitInstruction(SUnit *SU) {
215  const MCSchedClassDesc *SC = getSchedClass(SU);
216  DEBUG( dumpCurrGroup("Decode group before emission"););
217
218  // If scheduling an SU that must begin a new decoder group, move on
219  // to next group.
220  if (!fitsIntoCurrentGroup(SU))
221    nextGroup();
222
223  DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
224         dbgs() << "\n";
225         raw_string_ostream cgd(CurGroupDbg);
226         if (CurGroupDbg.length())
227           cgd << ", ";
228         dumpSU(SU, cgd););
229
230  LastEmittedMI = SU->getInstr();
231
232  // After returning from a call, we don't know much about the state.
233  if (SU->isCall) {
234    DEBUG (dbgs() << "+++ Clearing state after call.\n";);
235    clearProcResCounters();
236    LastFPdOpCycleIdx = UINT_MAX;
237    CurrGroupSize += getNumDecoderSlots(SU);
238    assert (CurrGroupSize <= 3);
239    nextGroup();
240    return;
241  }
242
243  // Increase counter for execution unit(s).
244  for (TargetSchedModel::ProcResIter
245         PI = SchedModel->getWriteProcResBegin(SC),
246         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
247    // Don't handle FPd together with the other resources.
248    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
249      continue;
250    int &CurrCounter =
251      ProcResourceCounters[PI->ProcResourceIdx];
252    CurrCounter += PI->Cycles;
253    // Check if this is now the new critical resource.
254    if ((CurrCounter > ProcResCostLim) &&
255        (CriticalResourceIdx == UINT_MAX ||
256         (PI->ProcResourceIdx != CriticalResourceIdx &&
257          CurrCounter >
258          ProcResourceCounters[CriticalResourceIdx]))) {
259      DEBUG( dbgs() << "+++ New critical resource: "
260             << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
261             << "\n";);
262      CriticalResourceIdx = PI->ProcResourceIdx;
263    }
264  }
265
266  // Make note of an instruction that uses a blocking resource (FPd).
267  if (SU->isUnbuffered) {
268    LastFPdOpCycleIdx = getCurrCycleIdx();
269    DEBUG (dbgs() << "+++ Last FPd cycle index: "
270           << LastFPdOpCycleIdx << "\n";);
271  }
272
273  bool GroupEndingBranch =
274    (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr()));
275
276  // Insert SU into current group by increasing number of slots used
277  // in current group.
278  CurrGroupSize += getNumDecoderSlots(SU);
279  assert (CurrGroupSize <= 3);
280
281  // Check if current group is now full/ended. If so, move on to next
282  // group to be ready to evaluate more candidates.
283  if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch)
284    nextGroup();
285}
286
287int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
288  const MCSchedClassDesc *SC = getSchedClass(SU);
289  if (!SC->isValid())
290    return 0;
291
292  // If SU begins new group, it can either break a current group early
293  // or fit naturally if current group is empty (negative cost).
294  if (SC->BeginGroup) {
295    if (CurrGroupSize)
296      return 3 - CurrGroupSize;
297    return -1;
298  }
299
300  // Similarly, a group-ending SU may either fit well (last in group), or
301  // end the group prematurely.
302  if (SC->EndGroup) {
303    unsigned resultingGroupSize =
304      (CurrGroupSize + getNumDecoderSlots(SU));
305    if (resultingGroupSize < 3)
306      return (3 - resultingGroupSize);
307    return -1;
308  }
309
310  // Most instructions can be placed in any decoder slot.
311  return 0;
312}
313
314bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
315  assert (SU->isUnbuffered);
316  // If this is the first FPd op, it should be scheduled high.
317  if (LastFPdOpCycleIdx == UINT_MAX)
318    return true;
319  // If this is not the first PFd op, it should go into the other side
320  // of the processor to use the other FPd unit there. This should
321  // generally happen if two FPd ops are placed with 2 other
322  // instructions between them (modulo 6).
323  if (LastFPdOpCycleIdx > getCurrCycleIdx())
324    return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
325  return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
326}
327
328int SystemZHazardRecognizer::
329resourcesCost(SUnit *SU) {
330  int Cost = 0;
331
332  const MCSchedClassDesc *SC = getSchedClass(SU);
333  if (!SC->isValid())
334    return 0;
335
336  // For a FPd op, either return min or max value as indicated by the
337  // distance to any prior FPd op.
338  if (SU->isUnbuffered)
339    Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
340  // For other instructions, give a cost to the use of the critical resource.
341  else if (CriticalResourceIdx != UINT_MAX) {
342    for (TargetSchedModel::ProcResIter
343           PI = SchedModel->getWriteProcResBegin(SC),
344           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
345      if (PI->ProcResourceIdx == CriticalResourceIdx)
346        Cost = PI->Cycles;
347  }
348
349  return Cost;
350}
351
352void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
353                                              bool TakenBranch) {
354  // Make a temporary SUnit.
355  SUnit SU(MI, 0);
356
357  // Set interesting flags.
358  SU.isCall = MI->isCall();
359
360  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
361  for (const MCWriteProcResEntry &PRE :
362         make_range(SchedModel->getWriteProcResBegin(SC),
363                    SchedModel->getWriteProcResEnd(SC))) {
364    switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
365    case 0:
366      SU.hasReservedResource = true;
367      break;
368    case 1:
369      SU.isUnbuffered = true;
370      break;
371    default:
372      break;
373    }
374  }
375
376  EmitInstruction(&SU);
377
378  if (TakenBranch && CurrGroupSize > 0)
379    nextGroup(false /*DbgOutput*/);
380
381  assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
382          "Scheduler: unhandled terminator!");
383}
384
385void SystemZHazardRecognizer::
386copyState(SystemZHazardRecognizer *Incoming) {
387  // Current decoder group
388  CurrGroupSize = Incoming->CurrGroupSize;
389  DEBUG (CurGroupDbg = Incoming->CurGroupDbg;);
390
391  // Processor resources
392  ProcResourceCounters = Incoming->ProcResourceCounters;
393  CriticalResourceIdx = Incoming->CriticalResourceIdx;
394
395  // FPd
396  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
397  GrpCount = Incoming->GrpCount;
398}
399