SystemZHazardRecognizer.cpp revision 321369
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a hazard recognizer for the SystemZ scheduler.
11//
12// This class is used by the SystemZ scheduling strategy to maintain
13// the state during scheduling, and provide cost functions for
14// scheduling candidates. This includes:
15//
16// * Decoder grouping. A decoder group can maximally hold 3 uops, and
17// instructions that always begin a new group should be scheduled when
18// the current decoder group is empty.
19// * Processor resources usage. It is beneficial to balance the use of
20// resources.
21//
22// ===---------------------------------------------------------------------===//
23
24#include "SystemZHazardRecognizer.h"
25#include "llvm/ADT/Statistic.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "machine-scheduler"
30
31// This is the limit of processor resource usage at which the
32// scheduler should try to look for other instructions (not using the
33// critical resource).
34static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
35                                   cl::desc("The OOO window for processor "
36                                            "resources during scheduling."),
37                                   cl::init(8));
38
39SystemZHazardRecognizer::
40SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
41                                                        SchedModel(nullptr) {}
42
43unsigned SystemZHazardRecognizer::
44getNumDecoderSlots(SUnit *SU) const {
45  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
46  if (!SC->isValid())
47    return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
48
49  if (SC->BeginGroup) {
50    if (!SC->EndGroup)
51      return 2; // Cracked instruction
52    else
53      return 3; // Expanded/group-alone instruction
54  }
55
56  return 1; // Normal instruction
57}
58
59unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
60  unsigned Idx = CurrGroupSize;
61  if (GrpCount % 2)
62    Idx += 3;
63  return Idx;
64}
65
66ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
67getHazardType(SUnit *m, int Stalls) {
68  return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
69}
70
71void SystemZHazardRecognizer::Reset() {
72  CurrGroupSize = 0;
73  clearProcResCounters();
74  GrpCount = 0;
75  LastFPdOpCycleIdx = UINT_MAX;
76  DEBUG(CurGroupDbg = "";);
77}
78
79bool
80SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
81  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
82  if (!SC->isValid())
83    return true;
84
85  // A cracked instruction only fits into schedule if the current
86  // group is empty.
87  if (SC->BeginGroup)
88    return (CurrGroupSize == 0);
89
90  // Since a full group is handled immediately in EmitInstruction(),
91  // SU should fit into current group. NumSlots should be 1 or 0,
92  // since it is not a cracked or expanded instruction.
93  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
94          "Expected normal instruction to fit in non-full group!");
95
96  return true;
97}
98
99void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
100  if (CurrGroupSize > 0) {
101    DEBUG(dumpCurrGroup("Completed decode group"));
102    DEBUG(CurGroupDbg = "";);
103
104    GrpCount++;
105
106    // Reset counter for next group.
107    CurrGroupSize = 0;
108
109    // Decrease counters for execution units by one.
110    for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
111      if (ProcResourceCounters[i] > 0)
112        ProcResourceCounters[i]--;
113
114    // Clear CriticalResourceIdx if it is now below the threshold.
115    if (CriticalResourceIdx != UINT_MAX &&
116        (ProcResourceCounters[CriticalResourceIdx] <=
117         ProcResCostLim))
118      CriticalResourceIdx = UINT_MAX;
119  }
120
121  DEBUG(if (DbgOutput)
122          dumpProcResourceCounters(););
123}
124
125#ifndef NDEBUG // Debug output
126void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
127  OS << "SU(" << SU->NodeNum << "):";
128  OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
129
130  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
131  if (!SC->isValid())
132    return;
133
134  for (TargetSchedModel::ProcResIter
135         PI = SchedModel->getWriteProcResBegin(SC),
136         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
137    const MCProcResourceDesc &PRD =
138      *SchedModel->getProcResource(PI->ProcResourceIdx);
139    std::string FU(PRD.Name);
140    // trim e.g. Z13_FXaUnit -> FXa
141    FU = FU.substr(FU.find("_") + 1);
142    FU.resize(FU.find("Unit"));
143    OS << "/" << FU;
144
145    if (PI->Cycles > 1)
146      OS << "(" << PI->Cycles << "cyc)";
147  }
148
149  if (SC->NumMicroOps > 1)
150    OS << "/" << SC->NumMicroOps << "uops";
151  if (SC->BeginGroup && SC->EndGroup)
152    OS << "/GroupsAlone";
153  else if (SC->BeginGroup)
154    OS << "/BeginsGroup";
155  else if (SC->EndGroup)
156    OS << "/EndsGroup";
157  if (SU->isUnbuffered)
158    OS << "/Unbuffered";
159}
160
161void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
162  dbgs() << "+++ " << Msg;
163  dbgs() << ": ";
164
165  if (CurGroupDbg.empty())
166    dbgs() << " <empty>\n";
167  else {
168    dbgs() << "{ " << CurGroupDbg << " }";
169    dbgs() << " (" << CurrGroupSize << " decoder slot"
170           << (CurrGroupSize > 1 ? "s":"")
171           << ")\n";
172  }
173}
174
175void SystemZHazardRecognizer::dumpProcResourceCounters() const {
176  bool any = false;
177
178  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
179    if (ProcResourceCounters[i] > 0) {
180      any = true;
181      break;
182    }
183
184  if (!any)
185    return;
186
187  dbgs() << "+++ Resource counters:\n";
188  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
189    if (ProcResourceCounters[i] > 0) {
190      dbgs() << "+++ Extra schedule for execution unit "
191             << SchedModel->getProcResource(i)->Name
192             << ": " << ProcResourceCounters[i] << "\n";
193      any = true;
194    }
195}
196#endif //NDEBUG
197
198void SystemZHazardRecognizer::clearProcResCounters() {
199  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
200  CriticalResourceIdx = UINT_MAX;
201}
202
203// Update state with SU as the next scheduled unit.
204void SystemZHazardRecognizer::
205EmitInstruction(SUnit *SU) {
206  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
207  DEBUG( dumpCurrGroup("Decode group before emission"););
208
209  // If scheduling an SU that must begin a new decoder group, move on
210  // to next group.
211  if (!fitsIntoCurrentGroup(SU))
212    nextGroup();
213
214  DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
215         dbgs() << "\n";
216         raw_string_ostream cgd(CurGroupDbg);
217         if (CurGroupDbg.length())
218           cgd << ", ";
219         dumpSU(SU, cgd););
220
221  // After returning from a call, we don't know much about the state.
222  if (SU->getInstr()->isCall()) {
223    DEBUG (dbgs() << "+++ Clearing state after call.\n";);
224    clearProcResCounters();
225    LastFPdOpCycleIdx = UINT_MAX;
226    CurrGroupSize += getNumDecoderSlots(SU);
227    assert (CurrGroupSize <= 3);
228    nextGroup();
229    return;
230  }
231
232  // Increase counter for execution unit(s).
233  for (TargetSchedModel::ProcResIter
234         PI = SchedModel->getWriteProcResBegin(SC),
235         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
236    // Don't handle FPd together with the other resources.
237    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
238      continue;
239    int &CurrCounter =
240      ProcResourceCounters[PI->ProcResourceIdx];
241    CurrCounter += PI->Cycles;
242    // Check if this is now the new critical resource.
243    if ((CurrCounter > ProcResCostLim) &&
244        (CriticalResourceIdx == UINT_MAX ||
245         (PI->ProcResourceIdx != CriticalResourceIdx &&
246          CurrCounter >
247          ProcResourceCounters[CriticalResourceIdx]))) {
248      DEBUG( dbgs() << "+++ New critical resource: "
249             << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
250             << "\n";);
251      CriticalResourceIdx = PI->ProcResourceIdx;
252    }
253  }
254
255  // Make note of an instruction that uses a blocking resource (FPd).
256  if (SU->isUnbuffered) {
257    LastFPdOpCycleIdx = getCurrCycleIdx();
258    DEBUG (dbgs() << "+++ Last FPd cycle index: "
259           << LastFPdOpCycleIdx << "\n";);
260  }
261
262  // Insert SU into current group by increasing number of slots used
263  // in current group.
264  CurrGroupSize += getNumDecoderSlots(SU);
265  assert (CurrGroupSize <= 3);
266
267  // Check if current group is now full/ended. If so, move on to next
268  // group to be ready to evaluate more candidates.
269  if (CurrGroupSize == 3 || SC->EndGroup)
270    nextGroup();
271}
272
273int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
274  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
275  if (!SC->isValid())
276    return 0;
277
278  // If SU begins new group, it can either break a current group early
279  // or fit naturally if current group is empty (negative cost).
280  if (SC->BeginGroup) {
281    if (CurrGroupSize)
282      return 3 - CurrGroupSize;
283    return -1;
284  }
285
286  // Similarly, a group-ending SU may either fit well (last in group), or
287  // end the group prematurely.
288  if (SC->EndGroup) {
289    unsigned resultingGroupSize =
290      (CurrGroupSize + getNumDecoderSlots(SU));
291    if (resultingGroupSize < 3)
292      return (3 - resultingGroupSize);
293    return -1;
294  }
295
296  // Most instructions can be placed in any decoder slot.
297  return 0;
298}
299
300bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
301  assert (SU->isUnbuffered);
302  // If this is the first FPd op, it should be scheduled high.
303  if (LastFPdOpCycleIdx == UINT_MAX)
304    return true;
305  // If this is not the first PFd op, it should go into the other side
306  // of the processor to use the other FPd unit there. This should
307  // generally happen if two FPd ops are placed with 2 other
308  // instructions between them (modulo 6).
309  if (LastFPdOpCycleIdx > getCurrCycleIdx())
310    return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
311  return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
312}
313
314int SystemZHazardRecognizer::
315resourcesCost(SUnit *SU) {
316  int Cost = 0;
317
318  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
319  if (!SC->isValid())
320    return 0;
321
322  // For a FPd op, either return min or max value as indicated by the
323  // distance to any prior FPd op.
324  if (SU->isUnbuffered)
325    Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
326  // For other instructions, give a cost to the use of the critical resource.
327  else if (CriticalResourceIdx != UINT_MAX) {
328    for (TargetSchedModel::ProcResIter
329           PI = SchedModel->getWriteProcResBegin(SC),
330           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
331      if (PI->ProcResourceIdx == CriticalResourceIdx)
332        Cost = PI->Cycles;
333  }
334
335  return Cost;
336}
337
338