SystemZHazardRecognizer.cpp revision 311116
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2311116Sdim//
3311116Sdim//                     The LLVM Compiler Infrastructure
4311116Sdim//
5311116Sdim// This file is distributed under the University of Illinois Open Source
6311116Sdim// License. See LICENSE.TXT for details.
7311116Sdim//
8311116Sdim//===----------------------------------------------------------------------===//
9311116Sdim//
10311116Sdim// This file defines a hazard recognizer for the SystemZ scheduler.
11311116Sdim//
12311116Sdim// This class is used by the SystemZ scheduling strategy to maintain
13311116Sdim// the state during scheduling, and provide cost functions for
14311116Sdim// scheduling candidates. This includes:
15311116Sdim//
16311116Sdim// * Decoder grouping. A decoder group can maximally hold 3 uops, and
17311116Sdim// instructions that always begin a new group should be scheduled when
18311116Sdim// the current decoder group is empty.
19311116Sdim// * Processor resources usage. It is beneficial to balance the use of
20311116Sdim// resources.
21311116Sdim//
22311116Sdim// ===---------------------------------------------------------------------===//
23311116Sdim
24311116Sdim#include "SystemZHazardRecognizer.h"
25311116Sdim#include "llvm/ADT/Statistic.h"
26311116Sdim
27311116Sdimusing namespace llvm;
28311116Sdim
29311116Sdim#define DEBUG_TYPE "misched"
30311116Sdim
31311116Sdim// This is the limit of processor resource usage at which the
32311116Sdim// scheduler should try to look for other instructions (not using the
33311116Sdim// critical resource).
34311116Sdimstatic cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
35311116Sdim                                   cl::desc("The OOO window for processor "
36311116Sdim                                            "resources during scheduling."),
37311116Sdim                                   cl::init(8));
38311116Sdim
39311116SdimSystemZHazardRecognizer::
40311116SdimSystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
41311116Sdim                                                        SchedModel(nullptr) {}
42311116Sdim
43311116Sdimunsigned SystemZHazardRecognizer::
44311116SdimgetNumDecoderSlots(SUnit *SU) const {
45311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
46311116Sdim  if (!SC->isValid())
47311116Sdim    return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
48311116Sdim
49311116Sdim  if (SC->BeginGroup) {
50311116Sdim    if (!SC->EndGroup)
51311116Sdim      return 2; // Cracked instruction
52311116Sdim    else
53311116Sdim      return 3; // Expanded/group-alone instruction
54311116Sdim  }
55311116Sdim
56311116Sdim  return 1; // Normal instruction
57311116Sdim}
58311116Sdim
59311116Sdimunsigned SystemZHazardRecognizer::getCurrCycleIdx() {
60311116Sdim  unsigned Idx = CurrGroupSize;
61311116Sdim  if (GrpCount % 2)
62311116Sdim    Idx += 3;
63311116Sdim  return Idx;
64311116Sdim}
65311116Sdim
66311116SdimScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
67311116SdimgetHazardType(SUnit *m, int Stalls) {
68311116Sdim  return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
69311116Sdim}
70311116Sdim
71311116Sdimvoid SystemZHazardRecognizer::Reset() {
72311116Sdim  CurrGroupSize = 0;
73311116Sdim  clearProcResCounters();
74311116Sdim  GrpCount = 0;
75311116Sdim  LastFPdOpCycleIdx = UINT_MAX;
76311116Sdim  DEBUG(CurGroupDbg = "";);
77311116Sdim}
78311116Sdim
79311116Sdimbool
80311116SdimSystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
81311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
82311116Sdim  if (!SC->isValid())
83311116Sdim    return true;
84311116Sdim
85311116Sdim  // A cracked instruction only fits into schedule if the current
86311116Sdim  // group is empty.
87311116Sdim  if (SC->BeginGroup)
88311116Sdim    return (CurrGroupSize == 0);
89311116Sdim
90311116Sdim  // Since a full group is handled immediately in EmitInstruction(),
91311116Sdim  // SU should fit into current group. NumSlots should be 1 or 0,
92311116Sdim  // since it is not a cracked or expanded instruction.
93311116Sdim  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
94311116Sdim          "Expected normal instruction to fit in non-full group!");
95311116Sdim
96311116Sdim  return true;
97311116Sdim}
98311116Sdim
99311116Sdimvoid SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
100311116Sdim  if (CurrGroupSize > 0) {
101311116Sdim    DEBUG(dumpCurrGroup("Completed decode group"));
102311116Sdim    DEBUG(CurGroupDbg = "";);
103311116Sdim
104311116Sdim    GrpCount++;
105311116Sdim
106311116Sdim    // Reset counter for next group.
107311116Sdim    CurrGroupSize = 0;
108311116Sdim
109311116Sdim    // Decrease counters for execution units by one.
110311116Sdim    for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
111311116Sdim      if (ProcResourceCounters[i] > 0)
112311116Sdim        ProcResourceCounters[i]--;
113311116Sdim
114311116Sdim    // Clear CriticalResourceIdx if it is now below the threshold.
115311116Sdim    if (CriticalResourceIdx != UINT_MAX &&
116311116Sdim        (ProcResourceCounters[CriticalResourceIdx] <=
117311116Sdim         ProcResCostLim))
118311116Sdim      CriticalResourceIdx = UINT_MAX;
119311116Sdim  }
120311116Sdim
121311116Sdim  DEBUG(if (DbgOutput)
122311116Sdim          dumpProcResourceCounters(););
123311116Sdim}
124311116Sdim
125311116Sdim#ifndef NDEBUG // Debug output
126311116Sdimvoid SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
127311116Sdim  OS << "SU(" << SU->NodeNum << "):";
128311116Sdim  OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
129311116Sdim
130311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
131311116Sdim  if (!SC->isValid())
132311116Sdim    return;
133311116Sdim
134311116Sdim  for (TargetSchedModel::ProcResIter
135311116Sdim         PI = SchedModel->getWriteProcResBegin(SC),
136311116Sdim         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
137311116Sdim    const MCProcResourceDesc &PRD =
138311116Sdim      *SchedModel->getProcResource(PI->ProcResourceIdx);
139311116Sdim    std::string FU(PRD.Name);
140311116Sdim    // trim e.g. Z13_FXaUnit -> FXa
141311116Sdim    FU = FU.substr(FU.find("_") + 1);
142311116Sdim    FU.resize(FU.find("Unit"));
143311116Sdim    OS << "/" << FU;
144311116Sdim
145311116Sdim    if (PI->Cycles > 1)
146311116Sdim      OS << "(" << PI->Cycles << "cyc)";
147311116Sdim  }
148311116Sdim
149311116Sdim  if (SC->NumMicroOps > 1)
150311116Sdim    OS << "/" << SC->NumMicroOps << "uops";
151311116Sdim  if (SC->BeginGroup && SC->EndGroup)
152311116Sdim    OS << "/GroupsAlone";
153311116Sdim  else if (SC->BeginGroup)
154311116Sdim    OS << "/BeginsGroup";
155311116Sdim  else if (SC->EndGroup)
156311116Sdim    OS << "/EndsGroup";
157311116Sdim  if (SU->isUnbuffered)
158311116Sdim    OS << "/Unbuffered";
159311116Sdim}
160311116Sdim
161311116Sdimvoid SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
162311116Sdim  dbgs() << "+++ " << Msg;
163311116Sdim  dbgs() << ": ";
164311116Sdim
165311116Sdim  if (CurGroupDbg.empty())
166311116Sdim    dbgs() << " <empty>\n";
167311116Sdim  else {
168311116Sdim    dbgs() << "{ " << CurGroupDbg << " }";
169311116Sdim    dbgs() << " (" << CurrGroupSize << " decoder slot"
170311116Sdim           << (CurrGroupSize > 1 ? "s":"")
171311116Sdim           << ")\n";
172311116Sdim  }
173311116Sdim}
174311116Sdim
175311116Sdimvoid SystemZHazardRecognizer::dumpProcResourceCounters() const {
176311116Sdim  bool any = false;
177311116Sdim
178311116Sdim  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
179311116Sdim    if (ProcResourceCounters[i] > 0) {
180311116Sdim      any = true;
181311116Sdim      break;
182311116Sdim    }
183311116Sdim
184311116Sdim  if (!any)
185311116Sdim    return;
186311116Sdim
187311116Sdim  dbgs() << "+++ Resource counters:\n";
188311116Sdim  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
189311116Sdim    if (ProcResourceCounters[i] > 0) {
190311116Sdim      dbgs() << "+++ Extra schedule for execution unit "
191311116Sdim             << SchedModel->getProcResource(i)->Name
192311116Sdim             << ": " << ProcResourceCounters[i] << "\n";
193311116Sdim      any = true;
194311116Sdim    }
195311116Sdim}
196311116Sdim#endif //NDEBUG
197311116Sdim
198311116Sdimvoid SystemZHazardRecognizer::clearProcResCounters() {
199311116Sdim  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
200311116Sdim  CriticalResourceIdx = UINT_MAX;
201311116Sdim}
202311116Sdim
203311116Sdim// Update state with SU as the next scheduled unit.
204311116Sdimvoid SystemZHazardRecognizer::
205311116SdimEmitInstruction(SUnit *SU) {
206311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
207311116Sdim  DEBUG( dumpCurrGroup("Decode group before emission"););
208311116Sdim
209311116Sdim  // If scheduling an SU that must begin a new decoder group, move on
210311116Sdim  // to next group.
211311116Sdim  if (!fitsIntoCurrentGroup(SU))
212311116Sdim    nextGroup();
213311116Sdim
214311116Sdim  DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
215311116Sdim         dbgs() << "\n";
216311116Sdim         raw_string_ostream cgd(CurGroupDbg);
217311116Sdim         if (CurGroupDbg.length())
218311116Sdim           cgd << ", ";
219311116Sdim         dumpSU(SU, cgd););
220311116Sdim
221311116Sdim  // After returning from a call, we don't know much about the state.
222311116Sdim  if (SU->getInstr()->isCall()) {
223311116Sdim    DEBUG (dbgs() << "+++ Clearing state after call.\n";);
224311116Sdim    clearProcResCounters();
225311116Sdim    LastFPdOpCycleIdx = UINT_MAX;
226311116Sdim    CurrGroupSize += getNumDecoderSlots(SU);
227311116Sdim    assert (CurrGroupSize <= 3);
228311116Sdim    nextGroup();
229311116Sdim    return;
230311116Sdim  }
231311116Sdim
232311116Sdim  // Increase counter for execution unit(s).
233311116Sdim  for (TargetSchedModel::ProcResIter
234311116Sdim         PI = SchedModel->getWriteProcResBegin(SC),
235311116Sdim         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
236311116Sdim    // Don't handle FPd together with the other resources.
237311116Sdim    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
238311116Sdim      continue;
239311116Sdim    int &CurrCounter =
240311116Sdim      ProcResourceCounters[PI->ProcResourceIdx];
241311116Sdim    CurrCounter += PI->Cycles;
242311116Sdim    // Check if this is now the new critical resource.
243311116Sdim    if ((CurrCounter > ProcResCostLim) &&
244311116Sdim        (CriticalResourceIdx == UINT_MAX ||
245311116Sdim         (PI->ProcResourceIdx != CriticalResourceIdx &&
246311116Sdim          CurrCounter >
247311116Sdim          ProcResourceCounters[CriticalResourceIdx]))) {
248311116Sdim      DEBUG( dbgs() << "+++ New critical resource: "
249311116Sdim             << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
250311116Sdim             << "\n";);
251311116Sdim      CriticalResourceIdx = PI->ProcResourceIdx;
252311116Sdim    }
253311116Sdim  }
254311116Sdim
255311116Sdim  // Make note of an instruction that uses a blocking resource (FPd).
256311116Sdim  if (SU->isUnbuffered) {
257311116Sdim    LastFPdOpCycleIdx = getCurrCycleIdx();
258311116Sdim    DEBUG (dbgs() << "+++ Last FPd cycle index: "
259311116Sdim           << LastFPdOpCycleIdx << "\n";);
260311116Sdim  }
261311116Sdim
262311116Sdim  // Insert SU into current group by increasing number of slots used
263311116Sdim  // in current group.
264311116Sdim  CurrGroupSize += getNumDecoderSlots(SU);
265311116Sdim  assert (CurrGroupSize <= 3);
266311116Sdim
267311116Sdim  // Check if current group is now full/ended. If so, move on to next
268311116Sdim  // group to be ready to evaluate more candidates.
269311116Sdim  if (CurrGroupSize == 3 || SC->EndGroup)
270311116Sdim    nextGroup();
271311116Sdim}
272311116Sdim
273311116Sdimint SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
274311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
275311116Sdim  if (!SC->isValid())
276311116Sdim    return 0;
277311116Sdim
278311116Sdim  // If SU begins new group, it can either break a current group early
279311116Sdim  // or fit naturally if current group is empty (negative cost).
280311116Sdim  if (SC->BeginGroup) {
281311116Sdim    if (CurrGroupSize)
282311116Sdim      return 3 - CurrGroupSize;
283311116Sdim    return -1;
284311116Sdim  }
285311116Sdim
286311116Sdim  // Similarly, a group-ending SU may either fit well (last in group), or
287311116Sdim  // end the group prematurely.
288311116Sdim  if (SC->EndGroup) {
289311116Sdim    unsigned resultingGroupSize =
290311116Sdim      (CurrGroupSize + getNumDecoderSlots(SU));
291311116Sdim    if (resultingGroupSize < 3)
292311116Sdim      return (3 - resultingGroupSize);
293311116Sdim    return -1;
294311116Sdim  }
295311116Sdim
296311116Sdim  // Most instructions can be placed in any decoder slot.
297311116Sdim  return 0;
298311116Sdim}
299311116Sdim
300311116Sdimbool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
301311116Sdim  assert (SU->isUnbuffered);
302311116Sdim  // If this is the first FPd op, it should be scheduled high.
303311116Sdim  if (LastFPdOpCycleIdx == UINT_MAX)
304311116Sdim    return true;
305311116Sdim  // If this is not the first PFd op, it should go into the other side
306311116Sdim  // of the processor to use the other FPd unit there. This should
307311116Sdim  // generally happen if two FPd ops are placed with 2 other
308311116Sdim  // instructions between them (modulo 6).
309311116Sdim  if (LastFPdOpCycleIdx > getCurrCycleIdx())
310311116Sdim    return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
311311116Sdim  return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
312311116Sdim}
313311116Sdim
314311116Sdimint SystemZHazardRecognizer::
315311116SdimresourcesCost(SUnit *SU) {
316311116Sdim  int Cost = 0;
317311116Sdim
318311116Sdim  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
319311116Sdim  if (!SC->isValid())
320311116Sdim    return 0;
321311116Sdim
322311116Sdim  // For a FPd op, either return min or max value as indicated by the
323311116Sdim  // distance to any prior FPd op.
324311116Sdim  if (SU->isUnbuffered)
325311116Sdim    Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
326311116Sdim  // For other instructions, give a cost to the use of the critical resource.
327311116Sdim  else if (CriticalResourceIdx != UINT_MAX) {
328311116Sdim    for (TargetSchedModel::ProcResIter
329311116Sdim           PI = SchedModel->getWriteProcResBegin(SC),
330311116Sdim           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
331311116Sdim      if (PI->ProcResourceIdx == CriticalResourceIdx)
332311116Sdim        Cost = PI->Cycles;
333311116Sdim  }
334311116Sdim
335311116Sdim  return Cost;
336311116Sdim}
337311116Sdim
338