1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "PPCHazardRecognizers.h"
15193323Sed#include "PPC.h"
16193323Sed#include "PPCInstrInfo.h"
17276479Sdim#include "PPCTargetMachine.h"
18193323Sed#include "llvm/CodeGen/ScheduleDAG.h"
19193323Sed#include "llvm/Support/Debug.h"
20198090Srdivacky#include "llvm/Support/ErrorHandling.h"
21198090Srdivacky#include "llvm/Support/raw_ostream.h"
22193323Sedusing namespace llvm;
23193323Sed
24276479Sdim#define DEBUG_TYPE "pre-RA-sched"
25276479Sdim
26276479Sdimbool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
27276479Sdim  // FIXME: Move this.
28276479Sdim  if (isBCTRAfterSet(SU))
29276479Sdim    return true;
30276479Sdim
31234353Sdim  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
32234353Sdim  if (!MCID)
33276479Sdim    return false;
34234353Sdim
35276479Sdim  if (!MCID->mayLoad())
36276479Sdim    return false;
37276479Sdim
38276479Sdim  // SU is a load; for any predecessors in this dispatch group, that are stores,
39276479Sdim  // and with which we have an ordering dependency, return true.
40276479Sdim  for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
41276479Sdim    const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
42276479Sdim    if (!PredMCID || !PredMCID->mayStore())
43276479Sdim      continue;
44276479Sdim
45276479Sdim    if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
46276479Sdim      continue;
47276479Sdim
48276479Sdim    for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
49276479Sdim      if (SU->Preds[i].getSUnit() == CurGroup[j])
50276479Sdim        return true;
51276479Sdim  }
52276479Sdim
53276479Sdim  return false;
54234353Sdim}
55234353Sdim
56276479Sdimbool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
57276479Sdim  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
58276479Sdim  if (!MCID)
59276479Sdim    return false;
60276479Sdim
61276479Sdim  if (!MCID->isBranch())
62276479Sdim    return false;
63276479Sdim
64276479Sdim  // SU is a branch; for any predecessors in this dispatch group, with which we
65276479Sdim  // have a data dependence and set the counter register, return true.
66276479Sdim  for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
67276479Sdim    const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
68276479Sdim    if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
69276479Sdim      continue;
70276479Sdim
71276479Sdim    if (SU->Preds[i].isCtrl())
72276479Sdim      continue;
73276479Sdim
74276479Sdim    for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
75276479Sdim      if (SU->Preds[i].getSUnit() == CurGroup[j])
76276479Sdim        return true;
77276479Sdim  }
78276479Sdim
79276479Sdim  return false;
80276479Sdim}
81276479Sdim
82276479Sdim// FIXME: Remove this when we don't need this:
83276479Sdimnamespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
84276479Sdim
85276479Sdim// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
86276479Sdim
87276479Sdimbool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
88276479Sdim                                                       unsigned &NSlots) {
89276479Sdim  // FIXME: Indirectly, this information is contained in the itinerary, and
90276479Sdim  // we should derive it from there instead of separately specifying it
91276479Sdim  // here.
92276479Sdim  unsigned IIC = MCID->getSchedClass();
93276479Sdim  switch (IIC) {
94276479Sdim  default:
95276479Sdim    NSlots = 1;
96276479Sdim    break;
97276479Sdim  case PPC::Sched::IIC_IntDivW:
98276479Sdim  case PPC::Sched::IIC_IntDivD:
99276479Sdim  case PPC::Sched::IIC_LdStLoadUpd:
100276479Sdim  case PPC::Sched::IIC_LdStLDU:
101276479Sdim  case PPC::Sched::IIC_LdStLFDU:
102276479Sdim  case PPC::Sched::IIC_LdStLFDUX:
103276479Sdim  case PPC::Sched::IIC_LdStLHA:
104276479Sdim  case PPC::Sched::IIC_LdStLHAU:
105276479Sdim  case PPC::Sched::IIC_LdStLWA:
106276479Sdim  case PPC::Sched::IIC_LdStSTDU:
107276479Sdim  case PPC::Sched::IIC_LdStSTFDU:
108276479Sdim    NSlots = 2;
109276479Sdim    break;
110276479Sdim  case PPC::Sched::IIC_LdStLoadUpdX:
111276479Sdim  case PPC::Sched::IIC_LdStLDUX:
112276479Sdim  case PPC::Sched::IIC_LdStLHAUX:
113276479Sdim  case PPC::Sched::IIC_LdStLWARX:
114276479Sdim  case PPC::Sched::IIC_LdStLDARX:
115276479Sdim  case PPC::Sched::IIC_LdStSTDUX:
116276479Sdim  case PPC::Sched::IIC_LdStSTDCX:
117276479Sdim  case PPC::Sched::IIC_LdStSTWCX:
118276479Sdim  case PPC::Sched::IIC_BrMCRX: // mtcr
119276479Sdim  // FIXME: Add sync/isync (here and in the itinerary).
120276479Sdim    NSlots = 4;
121276479Sdim    break;
122276479Sdim  }
123276479Sdim
124276479Sdim  // FIXME: record-form instructions need a different itinerary class.
125276479Sdim  if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
126276479Sdim    NSlots = 2;
127276479Sdim
128276479Sdim  switch (IIC) {
129276479Sdim  default:
130276479Sdim    // All multi-slot instructions must come first.
131276479Sdim    return NSlots > 1;
132276479Sdim  case PPC::Sched::IIC_BrCR: // cr logicals
133276479Sdim  case PPC::Sched::IIC_SprMFCR:
134276479Sdim  case PPC::Sched::IIC_SprMFCRF:
135276479Sdim  case PPC::Sched::IIC_SprMTSPR:
136276479Sdim    return true;
137276479Sdim  }
138276479Sdim}
139276479Sdim
140234353SdimScheduleHazardRecognizer::HazardType
141276479SdimPPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
142276479Sdim  if (Stalls == 0 && isLoadAfterStore(SU))
143276479Sdim    return NoopHazard;
144276479Sdim
145234353Sdim  return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
146234353Sdim}
147234353Sdim
148276479Sdimbool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
149276479Sdim  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
150276479Sdim  unsigned NSlots;
151276479Sdim  if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
152276479Sdim    return true;
153276479Sdim
154276479Sdim  return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
155234353Sdim}
156234353Sdim
157276479Sdimunsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
158276479Sdim  // We only need to fill out a maximum of 5 slots here: The 6th slot could
159276479Sdim  // only be a second branch, and otherwise the next instruction will start a
160276479Sdim  // new group.
161276479Sdim  if (isLoadAfterStore(SU) && CurSlots < 6) {
162276479Sdim    unsigned Directive =
163288943Sdim        DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
164276479Sdim    // If we're using a special group-terminating nop, then we need only one.
165276479Sdim    if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
166276479Sdim        Directive == PPC::DIR_PWR8 )
167276479Sdim      return 1;
168276479Sdim
169276479Sdim    return 5 - CurSlots;
170276479Sdim  }
171276479Sdim
172276479Sdim  return ScoreboardHazardRecognizer::PreEmitNoops(SU);
173234353Sdim}
174234353Sdim
175276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
176276479Sdim  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
177276479Sdim  if (MCID) {
178276479Sdim    if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
179276479Sdim      CurGroup.clear();
180276479Sdim      CurSlots = CurBranches = 0;
181276479Sdim    } else {
182276479Sdim      DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
183276479Sdim                      SU->NodeNum << "): ");
184276479Sdim      DEBUG(DAG->dumpNode(SU));
185276479Sdim
186276479Sdim      unsigned NSlots;
187276479Sdim      bool MustBeFirst = mustComeFirst(MCID, NSlots);
188276479Sdim
189276479Sdim      // If this instruction must come first, but does not, then it starts a
190276479Sdim      // new group.
191276479Sdim      if (MustBeFirst && CurSlots) {
192276479Sdim        CurSlots = CurBranches = 0;
193276479Sdim        CurGroup.clear();
194276479Sdim      }
195276479Sdim
196276479Sdim      CurSlots += NSlots;
197276479Sdim      CurGroup.push_back(SU);
198276479Sdim
199276479Sdim      if (MCID->isBranch())
200276479Sdim        ++CurBranches;
201276479Sdim    }
202276479Sdim  }
203276479Sdim
204276479Sdim  return ScoreboardHazardRecognizer::EmitInstruction(SU);
205276479Sdim}
206276479Sdim
207276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
208276479Sdim  return ScoreboardHazardRecognizer::AdvanceCycle();
209276479Sdim}
210276479Sdim
211276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
212276479Sdim  llvm_unreachable("Bottom-up scheduling not supported");
213276479Sdim}
214276479Sdim
215276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::Reset() {
216276479Sdim  CurGroup.clear();
217276479Sdim  CurSlots = CurBranches = 0;
218276479Sdim  return ScoreboardHazardRecognizer::Reset();
219276479Sdim}
220276479Sdim
221276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
222276479Sdim  unsigned Directive =
223288943Sdim      DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
224276479Sdim  // If the group has now filled all of its slots, or if we're using a special
225276479Sdim  // group-terminating nop, the group is complete.
226276479Sdim  if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
227276479Sdim      Directive == PPC::DIR_PWR8 || CurSlots == 6)  {
228276479Sdim    CurGroup.clear();
229276479Sdim    CurSlots = CurBranches = 0;
230276479Sdim  } else {
231276479Sdim    CurGroup.push_back(nullptr);
232276479Sdim    ++CurSlots;
233276479Sdim  }
234276479Sdim}
235276479Sdim
236234353Sdim//===----------------------------------------------------------------------===//
237193323Sed// PowerPC 970 Hazard Recognizer
238193323Sed//
239193323Sed// This models the dispatch group formation of the PPC970 processor.  Dispatch
240193323Sed// groups are bundles of up to five instructions that can contain various mixes
241218893Sdim// of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
242193323Sed// branch instruction per-cycle.
243193323Sed//
244193323Sed// There are a number of restrictions to dispatch group formation: some
245193323Sed// instructions can only be issued in the first slot of a dispatch group, & some
246193323Sed// instructions fill an entire dispatch group.  Additionally, only branches can
247193323Sed// issue in the 5th (last) slot.
248193323Sed//
249193323Sed// Finally, there are a number of "structural" hazards on the PPC970.  These
250193323Sed// conditions cause large performance penalties due to misprediction, recovery,
251193323Sed// and replay logic that has to happen.  These cases include setting a CTR and
252193323Sed// branching through it in the same dispatch group, and storing to an address,
253193323Sed// then loading from the same address within a dispatch group.  To avoid these
254193323Sed// conditions, we insert no-op instructions when appropriate.
255193323Sed//
256193323Sed// FIXME: This is missing some significant cases:
257193323Sed//   1. Modeling of microcoded instructions.
258193323Sed//   2. Handling of serialized operations.
259193323Sed//   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
260193323Sed//
261193323Sed
262276479SdimPPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
263276479Sdim    : DAG(DAG) {
264193323Sed  EndDispatchGroup();
265193323Sed}
266193323Sed
267193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() {
268198090Srdivacky  DEBUG(errs() << "=== Start of dispatch group\n");
269193323Sed  NumIssued = 0;
270218893Sdim
271193323Sed  // Structural hazard info.
272193323Sed  HasCTRSet = false;
273193323Sed  NumStores = 0;
274193323Sed}
275193323Sed
276193323Sed
277218893SdimPPCII::PPC970_Unit
278193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode,
279193323Sed                                     bool &isFirst, bool &isSingle,
280193323Sed                                     bool &isCracked,
281193323Sed                                     bool &isLoad, bool &isStore) {
282276479Sdim  const MCInstrDesc &MCID = DAG.TII->get(Opcode);
283218893Sdim
284224145Sdim  isLoad  = MCID.mayLoad();
285224145Sdim  isStore = MCID.mayStore();
286218893Sdim
287224145Sdim  uint64_t TSFlags = MCID.TSFlags;
288218893Sdim
289193323Sed  isFirst   = TSFlags & PPCII::PPC970_First;
290193323Sed  isSingle  = TSFlags & PPCII::PPC970_Single;
291193323Sed  isCracked = TSFlags & PPCII::PPC970_Cracked;
292193323Sed  return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
293193323Sed}
294193323Sed
295193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
296193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
297193323Sedbool PPCHazardRecognizer970::
298234353SdimisLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
299234353Sdim  const Value *LoadValue) const {
300193323Sed  for (unsigned i = 0, e = NumStores; i != e; ++i) {
301193323Sed    // Handle exact and commuted addresses.
302234353Sdim    if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
303193323Sed      return true;
304218893Sdim
305193323Sed    // Okay, we don't have an exact match, if this is an indexed offset, see if
306193323Sed    // we have overlap (which happens during fp->int conversion for example).
307234353Sdim    if (StoreValue[i] == LoadValue) {
308234353Sdim      // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
309234353Sdim      // to see if the load and store actually overlap.
310234353Sdim      if (StoreOffset[i] < LoadOffset) {
311234353Sdim        if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
312234353Sdim      } else {
313234353Sdim        if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
314234353Sdim      }
315193323Sed    }
316193323Sed  }
317193323Sed  return false;
318193323Sed}
319193323Sed
320193323Sed/// getHazardType - We return hazard for any non-branch instruction that would
321203954Srdivacky/// terminate the dispatch group.  We turn NoopHazard for any
322193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a
323193323Sed/// pipeline flush.
324193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
325218893SdimgetHazardType(SUnit *SU, int Stalls) {
326218893Sdim  assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
327218893Sdim
328234353Sdim  MachineInstr *MI = SU->getInstr();
329234353Sdim
330234353Sdim  if (MI->isDebugValue())
331234353Sdim    return NoHazard;
332234353Sdim
333234353Sdim  unsigned Opcode = MI->getOpcode();
334193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
335218893Sdim  PPCII::PPC970_Unit InstrType =
336234353Sdim    GetInstrType(Opcode, isFirst, isSingle, isCracked,
337193323Sed                 isLoad, isStore);
338218893Sdim  if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
339193323Sed
340193323Sed  // We can only issue a PPC970_First/PPC970_Single instruction (such as
341193323Sed  // crand/mtspr/etc) if this is the first cycle of the dispatch group.
342193323Sed  if (NumIssued != 0 && (isFirst || isSingle))
343193323Sed    return Hazard;
344218893Sdim
345193323Sed  // If this instruction is cracked into two ops by the decoder, we know that
346193323Sed  // it is not a branch and that it cannot issue if 3 other instructions are
347193323Sed  // already in the dispatch group.
348193323Sed  if (isCracked && NumIssued > 2)
349193323Sed    return Hazard;
350218893Sdim
351193323Sed  switch (InstrType) {
352198090Srdivacky  default: llvm_unreachable("Unknown instruction type!");
353193323Sed  case PPCII::PPC970_FXU:
354193323Sed  case PPCII::PPC970_LSU:
355193323Sed  case PPCII::PPC970_FPU:
356193323Sed  case PPCII::PPC970_VALU:
357193323Sed  case PPCII::PPC970_VPERM:
358193323Sed    // We can only issue a branch as the last instruction in a group.
359193323Sed    if (NumIssued == 4) return Hazard;
360193323Sed    break;
361193323Sed  case PPCII::PPC970_CRU:
362193323Sed    // We can only issue a CR instruction in the first two slots.
363193323Sed    if (NumIssued >= 2) return Hazard;
364193323Sed    break;
365193323Sed  case PPCII::PPC970_BRU:
366193323Sed    break;
367193323Sed  }
368218893Sdim
369193323Sed  // Do not allow MTCTR and BCTRL to be in the same dispatch group.
370249423Sdim  if (HasCTRSet && Opcode == PPC::BCTRL)
371193323Sed    return NoopHazard;
372218893Sdim
373193323Sed  // If this is a load following a store, make sure it's not to the same or
374193323Sed  // overlapping address.
375234353Sdim  if (isLoad && NumStores && !MI->memoperands_empty()) {
376234353Sdim    MachineMemOperand *MO = *MI->memoperands_begin();
377234353Sdim    if (isLoadOfStoredAddress(MO->getSize(),
378234353Sdim                              MO->getOffset(), MO->getValue()))
379193323Sed      return NoopHazard;
380193323Sed  }
381218893Sdim
382193323Sed  return NoHazard;
383193323Sed}
384193323Sed
385193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
386234353Sdim  MachineInstr *MI = SU->getInstr();
387234353Sdim
388234353Sdim  if (MI->isDebugValue())
389234353Sdim    return;
390234353Sdim
391234353Sdim  unsigned Opcode = MI->getOpcode();
392193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
393218893Sdim  PPCII::PPC970_Unit InstrType =
394234353Sdim    GetInstrType(Opcode, isFirst, isSingle, isCracked,
395193323Sed                 isLoad, isStore);
396218893Sdim  if (InstrType == PPCII::PPC970_Pseudo) return;
397193323Sed
398193323Sed  // Update structural hazard information.
399223017Sdim  if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
400218893Sdim
401193323Sed  // Track the address stored to.
402234353Sdim  if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
403234353Sdim    MachineMemOperand *MO = *MI->memoperands_begin();
404234353Sdim    StoreSize[NumStores] = MO->getSize();
405234353Sdim    StoreOffset[NumStores] = MO->getOffset();
406234353Sdim    StoreValue[NumStores] = MO->getValue();
407193323Sed    ++NumStores;
408193323Sed  }
409218893Sdim
410193323Sed  if (InstrType == PPCII::PPC970_BRU || isSingle)
411193323Sed    NumIssued = 4;  // Terminate a d-group.
412193323Sed  ++NumIssued;
413218893Sdim
414193323Sed  // If this instruction is cracked into two ops by the decoder, remember that
415193323Sed  // we issued two pieces.
416193323Sed  if (isCracked)
417193323Sed    ++NumIssued;
418218893Sdim
419193323Sed  if (NumIssued == 5)
420193323Sed    EndDispatchGroup();
421193323Sed}
422193323Sed
423193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() {
424193323Sed  assert(NumIssued < 5 && "Illegal dispatch group!");
425193323Sed  ++NumIssued;
426193323Sed  if (NumIssued == 5)
427193323Sed    EndDispatchGroup();
428193323Sed}
429234353Sdim
430234353Sdimvoid PPCHazardRecognizer970::Reset() {
431234353Sdim  EndDispatchGroup();
432234353Sdim}
433234353Sdim
434