PPCHazardRecognizers.cpp revision 218893
1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#define DEBUG_TYPE "pre-RA-sched"
15193323Sed#include "PPCHazardRecognizers.h"
16193323Sed#include "PPC.h"
17193323Sed#include "PPCInstrInfo.h"
18193323Sed#include "llvm/CodeGen/ScheduleDAG.h"
19193323Sed#include "llvm/Support/Debug.h"
20198090Srdivacky#include "llvm/Support/ErrorHandling.h"
21198090Srdivacky#include "llvm/Support/raw_ostream.h"
22193323Sedusing namespace llvm;
23193323Sed
24193323Sed//===----------------------------------------------------------------------===//
25193323Sed// PowerPC 970 Hazard Recognizer
26193323Sed//
27193323Sed// This models the dispatch group formation of the PPC970 processor.  Dispatch
28193323Sed// groups are bundles of up to five instructions that can contain various mixes
29218893Sdim// of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
30193323Sed// branch instruction per-cycle.
31193323Sed//
32193323Sed// There are a number of restrictions to dispatch group formation: some
33193323Sed// instructions can only be issued in the first slot of a dispatch group, & some
34193323Sed// instructions fill an entire dispatch group.  Additionally, only branches can
35193323Sed// issue in the 5th (last) slot.
36193323Sed//
37193323Sed// Finally, there are a number of "structural" hazards on the PPC970.  These
38193323Sed// conditions cause large performance penalties due to misprediction, recovery,
39193323Sed// and replay logic that has to happen.  These cases include setting a CTR and
40193323Sed// branching through it in the same dispatch group, and storing to an address,
41193323Sed// then loading from the same address within a dispatch group.  To avoid these
42193323Sed// conditions, we insert no-op instructions when appropriate.
43193323Sed//
44193323Sed// FIXME: This is missing some significant cases:
45193323Sed//   1. Modeling of microcoded instructions.
46193323Sed//   2. Handling of serialized operations.
47193323Sed//   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
48193323Sed//
49193323Sed
50193323SedPPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
51193323Sed  : TII(tii) {
52193323Sed  EndDispatchGroup();
53193323Sed}
54193323Sed
55193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() {
56198090Srdivacky  DEBUG(errs() << "=== Start of dispatch group\n");
57193323Sed  NumIssued = 0;
58218893Sdim
59193323Sed  // Structural hazard info.
60193323Sed  HasCTRSet = false;
61193323Sed  NumStores = 0;
62193323Sed}
63193323Sed
64193323Sed
65218893SdimPPCII::PPC970_Unit
66193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode,
67193323Sed                                     bool &isFirst, bool &isSingle,
68193323Sed                                     bool &isCracked,
69193323Sed                                     bool &isLoad, bool &isStore) {
70193323Sed  if ((int)Opcode >= 0) {
71193323Sed    isFirst = isSingle = isCracked = isLoad = isStore = false;
72193323Sed    return PPCII::PPC970_Pseudo;
73193323Sed  }
74193323Sed  Opcode = ~Opcode;
75218893Sdim
76193323Sed  const TargetInstrDesc &TID = TII.get(Opcode);
77218893Sdim
78193323Sed  isLoad  = TID.mayLoad();
79193323Sed  isStore = TID.mayStore();
80218893Sdim
81210299Sed  uint64_t TSFlags = TID.TSFlags;
82218893Sdim
83193323Sed  isFirst   = TSFlags & PPCII::PPC970_First;
84193323Sed  isSingle  = TSFlags & PPCII::PPC970_Single;
85193323Sed  isCracked = TSFlags & PPCII::PPC970_Cracked;
86193323Sed  return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
87193323Sed}
88193323Sed
89193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
90193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
91193323Sedbool PPCHazardRecognizer970::
92193323SedisLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
93193323Sed  for (unsigned i = 0, e = NumStores; i != e; ++i) {
94193323Sed    // Handle exact and commuted addresses.
95193323Sed    if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
96193323Sed      return true;
97193323Sed    if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
98193323Sed      return true;
99218893Sdim
100193323Sed    // Okay, we don't have an exact match, if this is an indexed offset, see if
101193323Sed    // we have overlap (which happens during fp->int conversion for example).
102193323Sed    if (StorePtr2[i] == Ptr2) {
103193323Sed      if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
104193323Sed        if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
105193323Sed          // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
106193323Sed          // to see if the load and store actually overlap.
107193323Sed          int StoreOffs = StoreOffset->getZExtValue();
108193323Sed          int LoadOffs  = LoadOffset->getZExtValue();
109193323Sed          if (StoreOffs < LoadOffs) {
110193323Sed            if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
111193323Sed          } else {
112193323Sed            if (int(LoadOffs+LoadSize) > StoreOffs) return true;
113193323Sed          }
114193323Sed        }
115193323Sed    }
116193323Sed  }
117193323Sed  return false;
118193323Sed}
119193323Sed
120193323Sed/// getHazardType - We return hazard for any non-branch instruction that would
121203954Srdivacky/// terminate the dispatch group.  We turn NoopHazard for any
122193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a
123193323Sed/// pipeline flush.
124193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
125218893SdimgetHazardType(SUnit *SU, int Stalls) {
126218893Sdim  assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
127218893Sdim
128218893Sdim  const SDNode *Node = SU->getNode()->getGluedMachineNode();
129193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
130218893Sdim  PPCII::PPC970_Unit InstrType =
131193323Sed    GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
132193323Sed                 isLoad, isStore);
133218893Sdim  if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
134193323Sed  unsigned Opcode = Node->getMachineOpcode();
135193323Sed
136193323Sed  // We can only issue a PPC970_First/PPC970_Single instruction (such as
137193323Sed  // crand/mtspr/etc) if this is the first cycle of the dispatch group.
138193323Sed  if (NumIssued != 0 && (isFirst || isSingle))
139193323Sed    return Hazard;
140218893Sdim
141193323Sed  // If this instruction is cracked into two ops by the decoder, we know that
142193323Sed  // it is not a branch and that it cannot issue if 3 other instructions are
143193323Sed  // already in the dispatch group.
144193323Sed  if (isCracked && NumIssued > 2)
145193323Sed    return Hazard;
146218893Sdim
147193323Sed  switch (InstrType) {
148198090Srdivacky  default: llvm_unreachable("Unknown instruction type!");
149193323Sed  case PPCII::PPC970_FXU:
150193323Sed  case PPCII::PPC970_LSU:
151193323Sed  case PPCII::PPC970_FPU:
152193323Sed  case PPCII::PPC970_VALU:
153193323Sed  case PPCII::PPC970_VPERM:
154193323Sed    // We can only issue a branch as the last instruction in a group.
155193323Sed    if (NumIssued == 4) return Hazard;
156193323Sed    break;
157193323Sed  case PPCII::PPC970_CRU:
158193323Sed    // We can only issue a CR instruction in the first two slots.
159193323Sed    if (NumIssued >= 2) return Hazard;
160193323Sed    break;
161193323Sed  case PPCII::PPC970_BRU:
162193323Sed    break;
163193323Sed  }
164218893Sdim
165193323Sed  // Do not allow MTCTR and BCTRL to be in the same dispatch group.
166195340Sed  if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
167193323Sed    return NoopHazard;
168218893Sdim
169193323Sed  // If this is a load following a store, make sure it's not to the same or
170193323Sed  // overlapping address.
171193323Sed  if (isLoad && NumStores) {
172193323Sed    unsigned LoadSize;
173193323Sed    switch (Opcode) {
174198090Srdivacky    default: llvm_unreachable("Unknown load!");
175193323Sed    case PPC::LBZ:   case PPC::LBZU:
176193323Sed    case PPC::LBZX:
177193323Sed    case PPC::LBZ8:  case PPC::LBZU8:
178193323Sed    case PPC::LBZX8:
179193323Sed    case PPC::LVEBX:
180193323Sed      LoadSize = 1;
181193323Sed      break;
182193323Sed    case PPC::LHA:   case PPC::LHAU:
183193323Sed    case PPC::LHAX:
184193323Sed    case PPC::LHZ:   case PPC::LHZU:
185193323Sed    case PPC::LHZX:
186193323Sed    case PPC::LVEHX:
187193323Sed    case PPC::LHBRX:
188193323Sed    case PPC::LHA8:   case PPC::LHAU8:
189193323Sed    case PPC::LHAX8:
190193323Sed    case PPC::LHZ8:   case PPC::LHZU8:
191193323Sed    case PPC::LHZX8:
192193323Sed      LoadSize = 2;
193193323Sed      break;
194193323Sed    case PPC::LFS:    case PPC::LFSU:
195193323Sed    case PPC::LFSX:
196193323Sed    case PPC::LWZ:    case PPC::LWZU:
197193323Sed    case PPC::LWZX:
198193323Sed    case PPC::LWA:
199193323Sed    case PPC::LWAX:
200193323Sed    case PPC::LVEWX:
201193323Sed    case PPC::LWBRX:
202193323Sed    case PPC::LWZ8:
203193323Sed    case PPC::LWZX8:
204193323Sed      LoadSize = 4;
205193323Sed      break;
206193323Sed    case PPC::LFD:    case PPC::LFDU:
207193323Sed    case PPC::LFDX:
208193323Sed    case PPC::LD:     case PPC::LDU:
209193323Sed    case PPC::LDX:
210193323Sed      LoadSize = 8;
211193323Sed      break;
212193323Sed    case PPC::LVX:
213193323Sed    case PPC::LVXL:
214193323Sed      LoadSize = 16;
215193323Sed      break;
216193323Sed    }
217218893Sdim
218218893Sdim    if (isLoadOfStoredAddress(LoadSize,
219193323Sed                              Node->getOperand(0), Node->getOperand(1)))
220193323Sed      return NoopHazard;
221193323Sed  }
222218893Sdim
223193323Sed  return NoHazard;
224193323Sed}
225193323Sed
226193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
227218893Sdim  const SDNode *Node = SU->getNode()->getGluedMachineNode();
228193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
229218893Sdim  PPCII::PPC970_Unit InstrType =
230193323Sed    GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
231193323Sed                 isLoad, isStore);
232218893Sdim  if (InstrType == PPCII::PPC970_Pseudo) return;
233193323Sed  unsigned Opcode = Node->getMachineOpcode();
234193323Sed
235193323Sed  // Update structural hazard information.
236193323Sed  if (Opcode == PPC::MTCTR) HasCTRSet = true;
237218893Sdim
238193323Sed  // Track the address stored to.
239193323Sed  if (isStore) {
240193323Sed    unsigned ThisStoreSize;
241193323Sed    switch (Opcode) {
242198090Srdivacky    default: llvm_unreachable("Unknown store instruction!");
243193323Sed    case PPC::STB:    case PPC::STB8:
244193323Sed    case PPC::STBU:   case PPC::STBU8:
245193323Sed    case PPC::STBX:   case PPC::STBX8:
246193323Sed    case PPC::STVEBX:
247193323Sed      ThisStoreSize = 1;
248193323Sed      break;
249193323Sed    case PPC::STH:    case PPC::STH8:
250193323Sed    case PPC::STHU:   case PPC::STHU8:
251193323Sed    case PPC::STHX:   case PPC::STHX8:
252193323Sed    case PPC::STVEHX:
253193323Sed    case PPC::STHBRX:
254193323Sed      ThisStoreSize = 2;
255193323Sed      break;
256193323Sed    case PPC::STFS:
257193323Sed    case PPC::STFSU:
258193323Sed    case PPC::STFSX:
259193323Sed    case PPC::STWX:   case PPC::STWX8:
260193323Sed    case PPC::STWUX:
261193323Sed    case PPC::STW:    case PPC::STW8:
262204642Srdivacky    case PPC::STWU:
263193323Sed    case PPC::STVEWX:
264193323Sed    case PPC::STFIWX:
265193323Sed    case PPC::STWBRX:
266193323Sed      ThisStoreSize = 4;
267193323Sed      break;
268193323Sed    case PPC::STD_32:
269193323Sed    case PPC::STDX_32:
270193323Sed    case PPC::STD:
271193323Sed    case PPC::STDU:
272193323Sed    case PPC::STFD:
273193323Sed    case PPC::STFDX:
274193323Sed    case PPC::STDX:
275193323Sed    case PPC::STDUX:
276193323Sed      ThisStoreSize = 8;
277193323Sed      break;
278193323Sed    case PPC::STVX:
279193323Sed    case PPC::STVXL:
280193323Sed      ThisStoreSize = 16;
281193323Sed      break;
282193323Sed    }
283218893Sdim
284193323Sed    StoreSize[NumStores] = ThisStoreSize;
285193323Sed    StorePtr1[NumStores] = Node->getOperand(1);
286193323Sed    StorePtr2[NumStores] = Node->getOperand(2);
287193323Sed    ++NumStores;
288193323Sed  }
289218893Sdim
290193323Sed  if (InstrType == PPCII::PPC970_BRU || isSingle)
291193323Sed    NumIssued = 4;  // Terminate a d-group.
292193323Sed  ++NumIssued;
293218893Sdim
294193323Sed  // If this instruction is cracked into two ops by the decoder, remember that
295193323Sed  // we issued two pieces.
296193323Sed  if (isCracked)
297193323Sed    ++NumIssued;
298218893Sdim
299193323Sed  if (NumIssued == 5)
300193323Sed    EndDispatchGroup();
301193323Sed}
302193323Sed
303193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() {
304193323Sed  assert(NumIssued < 5 && "Illegal dispatch group!");
305193323Sed  ++NumIssued;
306193323Sed  if (NumIssued == 5)
307193323Sed    EndDispatchGroup();
308193323Sed}
309