PPCHazardRecognizers.cpp revision 204642
1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#define DEBUG_TYPE "pre-RA-sched"
15193323Sed#include "PPCHazardRecognizers.h"
16193323Sed#include "PPC.h"
17193323Sed#include "PPCInstrInfo.h"
18193323Sed#include "llvm/CodeGen/ScheduleDAG.h"
19193323Sed#include "llvm/Support/Debug.h"
20198090Srdivacky#include "llvm/Support/ErrorHandling.h"
21198090Srdivacky#include "llvm/Support/raw_ostream.h"
22193323Sedusing namespace llvm;
23193323Sed
24193323Sed//===----------------------------------------------------------------------===//
25193323Sed// PowerPC 970 Hazard Recognizer
26193323Sed//
27193323Sed// This models the dispatch group formation of the PPC970 processor.  Dispatch
28193323Sed// groups are bundles of up to five instructions that can contain various mixes
29193323Sed// of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
30193323Sed// branch instruction per-cycle.
31193323Sed//
32193323Sed// There are a number of restrictions to dispatch group formation: some
33193323Sed// instructions can only be issued in the first slot of a dispatch group, & some
34193323Sed// instructions fill an entire dispatch group.  Additionally, only branches can
35193323Sed// issue in the 5th (last) slot.
36193323Sed//
37193323Sed// Finally, there are a number of "structural" hazards on the PPC970.  These
38193323Sed// conditions cause large performance penalties due to misprediction, recovery,
39193323Sed// and replay logic that has to happen.  These cases include setting a CTR and
40193323Sed// branching through it in the same dispatch group, and storing to an address,
41193323Sed// then loading from the same address within a dispatch group.  To avoid these
42193323Sed// conditions, we insert no-op instructions when appropriate.
43193323Sed//
44193323Sed// FIXME: This is missing some significant cases:
45193323Sed//   1. Modeling of microcoded instructions.
46193323Sed//   2. Handling of serialized operations.
47193323Sed//   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
48193323Sed//
49193323Sed
50193323SedPPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
51193323Sed  : TII(tii) {
52193323Sed  EndDispatchGroup();
53193323Sed}
54193323Sed
55193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() {
56198090Srdivacky  DEBUG(errs() << "=== Start of dispatch group\n");
57193323Sed  NumIssued = 0;
58193323Sed
59193323Sed  // Structural hazard info.
60193323Sed  HasCTRSet = false;
61193323Sed  NumStores = 0;
62193323Sed}
63193323Sed
64193323Sed
65193323SedPPCII::PPC970_Unit
66193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode,
67193323Sed                                     bool &isFirst, bool &isSingle,
68193323Sed                                     bool &isCracked,
69193323Sed                                     bool &isLoad, bool &isStore) {
70193323Sed  if ((int)Opcode >= 0) {
71193323Sed    isFirst = isSingle = isCracked = isLoad = isStore = false;
72193323Sed    return PPCII::PPC970_Pseudo;
73193323Sed  }
74193323Sed  Opcode = ~Opcode;
75193323Sed
76193323Sed  const TargetInstrDesc &TID = TII.get(Opcode);
77193323Sed
78193323Sed  isLoad  = TID.mayLoad();
79193323Sed  isStore = TID.mayStore();
80193323Sed
81193323Sed  unsigned TSFlags = TID.TSFlags;
82193323Sed
83193323Sed  isFirst   = TSFlags & PPCII::PPC970_First;
84193323Sed  isSingle  = TSFlags & PPCII::PPC970_Single;
85193323Sed  isCracked = TSFlags & PPCII::PPC970_Cracked;
86193323Sed  return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
87193323Sed}
88193323Sed
89193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
90193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
91193323Sedbool PPCHazardRecognizer970::
92193323SedisLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
93193323Sed  for (unsigned i = 0, e = NumStores; i != e; ++i) {
94193323Sed    // Handle exact and commuted addresses.
95193323Sed    if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
96193323Sed      return true;
97193323Sed    if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
98193323Sed      return true;
99193323Sed
100193323Sed    // Okay, we don't have an exact match, if this is an indexed offset, see if
101193323Sed    // we have overlap (which happens during fp->int conversion for example).
102193323Sed    if (StorePtr2[i] == Ptr2) {
103193323Sed      if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
104193323Sed        if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
105193323Sed          // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
106193323Sed          // to see if the load and store actually overlap.
107193323Sed          int StoreOffs = StoreOffset->getZExtValue();
108193323Sed          int LoadOffs  = LoadOffset->getZExtValue();
109193323Sed          if (StoreOffs < LoadOffs) {
110193323Sed            if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
111193323Sed          } else {
112193323Sed            if (int(LoadOffs+LoadSize) > StoreOffs) return true;
113193323Sed          }
114193323Sed        }
115193323Sed    }
116193323Sed  }
117193323Sed  return false;
118193323Sed}
119193323Sed
120193323Sed/// getHazardType - We return hazard for any non-branch instruction that would
121203954Srdivacky/// terminate the dispatch group.  We turn NoopHazard for any
122193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a
123193323Sed/// pipeline flush.
124193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
125193323SedgetHazardType(SUnit *SU) {
126193323Sed  const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
127193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
128193323Sed  PPCII::PPC970_Unit InstrType =
129193323Sed    GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
130193323Sed                 isLoad, isStore);
131193323Sed  if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
132193323Sed  unsigned Opcode = Node->getMachineOpcode();
133193323Sed
134193323Sed  // We can only issue a PPC970_First/PPC970_Single instruction (such as
135193323Sed  // crand/mtspr/etc) if this is the first cycle of the dispatch group.
136193323Sed  if (NumIssued != 0 && (isFirst || isSingle))
137193323Sed    return Hazard;
138193323Sed
139193323Sed  // If this instruction is cracked into two ops by the decoder, we know that
140193323Sed  // it is not a branch and that it cannot issue if 3 other instructions are
141193323Sed  // already in the dispatch group.
142193323Sed  if (isCracked && NumIssued > 2)
143193323Sed    return Hazard;
144193323Sed
145193323Sed  switch (InstrType) {
146198090Srdivacky  default: llvm_unreachable("Unknown instruction type!");
147193323Sed  case PPCII::PPC970_FXU:
148193323Sed  case PPCII::PPC970_LSU:
149193323Sed  case PPCII::PPC970_FPU:
150193323Sed  case PPCII::PPC970_VALU:
151193323Sed  case PPCII::PPC970_VPERM:
152193323Sed    // We can only issue a branch as the last instruction in a group.
153193323Sed    if (NumIssued == 4) return Hazard;
154193323Sed    break;
155193323Sed  case PPCII::PPC970_CRU:
156193323Sed    // We can only issue a CR instruction in the first two slots.
157193323Sed    if (NumIssued >= 2) return Hazard;
158193323Sed    break;
159193323Sed  case PPCII::PPC970_BRU:
160193323Sed    break;
161193323Sed  }
162193323Sed
163193323Sed  // Do not allow MTCTR and BCTRL to be in the same dispatch group.
164195340Sed  if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
165193323Sed    return NoopHazard;
166193323Sed
167193323Sed  // If this is a load following a store, make sure it's not to the same or
168193323Sed  // overlapping address.
169193323Sed  if (isLoad && NumStores) {
170193323Sed    unsigned LoadSize;
171193323Sed    switch (Opcode) {
172198090Srdivacky    default: llvm_unreachable("Unknown load!");
173193323Sed    case PPC::LBZ:   case PPC::LBZU:
174193323Sed    case PPC::LBZX:
175193323Sed    case PPC::LBZ8:  case PPC::LBZU8:
176193323Sed    case PPC::LBZX8:
177193323Sed    case PPC::LVEBX:
178193323Sed      LoadSize = 1;
179193323Sed      break;
180193323Sed    case PPC::LHA:   case PPC::LHAU:
181193323Sed    case PPC::LHAX:
182193323Sed    case PPC::LHZ:   case PPC::LHZU:
183193323Sed    case PPC::LHZX:
184193323Sed    case PPC::LVEHX:
185193323Sed    case PPC::LHBRX:
186193323Sed    case PPC::LHA8:   case PPC::LHAU8:
187193323Sed    case PPC::LHAX8:
188193323Sed    case PPC::LHZ8:   case PPC::LHZU8:
189193323Sed    case PPC::LHZX8:
190193323Sed      LoadSize = 2;
191193323Sed      break;
192193323Sed    case PPC::LFS:    case PPC::LFSU:
193193323Sed    case PPC::LFSX:
194193323Sed    case PPC::LWZ:    case PPC::LWZU:
195193323Sed    case PPC::LWZX:
196193323Sed    case PPC::LWA:
197193323Sed    case PPC::LWAX:
198193323Sed    case PPC::LVEWX:
199193323Sed    case PPC::LWBRX:
200193323Sed    case PPC::LWZ8:
201193323Sed    case PPC::LWZX8:
202193323Sed      LoadSize = 4;
203193323Sed      break;
204193323Sed    case PPC::LFD:    case PPC::LFDU:
205193323Sed    case PPC::LFDX:
206193323Sed    case PPC::LD:     case PPC::LDU:
207193323Sed    case PPC::LDX:
208193323Sed      LoadSize = 8;
209193323Sed      break;
210193323Sed    case PPC::LVX:
211193323Sed    case PPC::LVXL:
212193323Sed      LoadSize = 16;
213193323Sed      break;
214193323Sed    }
215193323Sed
216193323Sed    if (isLoadOfStoredAddress(LoadSize,
217193323Sed                              Node->getOperand(0), Node->getOperand(1)))
218193323Sed      return NoopHazard;
219193323Sed  }
220193323Sed
221193323Sed  return NoHazard;
222193323Sed}
223193323Sed
224193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
225193323Sed  const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
226193323Sed  bool isFirst, isSingle, isCracked, isLoad, isStore;
227193323Sed  PPCII::PPC970_Unit InstrType =
228193323Sed    GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
229193323Sed                 isLoad, isStore);
230193323Sed  if (InstrType == PPCII::PPC970_Pseudo) return;
231193323Sed  unsigned Opcode = Node->getMachineOpcode();
232193323Sed
233193323Sed  // Update structural hazard information.
234193323Sed  if (Opcode == PPC::MTCTR) HasCTRSet = true;
235193323Sed
236193323Sed  // Track the address stored to.
237193323Sed  if (isStore) {
238193323Sed    unsigned ThisStoreSize;
239193323Sed    switch (Opcode) {
240198090Srdivacky    default: llvm_unreachable("Unknown store instruction!");
241193323Sed    case PPC::STB:    case PPC::STB8:
242193323Sed    case PPC::STBU:   case PPC::STBU8:
243193323Sed    case PPC::STBX:   case PPC::STBX8:
244193323Sed    case PPC::STVEBX:
245193323Sed      ThisStoreSize = 1;
246193323Sed      break;
247193323Sed    case PPC::STH:    case PPC::STH8:
248193323Sed    case PPC::STHU:   case PPC::STHU8:
249193323Sed    case PPC::STHX:   case PPC::STHX8:
250193323Sed    case PPC::STVEHX:
251193323Sed    case PPC::STHBRX:
252193323Sed      ThisStoreSize = 2;
253193323Sed      break;
254193323Sed    case PPC::STFS:
255193323Sed    case PPC::STFSU:
256193323Sed    case PPC::STFSX:
257193323Sed    case PPC::STWX:   case PPC::STWX8:
258193323Sed    case PPC::STWUX:
259193323Sed    case PPC::STW:    case PPC::STW8:
260204642Srdivacky    case PPC::STWU:
261193323Sed    case PPC::STVEWX:
262193323Sed    case PPC::STFIWX:
263193323Sed    case PPC::STWBRX:
264193323Sed      ThisStoreSize = 4;
265193323Sed      break;
266193323Sed    case PPC::STD_32:
267193323Sed    case PPC::STDX_32:
268193323Sed    case PPC::STD:
269193323Sed    case PPC::STDU:
270193323Sed    case PPC::STFD:
271193323Sed    case PPC::STFDX:
272193323Sed    case PPC::STDX:
273193323Sed    case PPC::STDUX:
274193323Sed      ThisStoreSize = 8;
275193323Sed      break;
276193323Sed    case PPC::STVX:
277193323Sed    case PPC::STVXL:
278193323Sed      ThisStoreSize = 16;
279193323Sed      break;
280193323Sed    }
281193323Sed
282193323Sed    StoreSize[NumStores] = ThisStoreSize;
283193323Sed    StorePtr1[NumStores] = Node->getOperand(1);
284193323Sed    StorePtr2[NumStores] = Node->getOperand(2);
285193323Sed    ++NumStores;
286193323Sed  }
287193323Sed
288193323Sed  if (InstrType == PPCII::PPC970_BRU || isSingle)
289193323Sed    NumIssued = 4;  // Terminate a d-group.
290193323Sed  ++NumIssued;
291193323Sed
292193323Sed  // If this instruction is cracked into two ops by the decoder, remember that
293193323Sed  // we issued two pieces.
294193323Sed  if (isCracked)
295193323Sed    ++NumIssued;
296193323Sed
297193323Sed  if (NumIssued == 5)
298193323Sed    EndDispatchGroup();
299193323Sed}
300193323Sed
301193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() {
302193323Sed  assert(NumIssued < 5 && "Illegal dispatch group!");
303193323Sed  ++NumIssued;
304193323Sed  if (NumIssued == 5)
305193323Sed    EndDispatchGroup();
306193323Sed}
307