PPCHazardRecognizers.cpp revision 218893
1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#define DEBUG_TYPE "pre-RA-sched" 15193323Sed#include "PPCHazardRecognizers.h" 16193323Sed#include "PPC.h" 17193323Sed#include "PPCInstrInfo.h" 18193323Sed#include "llvm/CodeGen/ScheduleDAG.h" 19193323Sed#include "llvm/Support/Debug.h" 20198090Srdivacky#include "llvm/Support/ErrorHandling.h" 21198090Srdivacky#include "llvm/Support/raw_ostream.h" 22193323Sedusing namespace llvm; 23193323Sed 24193323Sed//===----------------------------------------------------------------------===// 25193323Sed// PowerPC 970 Hazard Recognizer 26193323Sed// 27193323Sed// This models the dispatch group formation of the PPC970 processor. Dispatch 28193323Sed// groups are bundles of up to five instructions that can contain various mixes 29218893Sdim// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 30193323Sed// branch instruction per-cycle. 31193323Sed// 32193323Sed// There are a number of restrictions to dispatch group formation: some 33193323Sed// instructions can only be issued in the first slot of a dispatch group, & some 34193323Sed// instructions fill an entire dispatch group. Additionally, only branches can 35193323Sed// issue in the 5th (last) slot. 36193323Sed// 37193323Sed// Finally, there are a number of "structural" hazards on the PPC970. These 38193323Sed// conditions cause large performance penalties due to misprediction, recovery, 39193323Sed// and replay logic that has to happen. These cases include setting a CTR and 40193323Sed// branching through it in the same dispatch group, and storing to an address, 41193323Sed// then loading from the same address within a dispatch group. To avoid these 42193323Sed// conditions, we insert no-op instructions when appropriate. 43193323Sed// 44193323Sed// FIXME: This is missing some significant cases: 45193323Sed// 1. Modeling of microcoded instructions. 46193323Sed// 2. Handling of serialized operations. 47193323Sed// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 48193323Sed// 49193323Sed 50193323SedPPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) 51193323Sed : TII(tii) { 52193323Sed EndDispatchGroup(); 53193323Sed} 54193323Sed 55193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() { 56198090Srdivacky DEBUG(errs() << "=== Start of dispatch group\n"); 57193323Sed NumIssued = 0; 58218893Sdim 59193323Sed // Structural hazard info. 60193323Sed HasCTRSet = false; 61193323Sed NumStores = 0; 62193323Sed} 63193323Sed 64193323Sed 65218893SdimPPCII::PPC970_Unit 66193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode, 67193323Sed bool &isFirst, bool &isSingle, 68193323Sed bool &isCracked, 69193323Sed bool &isLoad, bool &isStore) { 70193323Sed if ((int)Opcode >= 0) { 71193323Sed isFirst = isSingle = isCracked = isLoad = isStore = false; 72193323Sed return PPCII::PPC970_Pseudo; 73193323Sed } 74193323Sed Opcode = ~Opcode; 75218893Sdim 76193323Sed const TargetInstrDesc &TID = TII.get(Opcode); 77218893Sdim 78193323Sed isLoad = TID.mayLoad(); 79193323Sed isStore = TID.mayStore(); 80218893Sdim 81210299Sed uint64_t TSFlags = TID.TSFlags; 82218893Sdim 83193323Sed isFirst = TSFlags & PPCII::PPC970_First; 84193323Sed isSingle = TSFlags & PPCII::PPC970_Single; 85193323Sed isCracked = TSFlags & PPCII::PPC970_Cracked; 86193323Sed return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 87193323Sed} 88193323Sed 89193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer 90193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 91193323Sedbool PPCHazardRecognizer970:: 92193323SedisLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { 93193323Sed for (unsigned i = 0, e = NumStores; i != e; ++i) { 94193323Sed // Handle exact and commuted addresses. 95193323Sed if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) 96193323Sed return true; 97193323Sed if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) 98193323Sed return true; 99218893Sdim 100193323Sed // Okay, we don't have an exact match, if this is an indexed offset, see if 101193323Sed // we have overlap (which happens during fp->int conversion for example). 102193323Sed if (StorePtr2[i] == Ptr2) { 103193323Sed if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) 104193323Sed if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { 105193323Sed // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 106193323Sed // to see if the load and store actually overlap. 107193323Sed int StoreOffs = StoreOffset->getZExtValue(); 108193323Sed int LoadOffs = LoadOffset->getZExtValue(); 109193323Sed if (StoreOffs < LoadOffs) { 110193323Sed if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; 111193323Sed } else { 112193323Sed if (int(LoadOffs+LoadSize) > StoreOffs) return true; 113193323Sed } 114193323Sed } 115193323Sed } 116193323Sed } 117193323Sed return false; 118193323Sed} 119193323Sed 120193323Sed/// getHazardType - We return hazard for any non-branch instruction that would 121203954Srdivacky/// terminate the dispatch group. We turn NoopHazard for any 122193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a 123193323Sed/// pipeline flush. 124193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 125218893SdimgetHazardType(SUnit *SU, int Stalls) { 126218893Sdim assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); 127218893Sdim 128218893Sdim const SDNode *Node = SU->getNode()->getGluedMachineNode(); 129193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 130218893Sdim PPCII::PPC970_Unit InstrType = 131193323Sed GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, 132193323Sed isLoad, isStore); 133218893Sdim if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 134193323Sed unsigned Opcode = Node->getMachineOpcode(); 135193323Sed 136193323Sed // We can only issue a PPC970_First/PPC970_Single instruction (such as 137193323Sed // crand/mtspr/etc) if this is the first cycle of the dispatch group. 138193323Sed if (NumIssued != 0 && (isFirst || isSingle)) 139193323Sed return Hazard; 140218893Sdim 141193323Sed // If this instruction is cracked into two ops by the decoder, we know that 142193323Sed // it is not a branch and that it cannot issue if 3 other instructions are 143193323Sed // already in the dispatch group. 144193323Sed if (isCracked && NumIssued > 2) 145193323Sed return Hazard; 146218893Sdim 147193323Sed switch (InstrType) { 148198090Srdivacky default: llvm_unreachable("Unknown instruction type!"); 149193323Sed case PPCII::PPC970_FXU: 150193323Sed case PPCII::PPC970_LSU: 151193323Sed case PPCII::PPC970_FPU: 152193323Sed case PPCII::PPC970_VALU: 153193323Sed case PPCII::PPC970_VPERM: 154193323Sed // We can only issue a branch as the last instruction in a group. 155193323Sed if (NumIssued == 4) return Hazard; 156193323Sed break; 157193323Sed case PPCII::PPC970_CRU: 158193323Sed // We can only issue a CR instruction in the first two slots. 159193323Sed if (NumIssued >= 2) return Hazard; 160193323Sed break; 161193323Sed case PPCII::PPC970_BRU: 162193323Sed break; 163193323Sed } 164218893Sdim 165193323Sed // Do not allow MTCTR and BCTRL to be in the same dispatch group. 166195340Sed if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) 167193323Sed return NoopHazard; 168218893Sdim 169193323Sed // If this is a load following a store, make sure it's not to the same or 170193323Sed // overlapping address. 171193323Sed if (isLoad && NumStores) { 172193323Sed unsigned LoadSize; 173193323Sed switch (Opcode) { 174198090Srdivacky default: llvm_unreachable("Unknown load!"); 175193323Sed case PPC::LBZ: case PPC::LBZU: 176193323Sed case PPC::LBZX: 177193323Sed case PPC::LBZ8: case PPC::LBZU8: 178193323Sed case PPC::LBZX8: 179193323Sed case PPC::LVEBX: 180193323Sed LoadSize = 1; 181193323Sed break; 182193323Sed case PPC::LHA: case PPC::LHAU: 183193323Sed case PPC::LHAX: 184193323Sed case PPC::LHZ: case PPC::LHZU: 185193323Sed case PPC::LHZX: 186193323Sed case PPC::LVEHX: 187193323Sed case PPC::LHBRX: 188193323Sed case PPC::LHA8: case PPC::LHAU8: 189193323Sed case PPC::LHAX8: 190193323Sed case PPC::LHZ8: case PPC::LHZU8: 191193323Sed case PPC::LHZX8: 192193323Sed LoadSize = 2; 193193323Sed break; 194193323Sed case PPC::LFS: case PPC::LFSU: 195193323Sed case PPC::LFSX: 196193323Sed case PPC::LWZ: case PPC::LWZU: 197193323Sed case PPC::LWZX: 198193323Sed case PPC::LWA: 199193323Sed case PPC::LWAX: 200193323Sed case PPC::LVEWX: 201193323Sed case PPC::LWBRX: 202193323Sed case PPC::LWZ8: 203193323Sed case PPC::LWZX8: 204193323Sed LoadSize = 4; 205193323Sed break; 206193323Sed case PPC::LFD: case PPC::LFDU: 207193323Sed case PPC::LFDX: 208193323Sed case PPC::LD: case PPC::LDU: 209193323Sed case PPC::LDX: 210193323Sed LoadSize = 8; 211193323Sed break; 212193323Sed case PPC::LVX: 213193323Sed case PPC::LVXL: 214193323Sed LoadSize = 16; 215193323Sed break; 216193323Sed } 217218893Sdim 218218893Sdim if (isLoadOfStoredAddress(LoadSize, 219193323Sed Node->getOperand(0), Node->getOperand(1))) 220193323Sed return NoopHazard; 221193323Sed } 222218893Sdim 223193323Sed return NoHazard; 224193323Sed} 225193323Sed 226193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 227218893Sdim const SDNode *Node = SU->getNode()->getGluedMachineNode(); 228193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 229218893Sdim PPCII::PPC970_Unit InstrType = 230193323Sed GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, 231193323Sed isLoad, isStore); 232218893Sdim if (InstrType == PPCII::PPC970_Pseudo) return; 233193323Sed unsigned Opcode = Node->getMachineOpcode(); 234193323Sed 235193323Sed // Update structural hazard information. 236193323Sed if (Opcode == PPC::MTCTR) HasCTRSet = true; 237218893Sdim 238193323Sed // Track the address stored to. 239193323Sed if (isStore) { 240193323Sed unsigned ThisStoreSize; 241193323Sed switch (Opcode) { 242198090Srdivacky default: llvm_unreachable("Unknown store instruction!"); 243193323Sed case PPC::STB: case PPC::STB8: 244193323Sed case PPC::STBU: case PPC::STBU8: 245193323Sed case PPC::STBX: case PPC::STBX8: 246193323Sed case PPC::STVEBX: 247193323Sed ThisStoreSize = 1; 248193323Sed break; 249193323Sed case PPC::STH: case PPC::STH8: 250193323Sed case PPC::STHU: case PPC::STHU8: 251193323Sed case PPC::STHX: case PPC::STHX8: 252193323Sed case PPC::STVEHX: 253193323Sed case PPC::STHBRX: 254193323Sed ThisStoreSize = 2; 255193323Sed break; 256193323Sed case PPC::STFS: 257193323Sed case PPC::STFSU: 258193323Sed case PPC::STFSX: 259193323Sed case PPC::STWX: case PPC::STWX8: 260193323Sed case PPC::STWUX: 261193323Sed case PPC::STW: case PPC::STW8: 262204642Srdivacky case PPC::STWU: 263193323Sed case PPC::STVEWX: 264193323Sed case PPC::STFIWX: 265193323Sed case PPC::STWBRX: 266193323Sed ThisStoreSize = 4; 267193323Sed break; 268193323Sed case PPC::STD_32: 269193323Sed case PPC::STDX_32: 270193323Sed case PPC::STD: 271193323Sed case PPC::STDU: 272193323Sed case PPC::STFD: 273193323Sed case PPC::STFDX: 274193323Sed case PPC::STDX: 275193323Sed case PPC::STDUX: 276193323Sed ThisStoreSize = 8; 277193323Sed break; 278193323Sed case PPC::STVX: 279193323Sed case PPC::STVXL: 280193323Sed ThisStoreSize = 16; 281193323Sed break; 282193323Sed } 283218893Sdim 284193323Sed StoreSize[NumStores] = ThisStoreSize; 285193323Sed StorePtr1[NumStores] = Node->getOperand(1); 286193323Sed StorePtr2[NumStores] = Node->getOperand(2); 287193323Sed ++NumStores; 288193323Sed } 289218893Sdim 290193323Sed if (InstrType == PPCII::PPC970_BRU || isSingle) 291193323Sed NumIssued = 4; // Terminate a d-group. 292193323Sed ++NumIssued; 293218893Sdim 294193323Sed // If this instruction is cracked into two ops by the decoder, remember that 295193323Sed // we issued two pieces. 296193323Sed if (isCracked) 297193323Sed ++NumIssued; 298218893Sdim 299193323Sed if (NumIssued == 5) 300193323Sed EndDispatchGroup(); 301193323Sed} 302193323Sed 303193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() { 304193323Sed assert(NumIssued < 5 && "Illegal dispatch group!"); 305193323Sed ++NumIssued; 306193323Sed if (NumIssued == 5) 307193323Sed EndDispatchGroup(); 308193323Sed} 309