PPCHazardRecognizers.cpp revision 204642
1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#define DEBUG_TYPE "pre-RA-sched" 15193323Sed#include "PPCHazardRecognizers.h" 16193323Sed#include "PPC.h" 17193323Sed#include "PPCInstrInfo.h" 18193323Sed#include "llvm/CodeGen/ScheduleDAG.h" 19193323Sed#include "llvm/Support/Debug.h" 20198090Srdivacky#include "llvm/Support/ErrorHandling.h" 21198090Srdivacky#include "llvm/Support/raw_ostream.h" 22193323Sedusing namespace llvm; 23193323Sed 24193323Sed//===----------------------------------------------------------------------===// 25193323Sed// PowerPC 970 Hazard Recognizer 26193323Sed// 27193323Sed// This models the dispatch group formation of the PPC970 processor. Dispatch 28193323Sed// groups are bundles of up to five instructions that can contain various mixes 29193323Sed// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 30193323Sed// branch instruction per-cycle. 31193323Sed// 32193323Sed// There are a number of restrictions to dispatch group formation: some 33193323Sed// instructions can only be issued in the first slot of a dispatch group, & some 34193323Sed// instructions fill an entire dispatch group. Additionally, only branches can 35193323Sed// issue in the 5th (last) slot. 36193323Sed// 37193323Sed// Finally, there are a number of "structural" hazards on the PPC970. These 38193323Sed// conditions cause large performance penalties due to misprediction, recovery, 39193323Sed// and replay logic that has to happen. These cases include setting a CTR and 40193323Sed// branching through it in the same dispatch group, and storing to an address, 41193323Sed// then loading from the same address within a dispatch group. To avoid these 42193323Sed// conditions, we insert no-op instructions when appropriate. 43193323Sed// 44193323Sed// FIXME: This is missing some significant cases: 45193323Sed// 1. Modeling of microcoded instructions. 46193323Sed// 2. Handling of serialized operations. 47193323Sed// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 48193323Sed// 49193323Sed 50193323SedPPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) 51193323Sed : TII(tii) { 52193323Sed EndDispatchGroup(); 53193323Sed} 54193323Sed 55193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() { 56198090Srdivacky DEBUG(errs() << "=== Start of dispatch group\n"); 57193323Sed NumIssued = 0; 58193323Sed 59193323Sed // Structural hazard info. 60193323Sed HasCTRSet = false; 61193323Sed NumStores = 0; 62193323Sed} 63193323Sed 64193323Sed 65193323SedPPCII::PPC970_Unit 66193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode, 67193323Sed bool &isFirst, bool &isSingle, 68193323Sed bool &isCracked, 69193323Sed bool &isLoad, bool &isStore) { 70193323Sed if ((int)Opcode >= 0) { 71193323Sed isFirst = isSingle = isCracked = isLoad = isStore = false; 72193323Sed return PPCII::PPC970_Pseudo; 73193323Sed } 74193323Sed Opcode = ~Opcode; 75193323Sed 76193323Sed const TargetInstrDesc &TID = TII.get(Opcode); 77193323Sed 78193323Sed isLoad = TID.mayLoad(); 79193323Sed isStore = TID.mayStore(); 80193323Sed 81193323Sed unsigned TSFlags = TID.TSFlags; 82193323Sed 83193323Sed isFirst = TSFlags & PPCII::PPC970_First; 84193323Sed isSingle = TSFlags & PPCII::PPC970_Single; 85193323Sed isCracked = TSFlags & PPCII::PPC970_Cracked; 86193323Sed return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 87193323Sed} 88193323Sed 89193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer 90193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 91193323Sedbool PPCHazardRecognizer970:: 92193323SedisLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { 93193323Sed for (unsigned i = 0, e = NumStores; i != e; ++i) { 94193323Sed // Handle exact and commuted addresses. 95193323Sed if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) 96193323Sed return true; 97193323Sed if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) 98193323Sed return true; 99193323Sed 100193323Sed // Okay, we don't have an exact match, if this is an indexed offset, see if 101193323Sed // we have overlap (which happens during fp->int conversion for example). 102193323Sed if (StorePtr2[i] == Ptr2) { 103193323Sed if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) 104193323Sed if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { 105193323Sed // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 106193323Sed // to see if the load and store actually overlap. 107193323Sed int StoreOffs = StoreOffset->getZExtValue(); 108193323Sed int LoadOffs = LoadOffset->getZExtValue(); 109193323Sed if (StoreOffs < LoadOffs) { 110193323Sed if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; 111193323Sed } else { 112193323Sed if (int(LoadOffs+LoadSize) > StoreOffs) return true; 113193323Sed } 114193323Sed } 115193323Sed } 116193323Sed } 117193323Sed return false; 118193323Sed} 119193323Sed 120193323Sed/// getHazardType - We return hazard for any non-branch instruction that would 121203954Srdivacky/// terminate the dispatch group. We turn NoopHazard for any 122193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a 123193323Sed/// pipeline flush. 124193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 125193323SedgetHazardType(SUnit *SU) { 126193323Sed const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); 127193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 128193323Sed PPCII::PPC970_Unit InstrType = 129193323Sed GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, 130193323Sed isLoad, isStore); 131193323Sed if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 132193323Sed unsigned Opcode = Node->getMachineOpcode(); 133193323Sed 134193323Sed // We can only issue a PPC970_First/PPC970_Single instruction (such as 135193323Sed // crand/mtspr/etc) if this is the first cycle of the dispatch group. 136193323Sed if (NumIssued != 0 && (isFirst || isSingle)) 137193323Sed return Hazard; 138193323Sed 139193323Sed // If this instruction is cracked into two ops by the decoder, we know that 140193323Sed // it is not a branch and that it cannot issue if 3 other instructions are 141193323Sed // already in the dispatch group. 142193323Sed if (isCracked && NumIssued > 2) 143193323Sed return Hazard; 144193323Sed 145193323Sed switch (InstrType) { 146198090Srdivacky default: llvm_unreachable("Unknown instruction type!"); 147193323Sed case PPCII::PPC970_FXU: 148193323Sed case PPCII::PPC970_LSU: 149193323Sed case PPCII::PPC970_FPU: 150193323Sed case PPCII::PPC970_VALU: 151193323Sed case PPCII::PPC970_VPERM: 152193323Sed // We can only issue a branch as the last instruction in a group. 153193323Sed if (NumIssued == 4) return Hazard; 154193323Sed break; 155193323Sed case PPCII::PPC970_CRU: 156193323Sed // We can only issue a CR instruction in the first two slots. 157193323Sed if (NumIssued >= 2) return Hazard; 158193323Sed break; 159193323Sed case PPCII::PPC970_BRU: 160193323Sed break; 161193323Sed } 162193323Sed 163193323Sed // Do not allow MTCTR and BCTRL to be in the same dispatch group. 164195340Sed if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) 165193323Sed return NoopHazard; 166193323Sed 167193323Sed // If this is a load following a store, make sure it's not to the same or 168193323Sed // overlapping address. 169193323Sed if (isLoad && NumStores) { 170193323Sed unsigned LoadSize; 171193323Sed switch (Opcode) { 172198090Srdivacky default: llvm_unreachable("Unknown load!"); 173193323Sed case PPC::LBZ: case PPC::LBZU: 174193323Sed case PPC::LBZX: 175193323Sed case PPC::LBZ8: case PPC::LBZU8: 176193323Sed case PPC::LBZX8: 177193323Sed case PPC::LVEBX: 178193323Sed LoadSize = 1; 179193323Sed break; 180193323Sed case PPC::LHA: case PPC::LHAU: 181193323Sed case PPC::LHAX: 182193323Sed case PPC::LHZ: case PPC::LHZU: 183193323Sed case PPC::LHZX: 184193323Sed case PPC::LVEHX: 185193323Sed case PPC::LHBRX: 186193323Sed case PPC::LHA8: case PPC::LHAU8: 187193323Sed case PPC::LHAX8: 188193323Sed case PPC::LHZ8: case PPC::LHZU8: 189193323Sed case PPC::LHZX8: 190193323Sed LoadSize = 2; 191193323Sed break; 192193323Sed case PPC::LFS: case PPC::LFSU: 193193323Sed case PPC::LFSX: 194193323Sed case PPC::LWZ: case PPC::LWZU: 195193323Sed case PPC::LWZX: 196193323Sed case PPC::LWA: 197193323Sed case PPC::LWAX: 198193323Sed case PPC::LVEWX: 199193323Sed case PPC::LWBRX: 200193323Sed case PPC::LWZ8: 201193323Sed case PPC::LWZX8: 202193323Sed LoadSize = 4; 203193323Sed break; 204193323Sed case PPC::LFD: case PPC::LFDU: 205193323Sed case PPC::LFDX: 206193323Sed case PPC::LD: case PPC::LDU: 207193323Sed case PPC::LDX: 208193323Sed LoadSize = 8; 209193323Sed break; 210193323Sed case PPC::LVX: 211193323Sed case PPC::LVXL: 212193323Sed LoadSize = 16; 213193323Sed break; 214193323Sed } 215193323Sed 216193323Sed if (isLoadOfStoredAddress(LoadSize, 217193323Sed Node->getOperand(0), Node->getOperand(1))) 218193323Sed return NoopHazard; 219193323Sed } 220193323Sed 221193323Sed return NoHazard; 222193323Sed} 223193323Sed 224193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 225193323Sed const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); 226193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 227193323Sed PPCII::PPC970_Unit InstrType = 228193323Sed GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, 229193323Sed isLoad, isStore); 230193323Sed if (InstrType == PPCII::PPC970_Pseudo) return; 231193323Sed unsigned Opcode = Node->getMachineOpcode(); 232193323Sed 233193323Sed // Update structural hazard information. 234193323Sed if (Opcode == PPC::MTCTR) HasCTRSet = true; 235193323Sed 236193323Sed // Track the address stored to. 237193323Sed if (isStore) { 238193323Sed unsigned ThisStoreSize; 239193323Sed switch (Opcode) { 240198090Srdivacky default: llvm_unreachable("Unknown store instruction!"); 241193323Sed case PPC::STB: case PPC::STB8: 242193323Sed case PPC::STBU: case PPC::STBU8: 243193323Sed case PPC::STBX: case PPC::STBX8: 244193323Sed case PPC::STVEBX: 245193323Sed ThisStoreSize = 1; 246193323Sed break; 247193323Sed case PPC::STH: case PPC::STH8: 248193323Sed case PPC::STHU: case PPC::STHU8: 249193323Sed case PPC::STHX: case PPC::STHX8: 250193323Sed case PPC::STVEHX: 251193323Sed case PPC::STHBRX: 252193323Sed ThisStoreSize = 2; 253193323Sed break; 254193323Sed case PPC::STFS: 255193323Sed case PPC::STFSU: 256193323Sed case PPC::STFSX: 257193323Sed case PPC::STWX: case PPC::STWX8: 258193323Sed case PPC::STWUX: 259193323Sed case PPC::STW: case PPC::STW8: 260204642Srdivacky case PPC::STWU: 261193323Sed case PPC::STVEWX: 262193323Sed case PPC::STFIWX: 263193323Sed case PPC::STWBRX: 264193323Sed ThisStoreSize = 4; 265193323Sed break; 266193323Sed case PPC::STD_32: 267193323Sed case PPC::STDX_32: 268193323Sed case PPC::STD: 269193323Sed case PPC::STDU: 270193323Sed case PPC::STFD: 271193323Sed case PPC::STFDX: 272193323Sed case PPC::STDX: 273193323Sed case PPC::STDUX: 274193323Sed ThisStoreSize = 8; 275193323Sed break; 276193323Sed case PPC::STVX: 277193323Sed case PPC::STVXL: 278193323Sed ThisStoreSize = 16; 279193323Sed break; 280193323Sed } 281193323Sed 282193323Sed StoreSize[NumStores] = ThisStoreSize; 283193323Sed StorePtr1[NumStores] = Node->getOperand(1); 284193323Sed StorePtr2[NumStores] = Node->getOperand(2); 285193323Sed ++NumStores; 286193323Sed } 287193323Sed 288193323Sed if (InstrType == PPCII::PPC970_BRU || isSingle) 289193323Sed NumIssued = 4; // Terminate a d-group. 290193323Sed ++NumIssued; 291193323Sed 292193323Sed // If this instruction is cracked into two ops by the decoder, remember that 293193323Sed // we issued two pieces. 294193323Sed if (isCracked) 295193323Sed ++NumIssued; 296193323Sed 297193323Sed if (NumIssued == 5) 298193323Sed EndDispatchGroup(); 299193323Sed} 300193323Sed 301193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() { 302193323Sed assert(NumIssued < 5 && "Illegal dispatch group!"); 303193323Sed ++NumIssued; 304193323Sed if (NumIssued == 5) 305193323Sed EndDispatchGroup(); 306193323Sed} 307