1193323Sed//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements hazard recognizers for scheduling on PowerPC processors. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "PPCHazardRecognizers.h" 15193323Sed#include "PPC.h" 16193323Sed#include "PPCInstrInfo.h" 17276479Sdim#include "PPCTargetMachine.h" 18193323Sed#include "llvm/CodeGen/ScheduleDAG.h" 19193323Sed#include "llvm/Support/Debug.h" 20198090Srdivacky#include "llvm/Support/ErrorHandling.h" 21198090Srdivacky#include "llvm/Support/raw_ostream.h" 22193323Sedusing namespace llvm; 23193323Sed 24276479Sdim#define DEBUG_TYPE "pre-RA-sched" 25276479Sdim 26276479Sdimbool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { 27276479Sdim // FIXME: Move this. 28276479Sdim if (isBCTRAfterSet(SU)) 29276479Sdim return true; 30276479Sdim 31234353Sdim const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 32234353Sdim if (!MCID) 33276479Sdim return false; 34234353Sdim 35276479Sdim if (!MCID->mayLoad()) 36276479Sdim return false; 37276479Sdim 38276479Sdim // SU is a load; for any predecessors in this dispatch group, that are stores, 39276479Sdim // and with which we have an ordering dependency, return true. 40276479Sdim for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 41276479Sdim const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 42276479Sdim if (!PredMCID || !PredMCID->mayStore()) 43276479Sdim continue; 44276479Sdim 45276479Sdim if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) 46276479Sdim continue; 47276479Sdim 48276479Sdim for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 49276479Sdim if (SU->Preds[i].getSUnit() == CurGroup[j]) 50276479Sdim return true; 51276479Sdim } 52276479Sdim 53276479Sdim return false; 54234353Sdim} 55234353Sdim 56276479Sdimbool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { 57276479Sdim const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 58276479Sdim if (!MCID) 59276479Sdim return false; 60276479Sdim 61276479Sdim if (!MCID->isBranch()) 62276479Sdim return false; 63276479Sdim 64276479Sdim // SU is a branch; for any predecessors in this dispatch group, with which we 65276479Sdim // have a data dependence and set the counter register, return true. 66276479Sdim for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 67276479Sdim const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 68276479Sdim if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) 69276479Sdim continue; 70276479Sdim 71276479Sdim if (SU->Preds[i].isCtrl()) 72276479Sdim continue; 73276479Sdim 74276479Sdim for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 75276479Sdim if (SU->Preds[i].getSUnit() == CurGroup[j]) 76276479Sdim return true; 77276479Sdim } 78276479Sdim 79276479Sdim return false; 80276479Sdim} 81276479Sdim 82276479Sdim// FIXME: Remove this when we don't need this: 83276479Sdimnamespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } 84276479Sdim 85276479Sdim// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. 86276479Sdim 87276479Sdimbool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, 88276479Sdim unsigned &NSlots) { 89276479Sdim // FIXME: Indirectly, this information is contained in the itinerary, and 90276479Sdim // we should derive it from there instead of separately specifying it 91276479Sdim // here. 92276479Sdim unsigned IIC = MCID->getSchedClass(); 93276479Sdim switch (IIC) { 94276479Sdim default: 95276479Sdim NSlots = 1; 96276479Sdim break; 97276479Sdim case PPC::Sched::IIC_IntDivW: 98276479Sdim case PPC::Sched::IIC_IntDivD: 99276479Sdim case PPC::Sched::IIC_LdStLoadUpd: 100276479Sdim case PPC::Sched::IIC_LdStLDU: 101276479Sdim case PPC::Sched::IIC_LdStLFDU: 102276479Sdim case PPC::Sched::IIC_LdStLFDUX: 103276479Sdim case PPC::Sched::IIC_LdStLHA: 104276479Sdim case PPC::Sched::IIC_LdStLHAU: 105276479Sdim case PPC::Sched::IIC_LdStLWA: 106276479Sdim case PPC::Sched::IIC_LdStSTDU: 107276479Sdim case PPC::Sched::IIC_LdStSTFDU: 108276479Sdim NSlots = 2; 109276479Sdim break; 110276479Sdim case PPC::Sched::IIC_LdStLoadUpdX: 111276479Sdim case PPC::Sched::IIC_LdStLDUX: 112276479Sdim case PPC::Sched::IIC_LdStLHAUX: 113276479Sdim case PPC::Sched::IIC_LdStLWARX: 114276479Sdim case PPC::Sched::IIC_LdStLDARX: 115276479Sdim case PPC::Sched::IIC_LdStSTDUX: 116276479Sdim case PPC::Sched::IIC_LdStSTDCX: 117276479Sdim case PPC::Sched::IIC_LdStSTWCX: 118276479Sdim case PPC::Sched::IIC_BrMCRX: // mtcr 119276479Sdim // FIXME: Add sync/isync (here and in the itinerary). 120276479Sdim NSlots = 4; 121276479Sdim break; 122276479Sdim } 123276479Sdim 124276479Sdim // FIXME: record-form instructions need a different itinerary class. 125276479Sdim if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) 126276479Sdim NSlots = 2; 127276479Sdim 128276479Sdim switch (IIC) { 129276479Sdim default: 130276479Sdim // All multi-slot instructions must come first. 131276479Sdim return NSlots > 1; 132276479Sdim case PPC::Sched::IIC_BrCR: // cr logicals 133276479Sdim case PPC::Sched::IIC_SprMFCR: 134276479Sdim case PPC::Sched::IIC_SprMFCRF: 135276479Sdim case PPC::Sched::IIC_SprMTSPR: 136276479Sdim return true; 137276479Sdim } 138276479Sdim} 139276479Sdim 140234353SdimScheduleHazardRecognizer::HazardType 141276479SdimPPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 142276479Sdim if (Stalls == 0 && isLoadAfterStore(SU)) 143276479Sdim return NoopHazard; 144276479Sdim 145234353Sdim return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); 146234353Sdim} 147234353Sdim 148276479Sdimbool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 149276479Sdim const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 150276479Sdim unsigned NSlots; 151276479Sdim if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) 152276479Sdim return true; 153276479Sdim 154276479Sdim return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); 155234353Sdim} 156234353Sdim 157276479Sdimunsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { 158276479Sdim // We only need to fill out a maximum of 5 slots here: The 6th slot could 159276479Sdim // only be a second branch, and otherwise the next instruction will start a 160276479Sdim // new group. 161276479Sdim if (isLoadAfterStore(SU) && CurSlots < 6) { 162276479Sdim unsigned Directive = 163288943Sdim DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); 164276479Sdim // If we're using a special group-terminating nop, then we need only one. 165276479Sdim if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 166276479Sdim Directive == PPC::DIR_PWR8 ) 167276479Sdim return 1; 168276479Sdim 169276479Sdim return 5 - CurSlots; 170276479Sdim } 171276479Sdim 172276479Sdim return ScoreboardHazardRecognizer::PreEmitNoops(SU); 173234353Sdim} 174234353Sdim 175276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { 176276479Sdim const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 177276479Sdim if (MCID) { 178276479Sdim if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { 179276479Sdim CurGroup.clear(); 180276479Sdim CurSlots = CurBranches = 0; 181276479Sdim } else { 182276479Sdim DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << 183276479Sdim SU->NodeNum << "): "); 184276479Sdim DEBUG(DAG->dumpNode(SU)); 185276479Sdim 186276479Sdim unsigned NSlots; 187276479Sdim bool MustBeFirst = mustComeFirst(MCID, NSlots); 188276479Sdim 189276479Sdim // If this instruction must come first, but does not, then it starts a 190276479Sdim // new group. 191276479Sdim if (MustBeFirst && CurSlots) { 192276479Sdim CurSlots = CurBranches = 0; 193276479Sdim CurGroup.clear(); 194276479Sdim } 195276479Sdim 196276479Sdim CurSlots += NSlots; 197276479Sdim CurGroup.push_back(SU); 198276479Sdim 199276479Sdim if (MCID->isBranch()) 200276479Sdim ++CurBranches; 201276479Sdim } 202276479Sdim } 203276479Sdim 204276479Sdim return ScoreboardHazardRecognizer::EmitInstruction(SU); 205276479Sdim} 206276479Sdim 207276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { 208276479Sdim return ScoreboardHazardRecognizer::AdvanceCycle(); 209276479Sdim} 210276479Sdim 211276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { 212276479Sdim llvm_unreachable("Bottom-up scheduling not supported"); 213276479Sdim} 214276479Sdim 215276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::Reset() { 216276479Sdim CurGroup.clear(); 217276479Sdim CurSlots = CurBranches = 0; 218276479Sdim return ScoreboardHazardRecognizer::Reset(); 219276479Sdim} 220276479Sdim 221276479Sdimvoid PPCDispatchGroupSBHazardRecognizer::EmitNoop() { 222276479Sdim unsigned Directive = 223288943Sdim DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); 224276479Sdim // If the group has now filled all of its slots, or if we're using a special 225276479Sdim // group-terminating nop, the group is complete. 226276479Sdim if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 227276479Sdim Directive == PPC::DIR_PWR8 || CurSlots == 6) { 228276479Sdim CurGroup.clear(); 229276479Sdim CurSlots = CurBranches = 0; 230276479Sdim } else { 231276479Sdim CurGroup.push_back(nullptr); 232276479Sdim ++CurSlots; 233276479Sdim } 234276479Sdim} 235276479Sdim 236234353Sdim//===----------------------------------------------------------------------===// 237193323Sed// PowerPC 970 Hazard Recognizer 238193323Sed// 239193323Sed// This models the dispatch group formation of the PPC970 processor. Dispatch 240193323Sed// groups are bundles of up to five instructions that can contain various mixes 241218893Sdim// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 242193323Sed// branch instruction per-cycle. 243193323Sed// 244193323Sed// There are a number of restrictions to dispatch group formation: some 245193323Sed// instructions can only be issued in the first slot of a dispatch group, & some 246193323Sed// instructions fill an entire dispatch group. Additionally, only branches can 247193323Sed// issue in the 5th (last) slot. 248193323Sed// 249193323Sed// Finally, there are a number of "structural" hazards on the PPC970. These 250193323Sed// conditions cause large performance penalties due to misprediction, recovery, 251193323Sed// and replay logic that has to happen. These cases include setting a CTR and 252193323Sed// branching through it in the same dispatch group, and storing to an address, 253193323Sed// then loading from the same address within a dispatch group. To avoid these 254193323Sed// conditions, we insert no-op instructions when appropriate. 255193323Sed// 256193323Sed// FIXME: This is missing some significant cases: 257193323Sed// 1. Modeling of microcoded instructions. 258193323Sed// 2. Handling of serialized operations. 259193323Sed// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 260193323Sed// 261193323Sed 262276479SdimPPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) 263276479Sdim : DAG(DAG) { 264193323Sed EndDispatchGroup(); 265193323Sed} 266193323Sed 267193323Sedvoid PPCHazardRecognizer970::EndDispatchGroup() { 268198090Srdivacky DEBUG(errs() << "=== Start of dispatch group\n"); 269193323Sed NumIssued = 0; 270218893Sdim 271193323Sed // Structural hazard info. 272193323Sed HasCTRSet = false; 273193323Sed NumStores = 0; 274193323Sed} 275193323Sed 276193323Sed 277218893SdimPPCII::PPC970_Unit 278193323SedPPCHazardRecognizer970::GetInstrType(unsigned Opcode, 279193323Sed bool &isFirst, bool &isSingle, 280193323Sed bool &isCracked, 281193323Sed bool &isLoad, bool &isStore) { 282276479Sdim const MCInstrDesc &MCID = DAG.TII->get(Opcode); 283218893Sdim 284224145Sdim isLoad = MCID.mayLoad(); 285224145Sdim isStore = MCID.mayStore(); 286218893Sdim 287224145Sdim uint64_t TSFlags = MCID.TSFlags; 288218893Sdim 289193323Sed isFirst = TSFlags & PPCII::PPC970_First; 290193323Sed isSingle = TSFlags & PPCII::PPC970_Single; 291193323Sed isCracked = TSFlags & PPCII::PPC970_Cracked; 292193323Sed return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 293193323Sed} 294193323Sed 295193323Sed/// isLoadOfStoredAddress - If we have a load from the previously stored pointer 296193323Sed/// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 297193323Sedbool PPCHazardRecognizer970:: 298234353SdimisLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, 299234353Sdim const Value *LoadValue) const { 300193323Sed for (unsigned i = 0, e = NumStores; i != e; ++i) { 301193323Sed // Handle exact and commuted addresses. 302234353Sdim if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) 303193323Sed return true; 304218893Sdim 305193323Sed // Okay, we don't have an exact match, if this is an indexed offset, see if 306193323Sed // we have overlap (which happens during fp->int conversion for example). 307234353Sdim if (StoreValue[i] == LoadValue) { 308234353Sdim // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 309234353Sdim // to see if the load and store actually overlap. 310234353Sdim if (StoreOffset[i] < LoadOffset) { 311234353Sdim if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; 312234353Sdim } else { 313234353Sdim if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; 314234353Sdim } 315193323Sed } 316193323Sed } 317193323Sed return false; 318193323Sed} 319193323Sed 320193323Sed/// getHazardType - We return hazard for any non-branch instruction that would 321203954Srdivacky/// terminate the dispatch group. We turn NoopHazard for any 322193323Sed/// instructions that wouldn't terminate the dispatch group that would cause a 323193323Sed/// pipeline flush. 324193323SedScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 325218893SdimgetHazardType(SUnit *SU, int Stalls) { 326218893Sdim assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); 327218893Sdim 328234353Sdim MachineInstr *MI = SU->getInstr(); 329234353Sdim 330234353Sdim if (MI->isDebugValue()) 331234353Sdim return NoHazard; 332234353Sdim 333234353Sdim unsigned Opcode = MI->getOpcode(); 334193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 335218893Sdim PPCII::PPC970_Unit InstrType = 336234353Sdim GetInstrType(Opcode, isFirst, isSingle, isCracked, 337193323Sed isLoad, isStore); 338218893Sdim if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 339193323Sed 340193323Sed // We can only issue a PPC970_First/PPC970_Single instruction (such as 341193323Sed // crand/mtspr/etc) if this is the first cycle of the dispatch group. 342193323Sed if (NumIssued != 0 && (isFirst || isSingle)) 343193323Sed return Hazard; 344218893Sdim 345193323Sed // If this instruction is cracked into two ops by the decoder, we know that 346193323Sed // it is not a branch and that it cannot issue if 3 other instructions are 347193323Sed // already in the dispatch group. 348193323Sed if (isCracked && NumIssued > 2) 349193323Sed return Hazard; 350218893Sdim 351193323Sed switch (InstrType) { 352198090Srdivacky default: llvm_unreachable("Unknown instruction type!"); 353193323Sed case PPCII::PPC970_FXU: 354193323Sed case PPCII::PPC970_LSU: 355193323Sed case PPCII::PPC970_FPU: 356193323Sed case PPCII::PPC970_VALU: 357193323Sed case PPCII::PPC970_VPERM: 358193323Sed // We can only issue a branch as the last instruction in a group. 359193323Sed if (NumIssued == 4) return Hazard; 360193323Sed break; 361193323Sed case PPCII::PPC970_CRU: 362193323Sed // We can only issue a CR instruction in the first two slots. 363193323Sed if (NumIssued >= 2) return Hazard; 364193323Sed break; 365193323Sed case PPCII::PPC970_BRU: 366193323Sed break; 367193323Sed } 368218893Sdim 369193323Sed // Do not allow MTCTR and BCTRL to be in the same dispatch group. 370249423Sdim if (HasCTRSet && Opcode == PPC::BCTRL) 371193323Sed return NoopHazard; 372218893Sdim 373193323Sed // If this is a load following a store, make sure it's not to the same or 374193323Sed // overlapping address. 375234353Sdim if (isLoad && NumStores && !MI->memoperands_empty()) { 376234353Sdim MachineMemOperand *MO = *MI->memoperands_begin(); 377234353Sdim if (isLoadOfStoredAddress(MO->getSize(), 378234353Sdim MO->getOffset(), MO->getValue())) 379193323Sed return NoopHazard; 380193323Sed } 381218893Sdim 382193323Sed return NoHazard; 383193323Sed} 384193323Sed 385193323Sedvoid PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 386234353Sdim MachineInstr *MI = SU->getInstr(); 387234353Sdim 388234353Sdim if (MI->isDebugValue()) 389234353Sdim return; 390234353Sdim 391234353Sdim unsigned Opcode = MI->getOpcode(); 392193323Sed bool isFirst, isSingle, isCracked, isLoad, isStore; 393218893Sdim PPCII::PPC970_Unit InstrType = 394234353Sdim GetInstrType(Opcode, isFirst, isSingle, isCracked, 395193323Sed isLoad, isStore); 396218893Sdim if (InstrType == PPCII::PPC970_Pseudo) return; 397193323Sed 398193323Sed // Update structural hazard information. 399223017Sdim if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; 400218893Sdim 401193323Sed // Track the address stored to. 402234353Sdim if (isStore && NumStores < 4 && !MI->memoperands_empty()) { 403234353Sdim MachineMemOperand *MO = *MI->memoperands_begin(); 404234353Sdim StoreSize[NumStores] = MO->getSize(); 405234353Sdim StoreOffset[NumStores] = MO->getOffset(); 406234353Sdim StoreValue[NumStores] = MO->getValue(); 407193323Sed ++NumStores; 408193323Sed } 409218893Sdim 410193323Sed if (InstrType == PPCII::PPC970_BRU || isSingle) 411193323Sed NumIssued = 4; // Terminate a d-group. 412193323Sed ++NumIssued; 413218893Sdim 414193323Sed // If this instruction is cracked into two ops by the decoder, remember that 415193323Sed // we issued two pieces. 416193323Sed if (isCracked) 417193323Sed ++NumIssued; 418218893Sdim 419193323Sed if (NumIssued == 5) 420193323Sed EndDispatchGroup(); 421193323Sed} 422193323Sed 423193323Sedvoid PPCHazardRecognizer970::AdvanceCycle() { 424193323Sed assert(NumIssued < 5 && "Illegal dispatch group!"); 425193323Sed ++NumIssued; 426193323Sed if (NumIssued == 5) 427193323Sed EndDispatchGroup(); 428193323Sed} 429234353Sdim 430234353Sdimvoid PPCHazardRecognizer970::Reset() { 431234353Sdim EndDispatchGroup(); 432234353Sdim} 433234353Sdim 434