// SystemZHazardRecognizer.cpp revision 311116
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a hazard recognizer for the SystemZ scheduler.
//
// This class is used by the SystemZ scheduling strategy to maintain
// the state during scheduling, and provide cost functions for
// scheduling candidates. This includes:
//
// * Decoder grouping. A decoder group can maximally hold 3 uops, and
//   instructions that always begin a new group should be scheduled when
//   the current decoder group is empty.
// * Processor resources usage. It is beneficial to balance the use of
//   resources.
//
// ===---------------------------------------------------------------------===//

#include "SystemZHazardRecognizer.h"
#include "llvm/ADT/Statistic.h"

using namespace llvm;

#define DEBUG_TYPE "misched"

// This is the limit of processor resource usage at which the
// scheduler should try to look for other instructions (not using the
// critical resource).
34311116Sdimstatic cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 35311116Sdim cl::desc("The OOO window for processor " 36311116Sdim "resources during scheduling."), 37311116Sdim cl::init(8)); 38311116Sdim 39311116SdimSystemZHazardRecognizer:: 40311116SdimSystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), 41311116Sdim SchedModel(nullptr) {} 42311116Sdim 43311116Sdimunsigned SystemZHazardRecognizer:: 44311116SdimgetNumDecoderSlots(SUnit *SU) const { 45311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 46311116Sdim if (!SC->isValid()) 47311116Sdim return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 48311116Sdim 49311116Sdim if (SC->BeginGroup) { 50311116Sdim if (!SC->EndGroup) 51311116Sdim return 2; // Cracked instruction 52311116Sdim else 53311116Sdim return 3; // Expanded/group-alone instruction 54311116Sdim } 55311116Sdim 56311116Sdim return 1; // Normal instruction 57311116Sdim} 58311116Sdim 59311116Sdimunsigned SystemZHazardRecognizer::getCurrCycleIdx() { 60311116Sdim unsigned Idx = CurrGroupSize; 61311116Sdim if (GrpCount % 2) 62311116Sdim Idx += 3; 63311116Sdim return Idx; 64311116Sdim} 65311116Sdim 66311116SdimScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 67311116SdimgetHazardType(SUnit *m, int Stalls) { 68311116Sdim return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 69311116Sdim} 70311116Sdim 71311116Sdimvoid SystemZHazardRecognizer::Reset() { 72311116Sdim CurrGroupSize = 0; 73311116Sdim clearProcResCounters(); 74311116Sdim GrpCount = 0; 75311116Sdim LastFPdOpCycleIdx = UINT_MAX; 76311116Sdim DEBUG(CurGroupDbg = "";); 77311116Sdim} 78311116Sdim 79311116Sdimbool 80311116SdimSystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 81311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 82311116Sdim if (!SC->isValid()) 83311116Sdim return true; 84311116Sdim 85311116Sdim // A cracked instruction only fits into schedule if the current 86311116Sdim // group is empty. 
87311116Sdim if (SC->BeginGroup) 88311116Sdim return (CurrGroupSize == 0); 89311116Sdim 90311116Sdim // Since a full group is handled immediately in EmitInstruction(), 91311116Sdim // SU should fit into current group. NumSlots should be 1 or 0, 92311116Sdim // since it is not a cracked or expanded instruction. 93311116Sdim assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 94311116Sdim "Expected normal instruction to fit in non-full group!"); 95311116Sdim 96311116Sdim return true; 97311116Sdim} 98311116Sdim 99311116Sdimvoid SystemZHazardRecognizer::nextGroup(bool DbgOutput) { 100311116Sdim if (CurrGroupSize > 0) { 101311116Sdim DEBUG(dumpCurrGroup("Completed decode group")); 102311116Sdim DEBUG(CurGroupDbg = "";); 103311116Sdim 104311116Sdim GrpCount++; 105311116Sdim 106311116Sdim // Reset counter for next group. 107311116Sdim CurrGroupSize = 0; 108311116Sdim 109311116Sdim // Decrease counters for execution units by one. 110311116Sdim for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 111311116Sdim if (ProcResourceCounters[i] > 0) 112311116Sdim ProcResourceCounters[i]--; 113311116Sdim 114311116Sdim // Clear CriticalResourceIdx if it is now below the threshold. 
115311116Sdim if (CriticalResourceIdx != UINT_MAX && 116311116Sdim (ProcResourceCounters[CriticalResourceIdx] <= 117311116Sdim ProcResCostLim)) 118311116Sdim CriticalResourceIdx = UINT_MAX; 119311116Sdim } 120311116Sdim 121311116Sdim DEBUG(if (DbgOutput) 122311116Sdim dumpProcResourceCounters();); 123311116Sdim} 124311116Sdim 125311116Sdim#ifndef NDEBUG // Debug output 126311116Sdimvoid SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 127311116Sdim OS << "SU(" << SU->NodeNum << "):"; 128311116Sdim OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); 129311116Sdim 130311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 131311116Sdim if (!SC->isValid()) 132311116Sdim return; 133311116Sdim 134311116Sdim for (TargetSchedModel::ProcResIter 135311116Sdim PI = SchedModel->getWriteProcResBegin(SC), 136311116Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 137311116Sdim const MCProcResourceDesc &PRD = 138311116Sdim *SchedModel->getProcResource(PI->ProcResourceIdx); 139311116Sdim std::string FU(PRD.Name); 140311116Sdim // trim e.g. 
Z13_FXaUnit -> FXa 141311116Sdim FU = FU.substr(FU.find("_") + 1); 142311116Sdim FU.resize(FU.find("Unit")); 143311116Sdim OS << "/" << FU; 144311116Sdim 145311116Sdim if (PI->Cycles > 1) 146311116Sdim OS << "(" << PI->Cycles << "cyc)"; 147311116Sdim } 148311116Sdim 149311116Sdim if (SC->NumMicroOps > 1) 150311116Sdim OS << "/" << SC->NumMicroOps << "uops"; 151311116Sdim if (SC->BeginGroup && SC->EndGroup) 152311116Sdim OS << "/GroupsAlone"; 153311116Sdim else if (SC->BeginGroup) 154311116Sdim OS << "/BeginsGroup"; 155311116Sdim else if (SC->EndGroup) 156311116Sdim OS << "/EndsGroup"; 157311116Sdim if (SU->isUnbuffered) 158311116Sdim OS << "/Unbuffered"; 159311116Sdim} 160311116Sdim 161311116Sdimvoid SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 162311116Sdim dbgs() << "+++ " << Msg; 163311116Sdim dbgs() << ": "; 164311116Sdim 165311116Sdim if (CurGroupDbg.empty()) 166311116Sdim dbgs() << " <empty>\n"; 167311116Sdim else { 168311116Sdim dbgs() << "{ " << CurGroupDbg << " }"; 169311116Sdim dbgs() << " (" << CurrGroupSize << " decoder slot" 170311116Sdim << (CurrGroupSize > 1 ? 
"s":"") 171311116Sdim << ")\n"; 172311116Sdim } 173311116Sdim} 174311116Sdim 175311116Sdimvoid SystemZHazardRecognizer::dumpProcResourceCounters() const { 176311116Sdim bool any = false; 177311116Sdim 178311116Sdim for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 179311116Sdim if (ProcResourceCounters[i] > 0) { 180311116Sdim any = true; 181311116Sdim break; 182311116Sdim } 183311116Sdim 184311116Sdim if (!any) 185311116Sdim return; 186311116Sdim 187311116Sdim dbgs() << "+++ Resource counters:\n"; 188311116Sdim for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 189311116Sdim if (ProcResourceCounters[i] > 0) { 190311116Sdim dbgs() << "+++ Extra schedule for execution unit " 191311116Sdim << SchedModel->getProcResource(i)->Name 192311116Sdim << ": " << ProcResourceCounters[i] << "\n"; 193311116Sdim any = true; 194311116Sdim } 195311116Sdim} 196311116Sdim#endif //NDEBUG 197311116Sdim 198311116Sdimvoid SystemZHazardRecognizer::clearProcResCounters() { 199311116Sdim ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 200311116Sdim CriticalResourceIdx = UINT_MAX; 201311116Sdim} 202311116Sdim 203311116Sdim// Update state with SU as the next scheduled unit. 204311116Sdimvoid SystemZHazardRecognizer:: 205311116SdimEmitInstruction(SUnit *SU) { 206311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 207311116Sdim DEBUG( dumpCurrGroup("Decode group before emission");); 208311116Sdim 209311116Sdim // If scheduling an SU that must begin a new decoder group, move on 210311116Sdim // to next group. 
211311116Sdim if (!fitsIntoCurrentGroup(SU)) 212311116Sdim nextGroup(); 213311116Sdim 214311116Sdim DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 215311116Sdim dbgs() << "\n"; 216311116Sdim raw_string_ostream cgd(CurGroupDbg); 217311116Sdim if (CurGroupDbg.length()) 218311116Sdim cgd << ", "; 219311116Sdim dumpSU(SU, cgd);); 220311116Sdim 221311116Sdim // After returning from a call, we don't know much about the state. 222311116Sdim if (SU->getInstr()->isCall()) { 223311116Sdim DEBUG (dbgs() << "+++ Clearing state after call.\n";); 224311116Sdim clearProcResCounters(); 225311116Sdim LastFPdOpCycleIdx = UINT_MAX; 226311116Sdim CurrGroupSize += getNumDecoderSlots(SU); 227311116Sdim assert (CurrGroupSize <= 3); 228311116Sdim nextGroup(); 229311116Sdim return; 230311116Sdim } 231311116Sdim 232311116Sdim // Increase counter for execution unit(s). 233311116Sdim for (TargetSchedModel::ProcResIter 234311116Sdim PI = SchedModel->getWriteProcResBegin(SC), 235311116Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 236311116Sdim // Don't handle FPd together with the other resources. 237311116Sdim if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 238311116Sdim continue; 239311116Sdim int &CurrCounter = 240311116Sdim ProcResourceCounters[PI->ProcResourceIdx]; 241311116Sdim CurrCounter += PI->Cycles; 242311116Sdim // Check if this is now the new critical resource. 
243311116Sdim if ((CurrCounter > ProcResCostLim) && 244311116Sdim (CriticalResourceIdx == UINT_MAX || 245311116Sdim (PI->ProcResourceIdx != CriticalResourceIdx && 246311116Sdim CurrCounter > 247311116Sdim ProcResourceCounters[CriticalResourceIdx]))) { 248311116Sdim DEBUG( dbgs() << "+++ New critical resource: " 249311116Sdim << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 250311116Sdim << "\n";); 251311116Sdim CriticalResourceIdx = PI->ProcResourceIdx; 252311116Sdim } 253311116Sdim } 254311116Sdim 255311116Sdim // Make note of an instruction that uses a blocking resource (FPd). 256311116Sdim if (SU->isUnbuffered) { 257311116Sdim LastFPdOpCycleIdx = getCurrCycleIdx(); 258311116Sdim DEBUG (dbgs() << "+++ Last FPd cycle index: " 259311116Sdim << LastFPdOpCycleIdx << "\n";); 260311116Sdim } 261311116Sdim 262311116Sdim // Insert SU into current group by increasing number of slots used 263311116Sdim // in current group. 264311116Sdim CurrGroupSize += getNumDecoderSlots(SU); 265311116Sdim assert (CurrGroupSize <= 3); 266311116Sdim 267311116Sdim // Check if current group is now full/ended. If so, move on to next 268311116Sdim // group to be ready to evaluate more candidates. 269311116Sdim if (CurrGroupSize == 3 || SC->EndGroup) 270311116Sdim nextGroup(); 271311116Sdim} 272311116Sdim 273311116Sdimint SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 274311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 275311116Sdim if (!SC->isValid()) 276311116Sdim return 0; 277311116Sdim 278311116Sdim // If SU begins new group, it can either break a current group early 279311116Sdim // or fit naturally if current group is empty (negative cost). 280311116Sdim if (SC->BeginGroup) { 281311116Sdim if (CurrGroupSize) 282311116Sdim return 3 - CurrGroupSize; 283311116Sdim return -1; 284311116Sdim } 285311116Sdim 286311116Sdim // Similarly, a group-ending SU may either fit well (last in group), or 287311116Sdim // end the group prematurely. 
288311116Sdim if (SC->EndGroup) { 289311116Sdim unsigned resultingGroupSize = 290311116Sdim (CurrGroupSize + getNumDecoderSlots(SU)); 291311116Sdim if (resultingGroupSize < 3) 292311116Sdim return (3 - resultingGroupSize); 293311116Sdim return -1; 294311116Sdim } 295311116Sdim 296311116Sdim // Most instructions can be placed in any decoder slot. 297311116Sdim return 0; 298311116Sdim} 299311116Sdim 300311116Sdimbool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) { 301311116Sdim assert (SU->isUnbuffered); 302311116Sdim // If this is the first FPd op, it should be scheduled high. 303311116Sdim if (LastFPdOpCycleIdx == UINT_MAX) 304311116Sdim return true; 305311116Sdim // If this is not the first PFd op, it should go into the other side 306311116Sdim // of the processor to use the other FPd unit there. This should 307311116Sdim // generally happen if two FPd ops are placed with 2 other 308311116Sdim // instructions between them (modulo 6). 309311116Sdim if (LastFPdOpCycleIdx > getCurrCycleIdx()) 310311116Sdim return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3); 311311116Sdim return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3); 312311116Sdim} 313311116Sdim 314311116Sdimint SystemZHazardRecognizer:: 315311116SdimresourcesCost(SUnit *SU) { 316311116Sdim int Cost = 0; 317311116Sdim 318311116Sdim const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 319311116Sdim if (!SC->isValid()) 320311116Sdim return 0; 321311116Sdim 322311116Sdim // For a FPd op, either return min or max value as indicated by the 323311116Sdim // distance to any prior FPd op. 324311116Sdim if (SU->isUnbuffered) 325311116Sdim Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 326311116Sdim // For other instructions, give a cost to the use of the critical resource. 
327311116Sdim else if (CriticalResourceIdx != UINT_MAX) { 328311116Sdim for (TargetSchedModel::ProcResIter 329311116Sdim PI = SchedModel->getWriteProcResBegin(SC), 330311116Sdim PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 331311116Sdim if (PI->ProcResourceIdx == CriticalResourceIdx) 332311116Sdim Cost = PI->Cycles; 333311116Sdim } 334311116Sdim 335311116Sdim return Cost; 336311116Sdim} 337311116Sdim 338