SystemZHazardRecognizer.cpp revision 327952
1//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a hazard recognizer for the SystemZ scheduler. 11// 12// This class is used by the SystemZ scheduling strategy to maintain 13// the state during scheduling, and provide cost functions for 14// scheduling candidates. This includes: 15// 16// * Decoder grouping. A decoder group can maximally hold 3 uops, and 17// instructions that always begin a new group should be scheduled when 18// the current decoder group is empty. 19// * Processor resources usage. It is beneficial to balance the use of 20// resources. 21// 22// A goal is to consider all instructions, also those outside of any 23// scheduling region. Such instructions are "advanced" past and include 24// single instructions before a scheduling region, branches etc. 25// 26// A block that has only one predecessor continues scheduling with the state 27// of it (which may be updated by emitting branches). 28// 29// ===---------------------------------------------------------------------===// 30 31#include "SystemZHazardRecognizer.h" 32#include "llvm/ADT/Statistic.h" 33 34using namespace llvm; 35 36#define DEBUG_TYPE "machine-scheduler" 37 38// This is the limit of processor resource usage at which the 39// scheduler should try to look for other instructions (not using the 40// critical resource). 41static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 42 cl::desc("The OOO window for processor " 43 "resources during scheduling."), 44 cl::init(8)); 45 46unsigned SystemZHazardRecognizer:: 47getNumDecoderSlots(SUnit *SU) const { 48 const MCSchedClassDesc *SC = getSchedClass(SU); 49 if (!SC->isValid()) 50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 51 52 if (SC->BeginGroup) { 53 if (!SC->EndGroup) 54 return 2; // Cracked instruction 55 else 56 return 3; // Expanded/group-alone instruction 57 } 58 59 return 1; // Normal instruction 60} 61 62unsigned SystemZHazardRecognizer::getCurrCycleIdx() { 63 unsigned Idx = CurrGroupSize; 64 if (GrpCount % 2) 65 Idx += 3; 66 return Idx; 67} 68 69ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 70getHazardType(SUnit *m, int Stalls) { 71 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 72} 73 74void SystemZHazardRecognizer::Reset() { 75 CurrGroupSize = 0; 76 clearProcResCounters(); 77 GrpCount = 0; 78 LastFPdOpCycleIdx = UINT_MAX; 79 LastEmittedMI = nullptr; 80 DEBUG(CurGroupDbg = "";); 81} 82 83bool 84SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 85 const MCSchedClassDesc *SC = getSchedClass(SU); 86 if (!SC->isValid()) 87 return true; 88 89 // A cracked instruction only fits into schedule if the current 90 // group is empty. 91 if (SC->BeginGroup) 92 return (CurrGroupSize == 0); 93 94 // Since a full group is handled immediately in EmitInstruction(), 95 // SU should fit into current group. NumSlots should be 1 or 0, 96 // since it is not a cracked or expanded instruction. 97 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 98 "Expected normal instruction to fit in non-full group!"); 99 100 return true; 101} 102 103void SystemZHazardRecognizer::nextGroup(bool DbgOutput) { 104 if (CurrGroupSize > 0) { 105 DEBUG(dumpCurrGroup("Completed decode group")); 106 DEBUG(CurGroupDbg = "";); 107 108 GrpCount++; 109 110 // Reset counter for next group. 111 CurrGroupSize = 0; 112 113 // Decrease counters for execution units by one. 114 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 115 if (ProcResourceCounters[i] > 0) 116 ProcResourceCounters[i]--; 117 118 // Clear CriticalResourceIdx if it is now below the threshold. 119 if (CriticalResourceIdx != UINT_MAX && 120 (ProcResourceCounters[CriticalResourceIdx] <= 121 ProcResCostLim)) 122 CriticalResourceIdx = UINT_MAX; 123 } 124 125 DEBUG(if (DbgOutput) 126 dumpProcResourceCounters();); 127} 128 129#ifndef NDEBUG // Debug output 130void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 131 OS << "SU(" << SU->NodeNum << "):"; 132 OS << TII->getName(SU->getInstr()->getOpcode()); 133 134 const MCSchedClassDesc *SC = getSchedClass(SU); 135 if (!SC->isValid()) 136 return; 137 138 for (TargetSchedModel::ProcResIter 139 PI = SchedModel->getWriteProcResBegin(SC), 140 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 141 const MCProcResourceDesc &PRD = 142 *SchedModel->getProcResource(PI->ProcResourceIdx); 143 std::string FU(PRD.Name); 144 // trim e.g. Z13_FXaUnit -> FXa 145 FU = FU.substr(FU.find("_") + 1); 146 FU.resize(FU.find("Unit")); 147 OS << "/" << FU; 148 149 if (PI->Cycles > 1) 150 OS << "(" << PI->Cycles << "cyc)"; 151 } 152 153 if (SC->NumMicroOps > 1) 154 OS << "/" << SC->NumMicroOps << "uops"; 155 if (SC->BeginGroup && SC->EndGroup) 156 OS << "/GroupsAlone"; 157 else if (SC->BeginGroup) 158 OS << "/BeginsGroup"; 159 else if (SC->EndGroup) 160 OS << "/EndsGroup"; 161 if (SU->isUnbuffered) 162 OS << "/Unbuffered"; 163} 164 165void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 166 dbgs() << "+++ " << Msg; 167 dbgs() << ": "; 168 169 if (CurGroupDbg.empty()) 170 dbgs() << " <empty>\n"; 171 else { 172 dbgs() << "{ " << CurGroupDbg << " }"; 173 dbgs() << " (" << CurrGroupSize << " decoder slot" 174 << (CurrGroupSize > 1 ? "s":"") 175 << ")\n"; 176 } 177} 178 179void SystemZHazardRecognizer::dumpProcResourceCounters() const { 180 bool any = false; 181 182 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 183 if (ProcResourceCounters[i] > 0) { 184 any = true; 185 break; 186 } 187 188 if (!any) 189 return; 190 191 dbgs() << "+++ Resource counters:\n"; 192 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 193 if (ProcResourceCounters[i] > 0) { 194 dbgs() << "+++ Extra schedule for execution unit " 195 << SchedModel->getProcResource(i)->Name 196 << ": " << ProcResourceCounters[i] << "\n"; 197 any = true; 198 } 199} 200#endif //NDEBUG 201 202void SystemZHazardRecognizer::clearProcResCounters() { 203 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 204 CriticalResourceIdx = UINT_MAX; 205} 206 207static inline bool isBranchRetTrap(MachineInstr *MI) { 208 return (MI->isBranch() || MI->isReturn() || 209 MI->getOpcode() == SystemZ::CondTrap); 210} 211 212// Update state with SU as the next scheduled unit. 213void SystemZHazardRecognizer:: 214EmitInstruction(SUnit *SU) { 215 const MCSchedClassDesc *SC = getSchedClass(SU); 216 DEBUG( dumpCurrGroup("Decode group before emission");); 217 218 // If scheduling an SU that must begin a new decoder group, move on 219 // to next group. 220 if (!fitsIntoCurrentGroup(SU)) 221 nextGroup(); 222 223 DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 224 dbgs() << "\n"; 225 raw_string_ostream cgd(CurGroupDbg); 226 if (CurGroupDbg.length()) 227 cgd << ", "; 228 dumpSU(SU, cgd);); 229 230 LastEmittedMI = SU->getInstr(); 231 232 // After returning from a call, we don't know much about the state. 233 if (SU->isCall) { 234 DEBUG (dbgs() << "+++ Clearing state after call.\n";); 235 clearProcResCounters(); 236 LastFPdOpCycleIdx = UINT_MAX; 237 CurrGroupSize += getNumDecoderSlots(SU); 238 assert (CurrGroupSize <= 3); 239 nextGroup(); 240 return; 241 } 242 243 // Increase counter for execution unit(s). 244 for (TargetSchedModel::ProcResIter 245 PI = SchedModel->getWriteProcResBegin(SC), 246 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 247 // Don't handle FPd together with the other resources. 248 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 249 continue; 250 int &CurrCounter = 251 ProcResourceCounters[PI->ProcResourceIdx]; 252 CurrCounter += PI->Cycles; 253 // Check if this is now the new critical resource. 254 if ((CurrCounter > ProcResCostLim) && 255 (CriticalResourceIdx == UINT_MAX || 256 (PI->ProcResourceIdx != CriticalResourceIdx && 257 CurrCounter > 258 ProcResourceCounters[CriticalResourceIdx]))) { 259 DEBUG( dbgs() << "+++ New critical resource: " 260 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 261 << "\n";); 262 CriticalResourceIdx = PI->ProcResourceIdx; 263 } 264 } 265 266 // Make note of an instruction that uses a blocking resource (FPd). 267 if (SU->isUnbuffered) { 268 LastFPdOpCycleIdx = getCurrCycleIdx(); 269 DEBUG (dbgs() << "+++ Last FPd cycle index: " 270 << LastFPdOpCycleIdx << "\n";); 271 } 272 273 bool GroupEndingBranch = 274 (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr())); 275 276 // Insert SU into current group by increasing number of slots used 277 // in current group. 278 CurrGroupSize += getNumDecoderSlots(SU); 279 assert (CurrGroupSize <= 3); 280 281 // Check if current group is now full/ended. If so, move on to next 282 // group to be ready to evaluate more candidates. 283 if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch) 284 nextGroup(); 285} 286 287int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 288 const MCSchedClassDesc *SC = getSchedClass(SU); 289 if (!SC->isValid()) 290 return 0; 291 292 // If SU begins new group, it can either break a current group early 293 // or fit naturally if current group is empty (negative cost). 294 if (SC->BeginGroup) { 295 if (CurrGroupSize) 296 return 3 - CurrGroupSize; 297 return -1; 298 } 299 300 // Similarly, a group-ending SU may either fit well (last in group), or 301 // end the group prematurely. 302 if (SC->EndGroup) { 303 unsigned resultingGroupSize = 304 (CurrGroupSize + getNumDecoderSlots(SU)); 305 if (resultingGroupSize < 3) 306 return (3 - resultingGroupSize); 307 return -1; 308 } 309 310 // Most instructions can be placed in any decoder slot. 311 return 0; 312} 313 314bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) { 315 assert (SU->isUnbuffered); 316 // If this is the first FPd op, it should be scheduled high. 317 if (LastFPdOpCycleIdx == UINT_MAX) 318 return true; 319 // If this is not the first PFd op, it should go into the other side 320 // of the processor to use the other FPd unit there. This should 321 // generally happen if two FPd ops are placed with 2 other 322 // instructions between them (modulo 6). 323 if (LastFPdOpCycleIdx > getCurrCycleIdx()) 324 return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3); 325 return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3); 326} 327 328int SystemZHazardRecognizer:: 329resourcesCost(SUnit *SU) { 330 int Cost = 0; 331 332 const MCSchedClassDesc *SC = getSchedClass(SU); 333 if (!SC->isValid()) 334 return 0; 335 336 // For a FPd op, either return min or max value as indicated by the 337 // distance to any prior FPd op. 338 if (SU->isUnbuffered) 339 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 340 // For other instructions, give a cost to the use of the critical resource. 341 else if (CriticalResourceIdx != UINT_MAX) { 342 for (TargetSchedModel::ProcResIter 343 PI = SchedModel->getWriteProcResBegin(SC), 344 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 345 if (PI->ProcResourceIdx == CriticalResourceIdx) 346 Cost = PI->Cycles; 347 } 348 349 return Cost; 350} 351 352void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 353 bool TakenBranch) { 354 // Make a temporary SUnit. 355 SUnit SU(MI, 0); 356 357 // Set interesting flags. 358 SU.isCall = MI->isCall(); 359 360 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 361 for (const MCWriteProcResEntry &PRE : 362 make_range(SchedModel->getWriteProcResBegin(SC), 363 SchedModel->getWriteProcResEnd(SC))) { 364 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 365 case 0: 366 SU.hasReservedResource = true; 367 break; 368 case 1: 369 SU.isUnbuffered = true; 370 break; 371 default: 372 break; 373 } 374 } 375 376 EmitInstruction(&SU); 377 378 if (TakenBranch && CurrGroupSize > 0) 379 nextGroup(false /*DbgOutput*/); 380 381 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 382 "Scheduler: unhandled terminator!"); 383} 384 385void SystemZHazardRecognizer:: 386copyState(SystemZHazardRecognizer *Incoming) { 387 // Current decoder group 388 CurrGroupSize = Incoming->CurrGroupSize; 389 DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); 390 391 // Processor resources 392 ProcResourceCounters = Incoming->ProcResourceCounters; 393 CriticalResourceIdx = Incoming->CriticalResourceIdx; 394 395 // FPd 396 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 397 GrpCount = Incoming->GrpCount; 398} 399