1//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//==-----------------------------------------------------------------------===// 8 9#include "AMDGPU.h" 10#include "AMDGPUSubtarget.h" 11#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12#include "R600InstrInfo.h" 13#include "R600RegisterInfo.h" 14#include "llvm/ADT/DepthFirstIterator.h" 15#include "llvm/ADT/SCCIterator.h" 16#include "llvm/ADT/SmallPtrSet.h" 17#include "llvm/ADT/SmallVector.h" 18#include "llvm/ADT/Statistic.h" 19#include "llvm/ADT/StringRef.h" 20#include "llvm/CodeGen/MachineBasicBlock.h" 21#include "llvm/CodeGen/MachineDominators.h" 22#include "llvm/CodeGen/MachineFunction.h" 23#include "llvm/CodeGen/MachineFunctionPass.h" 24#include "llvm/CodeGen/MachineInstr.h" 25#include "llvm/CodeGen/MachineInstrBuilder.h" 26#include "llvm/CodeGen/MachineJumpTableInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineOperand.h" 29#include "llvm/CodeGen/MachinePostDominators.h" 30#include "llvm/CodeGen/MachineRegisterInfo.h" 31#include "llvm/IR/DebugLoc.h" 32#include "llvm/IR/LLVMContext.h" 33#include "llvm/InitializePasses.h" 34#include "llvm/Pass.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/ErrorHandling.h" 37#include "llvm/Support/MachineValueType.h" 38#include "llvm/Support/raw_ostream.h" 39#include <cassert> 40#include <cstddef> 41#include <deque> 42#include <iterator> 43#include <map> 44#include <utility> 45#include <vector> 46 47using namespace llvm; 48 49#define DEBUG_TYPE "structcfg" 50 51#define DEFAULT_VEC_SLOTS 8 52 53// TODO: move-begin. 54 55//===----------------------------------------------------------------------===// 56// 57// Statistics for CFGStructurizer. 58// 59//===----------------------------------------------------------------------===// 60 61STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 62 "matched"); 63STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 64 "matched"); 65STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 66STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 67 68namespace llvm { 69 70void initializeAMDGPUCFGStructurizerPass(PassRegistry &); 71 72} // end namespace llvm 73 74namespace { 75 76//===----------------------------------------------------------------------===// 77// 78// Miscellaneous utility for CFGStructurizer. 79// 80//===----------------------------------------------------------------------===// 81 82#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); 83 84#define SHOWNEWBLK(b, msg) \ 85 LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 86 dbgs() << "\n";); 87 88#define SHOWBLK_DETAIL(b, msg) \ 89 LLVM_DEBUG(if (b) { \ 90 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 91 b->print(dbgs()); \ 92 dbgs() << "\n"; \ 93 }); 94 95#define INVALIDSCCNUM -1 96 97//===----------------------------------------------------------------------===// 98// 99// supporting data structure for CFGStructurizer 100// 101//===----------------------------------------------------------------------===// 102 103class BlockInformation { 104public: 105 bool IsRetired = false; 106 int SccNum = INVALIDSCCNUM; 107 108 BlockInformation() = default; 109}; 110 111//===----------------------------------------------------------------------===// 112// 113// CFGStructurizer 114// 115//===----------------------------------------------------------------------===// 116 117class AMDGPUCFGStructurizer : public MachineFunctionPass { 118public: 119 using MBBVector = SmallVector<MachineBasicBlock *, 32>; 120 using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; 121 using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; 122 123 enum PathToKind { 124 Not_SinglePath = 0, 125 SinglePath_InPath = 1, 126 SinglePath_NotInPath = 2 127 }; 128 129 static char ID; 130 131 AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { 132 initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); 133 } 134 135 StringRef getPassName() const override { 136 return "AMDGPU Control Flow Graph structurizer Pass"; 137 } 138 139 void getAnalysisUsage(AnalysisUsage &AU) const override { 140 AU.addRequired<MachineDominatorTree>(); 141 AU.addRequired<MachinePostDominatorTree>(); 142 AU.addRequired<MachineLoopInfo>(); 143 MachineFunctionPass::getAnalysisUsage(AU); 144 } 145 146 /// Perform the CFG structurization 147 bool run(); 148 149 /// Perform the CFG preparation 150 /// This step will remove every unconditionnal/dead jump instructions and make 151 /// sure all loops have an exit block 152 bool prepare(); 153 154 bool runOnMachineFunction(MachineFunction &MF) override { 155 TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); 156 TRI = &TII->getRegisterInfo(); 157 LLVM_DEBUG(MF.dump();); 158 OrderedBlks.clear(); 159 Visited.clear(); 160 FuncRep = &MF; 161 MLI = &getAnalysis<MachineLoopInfo>(); 162 LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 163 MDT = &getAnalysis<MachineDominatorTree>(); 164 LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); 165 PDT = &getAnalysis<MachinePostDominatorTree>(); 166 LLVM_DEBUG(PDT->print(dbgs());); 167 prepare(); 168 run(); 169 LLVM_DEBUG(MF.dump();); 170 return true; 171 } 172 173protected: 174 MachineDominatorTree *MDT; 175 MachinePostDominatorTree *PDT; 176 MachineLoopInfo *MLI; 177 const R600InstrInfo *TII = nullptr; 178 const R600RegisterInfo *TRI = nullptr; 179 180 // PRINT FUNCTIONS 181 /// Print the ordered Blocks. 182 void printOrderedBlocks() const { 183 size_t i = 0; 184 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 185 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 186 dbgs() << "BB" << (*iterBlk)->getNumber(); 187 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 188 if (i != 0 && i % 10 == 0) { 189 dbgs() << "\n"; 190 } else { 191 dbgs() << " "; 192 } 193 } 194 } 195 196 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 197 for (MachineLoop::iterator iter = LoopInfo.begin(), 198 iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 199 (*iter)->print(dbgs(), 0); 200 } 201 } 202 203 // UTILITY FUNCTIONS 204 int getSCCNum(MachineBasicBlock *MBB) const; 205 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 206 bool hasBackEdge(MachineBasicBlock *MBB) const; 207 bool isRetiredBlock(MachineBasicBlock *MBB) const; 208 bool isActiveLoophead(MachineBasicBlock *MBB) const; 209 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 210 bool AllowSideEntry = true) const; 211 int countActiveBlock(MBBVector::const_iterator It, 212 MBBVector::const_iterator E) const; 213 bool needMigrateBlock(MachineBasicBlock *MBB) const; 214 215 // Utility Functions 216 void reversePredicateSetter(MachineBasicBlock::iterator I, 217 MachineBasicBlock &MBB); 218 /// Compute the reversed DFS post order of Blocks 219 void orderBlocks(MachineFunction *MF); 220 221 // Function originally from CFGStructTraits 222 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 223 const DebugLoc &DL = DebugLoc()); 224 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 225 const DebugLoc &DL = DebugLoc()); 226 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 227 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 228 const DebugLoc &DL); 229 void insertCondBranchBefore(MachineBasicBlock *MBB, 230 MachineBasicBlock::iterator I, int NewOpcode, 231 int RegNum, const DebugLoc &DL); 232 233 static int getBranchNzeroOpcode(int OldOpcode); 234 static int getBranchZeroOpcode(int OldOpcode); 235 static int getContinueNzeroOpcode(int OldOpcode); 236 static int getContinueZeroOpcode(int OldOpcode); 237 static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 238 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 239 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 240 MachineInstr *MI); 241 static bool isCondBranch(MachineInstr *MI); 242 static bool isUncondBranch(MachineInstr *MI); 243 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 244 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 245 246 /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 247 /// 248 /// BB with backward-edge could have move instructions after the branch 249 /// instruction. Such move instruction "belong to" the loop backward-edge. 250 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 251 252 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 253 static bool isReturnBlock(MachineBasicBlock *MBB); 254 static void cloneSuccessorList(MachineBasicBlock *DstMBB, 255 MachineBasicBlock *SrcMBB); 256 static MachineBasicBlock *clone(MachineBasicBlock *MBB); 257 258 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 259 /// because the AMDGPU instruction is not recognized as terminator fix this 260 /// and retire this routine 261 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 262 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 263 264 static void wrapup(MachineBasicBlock *MBB); 265 266 int patternMatch(MachineBasicBlock *MBB); 267 int patternMatchGroup(MachineBasicBlock *MBB); 268 int serialPatternMatch(MachineBasicBlock *MBB); 269 int ifPatternMatch(MachineBasicBlock *MBB); 270 int loopendPatternMatch(); 271 int mergeLoop(MachineLoop *LoopRep); 272 273 /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 274 /// the same loop with LoopLandInfo without explicitly keeping track of 275 /// loopContBlks and loopBreakBlks, this is a method to get the information. 276 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 277 MachineBasicBlock *Src2MBB); 278 int handleJumpintoIf(MachineBasicBlock *HeadMBB, 279 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 280 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 281 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 282 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 283 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 284 MachineBasicBlock **LandMBBPtr); 285 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 286 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 287 MachineBasicBlock *LandMBB, bool Detail = false); 288 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 289 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 290 void mergeSerialBlock(MachineBasicBlock *DstMBB, 291 MachineBasicBlock *SrcMBB); 292 293 void mergeIfthenelseBlock(MachineInstr *BranchMI, 294 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 295 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 296 void mergeLooplandBlock(MachineBasicBlock *DstMBB, 297 MachineBasicBlock *LandMBB); 298 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 299 MachineBasicBlock *LandMBB); 300 void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 301 MachineBasicBlock *ContMBB); 302 303 /// normalizeInfiniteLoopExit change 304 /// B1: 305 /// uncond_br LoopHeader 306 /// 307 /// to 308 /// B1: 309 /// cond_br 1 LoopHeader dummyExit 310 /// and return the newly added dummy exit block 311 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 312 void removeUnconditionalBranch(MachineBasicBlock *MBB); 313 314 /// Remove duplicate branches instructions in a block. 315 /// For instance 316 /// B0: 317 /// cond_br X B1 B2 318 /// cond_br X B1 B2 319 /// is transformed to 320 /// B0: 321 /// cond_br X B1 B2 322 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 323 324 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 325 void removeSuccessor(MachineBasicBlock *MBB); 326 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 327 MachineBasicBlock *PredMBB); 328 void migrateInstruction(MachineBasicBlock *SrcMBB, 329 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 330 void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 331 void retireBlock(MachineBasicBlock *MBB); 332 333private: 334 MBBInfoMap BlockInfoMap; 335 LoopLandInfoMap LLInfoMap; 336 std::map<MachineLoop *, bool> Visited; 337 MachineFunction *FuncRep; 338 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 339}; 340 341} // end anonymous namespace 342 343char AMDGPUCFGStructurizer::ID = 0; 344 345int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 346 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 347 if (It == BlockInfoMap.end()) 348 return INVALIDSCCNUM; 349 return (*It).second->SccNum; 350} 351 352MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 353 const { 354 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 355 if (It == LLInfoMap.end()) 356 return nullptr; 357 return (*It).second; 358} 359 360bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 361 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 362 if (!LoopRep) 363 return false; 364 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 365 return MBB->isSuccessor(LoopHeader); 366} 367 368bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 369 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 370 if (It == BlockInfoMap.end()) 371 return false; 372 return (*It).second->IsRetired; 373} 374 375bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 376 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 377 while (LoopRep && LoopRep->getHeader() == MBB) { 378 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 379 if(!LoopLand) 380 return true; 381 if (!isRetiredBlock(LoopLand)) 382 return true; 383 LoopRep = LoopRep->getParentLoop(); 384 } 385 return false; 386} 387 388AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 389 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 390 bool AllowSideEntry) const { 391 assert(DstMBB); 392 if (SrcMBB == DstMBB) 393 return SinglePath_InPath; 394 while (SrcMBB && SrcMBB->succ_size() == 1) { 395 SrcMBB = *SrcMBB->succ_begin(); 396 if (SrcMBB == DstMBB) 397 return SinglePath_InPath; 398 if (!AllowSideEntry && SrcMBB->pred_size() > 1) 399 return Not_SinglePath; 400 } 401 if (SrcMBB && SrcMBB->succ_size()==0) 402 return SinglePath_NotInPath; 403 return Not_SinglePath; 404} 405 406int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 407 MBBVector::const_iterator E) const { 408 int Count = 0; 409 while (It != E) { 410 if (!isRetiredBlock(*It)) 411 ++Count; 412 ++It; 413 } 414 return Count; 415} 416 417bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 418 unsigned BlockSizeThreshold = 30; 419 unsigned CloneInstrThreshold = 100; 420 bool MultiplePreds = MBB && (MBB->pred_size() > 1); 421 422 if(!MultiplePreds) 423 return false; 424 unsigned BlkSize = MBB->size(); 425 return ((BlkSize > BlockSizeThreshold) && 426 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 427} 428 429void AMDGPUCFGStructurizer::reversePredicateSetter( 430 MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { 431 assert(I.isValid() && "Expected valid iterator"); 432 for (;; --I) { 433 if (I == MBB.end()) 434 continue; 435 if (I->getOpcode() == R600::PRED_X) { 436 switch (I->getOperand(2).getImm()) { 437 case R600::PRED_SETE_INT: 438 I->getOperand(2).setImm(R600::PRED_SETNE_INT); 439 return; 440 case R600::PRED_SETNE_INT: 441 I->getOperand(2).setImm(R600::PRED_SETE_INT); 442 return; 443 case R600::PRED_SETE: 444 I->getOperand(2).setImm(R600::PRED_SETNE); 445 return; 446 case R600::PRED_SETNE: 447 I->getOperand(2).setImm(R600::PRED_SETE); 448 return; 449 default: 450 llvm_unreachable("PRED_X Opcode invalid!"); 451 } 452 } 453 } 454} 455 456void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 457 int NewOpcode, const DebugLoc &DL) { 458 MachineInstr *MI = 459 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 460 MBB->push_back(MI); 461 //assume the instruction doesn't take any reg operand ... 462 SHOWNEWINSTR(MI); 463} 464 465MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 466 int NewOpcode, 467 const DebugLoc &DL) { 468 MachineInstr *MI = 469 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 470 if (MBB->begin() != MBB->end()) 471 MBB->insert(MBB->begin(), MI); 472 else 473 MBB->push_back(MI); 474 SHOWNEWINSTR(MI); 475 return MI; 476} 477 478MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 479 MachineBasicBlock::iterator I, int NewOpcode) { 480 MachineInstr *OldMI = &(*I); 481 MachineBasicBlock *MBB = OldMI->getParent(); 482 MachineInstr *NewMBB = 483 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 484 MBB->insert(I, NewMBB); 485 //assume the instruction doesn't take any reg operand ... 486 SHOWNEWINSTR(NewMBB); 487 return NewMBB; 488} 489 490void AMDGPUCFGStructurizer::insertCondBranchBefore( 491 MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { 492 MachineInstr *OldMI = &(*I); 493 MachineBasicBlock *MBB = OldMI->getParent(); 494 MachineFunction *MF = MBB->getParent(); 495 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 496 MBB->insert(I, NewMI); 497 MachineInstrBuilder MIB(*MF, NewMI); 498 MIB.addReg(OldMI->getOperand(1).getReg(), false); 499 SHOWNEWINSTR(NewMI); 500 //erase later oldInstr->eraseFromParent(); 501} 502 503void AMDGPUCFGStructurizer::insertCondBranchBefore( 504 MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, 505 int RegNum, const DebugLoc &DL) { 506 MachineFunction *MF = blk->getParent(); 507 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 508 //insert before 509 blk->insert(I, NewInstr); 510 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 511 SHOWNEWINSTR(NewInstr); 512} 513 514int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 515 switch(OldOpcode) { 516 case R600::JUMP_COND: 517 case R600::JUMP: return R600::IF_PREDICATE_SET; 518 case R600::BRANCH_COND_i32: 519 case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; 520 default: llvm_unreachable("internal error"); 521 } 522 return -1; 523} 524 525int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 526 switch(OldOpcode) { 527 case R600::JUMP_COND: 528 case R600::JUMP: return R600::IF_PREDICATE_SET; 529 case R600::BRANCH_COND_i32: 530 case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; 531 default: llvm_unreachable("internal error"); 532 } 533 return -1; 534} 535 536int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 537 switch(OldOpcode) { 538 case R600::JUMP_COND: 539 case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; 540 default: llvm_unreachable("internal error"); 541 } 542 return -1; 543} 544 545int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 546 switch(OldOpcode) { 547 case R600::JUMP_COND: 548 case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; 549 default: llvm_unreachable("internal error"); 550 } 551 return -1; 552} 553 554MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 555 return MI->getOperand(0).getMBB(); 556} 557 558void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 559 MachineBasicBlock *MBB) { 560 MI->getOperand(0).setMBB(MBB); 561} 562 563MachineBasicBlock * 564AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 565 MachineInstr *MI) { 566 assert(MBB->succ_size() == 2); 567 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 568 MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 569 MachineBasicBlock::succ_iterator Next = It; 570 ++Next; 571 return (*It == TrueBranch) ? *Next : *It; 572} 573 574bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 575 switch (MI->getOpcode()) { 576 case R600::JUMP_COND: 577 case R600::BRANCH_COND_i32: 578 case R600::BRANCH_COND_f32: return true; 579 default: 580 return false; 581 } 582 return false; 583} 584 585bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 586 switch (MI->getOpcode()) { 587 case R600::JUMP: 588 case R600::BRANCH: 589 return true; 590 default: 591 return false; 592 } 593 return false; 594} 595 596DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 597 //get DebugLoc from the first MachineBasicBlock instruction with debug info 598 DebugLoc DL; 599 for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 600 ++It) { 601 MachineInstr *instr = &(*It); 602 if (instr->getDebugLoc()) 603 DL = instr->getDebugLoc(); 604 } 605 return DL; 606} 607 608MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 609 MachineBasicBlock *MBB) { 610 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 611 MachineInstr *MI = &*It; 612 if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 613 return MI; 614 return nullptr; 615} 616 617MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 618 MachineBasicBlock *MBB) { 619 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 620 It != E; ++It) { 621 // FIXME: Simplify 622 MachineInstr *MI = &*It; 623 if (MI) { 624 if (isCondBranch(MI) || isUncondBranch(MI)) 625 return MI; 626 else if (!TII->isMov(MI->getOpcode())) 627 break; 628 } 629 } 630 return nullptr; 631} 632 633MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 634 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 635 if (It != MBB->rend()) { 636 MachineInstr *instr = &(*It); 637 if (instr->getOpcode() == R600::RETURN) 638 return instr; 639 } 640 return nullptr; 641} 642 643bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 644 MachineInstr *MI = getReturnInstr(MBB); 645 bool IsReturn = (MBB->succ_size() == 0); 646 if (MI) 647 assert(IsReturn); 648 else if (IsReturn) 649 LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() 650 << " is return block without RETURN instr\n";); 651 return IsReturn; 652} 653 654void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 655 MachineBasicBlock *SrcMBB) { 656 for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 657 iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 658 DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 659} 660 661MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 662 MachineFunction *Func = MBB->getParent(); 663 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 664 Func->push_back(NewMBB); //insert to function 665 for (const MachineInstr &It : *MBB) 666 NewMBB->push_back(Func->CloneMachineInstr(&It)); 667 return NewMBB; 668} 669 670void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 671 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 672 MachineBasicBlock *NewBlk) { 673 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 674 if (BranchMI && isCondBranch(BranchMI) && 675 getTrueBranch(BranchMI) == OldMBB) 676 setTrueBranch(BranchMI, NewBlk); 677} 678 679void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 680 assert((!MBB->getParent()->getJumpTableInfo() 681 || MBB->getParent()->getJumpTableInfo()->isEmpty()) 682 && "found a jump table"); 683 684 //collect continue right before endloop 685 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 686 MachineBasicBlock::iterator Pre = MBB->begin(); 687 MachineBasicBlock::iterator E = MBB->end(); 688 MachineBasicBlock::iterator It = Pre; 689 while (It != E) { 690 if (Pre->getOpcode() == R600::CONTINUE 691 && It->getOpcode() == R600::ENDLOOP) 692 ContInstr.push_back(&*Pre); 693 Pre = It; 694 ++It; 695 } 696 697 //delete continue right before endloop 698 for (unsigned i = 0; i < ContInstr.size(); ++i) 699 ContInstr[i]->eraseFromParent(); 700 701 // TODO to fix up jump table so later phase won't be confused. if 702 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 703 // there isn't such an interface yet. alternatively, replace all the other 704 // blocks in the jump table with the entryBlk //} 705} 706 707bool AMDGPUCFGStructurizer::prepare() { 708 bool Changed = false; 709 710 //FIXME: if not reducible flow graph, make it so ??? 711 712 LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 713 714 orderBlocks(FuncRep); 715 716 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 717 718 // Add an ExitBlk to loop that don't have one 719 for (MachineLoopInfo::iterator It = MLI->begin(), 720 E = MLI->end(); It != E; ++It) { 721 MachineLoop *LoopRep = (*It); 722 MBBVector ExitingMBBs; 723 LoopRep->getExitingBlocks(ExitingMBBs); 724 725 if (ExitingMBBs.size() == 0) { 726 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 727 if (DummyExitBlk) 728 RetBlks.push_back(DummyExitBlk); 729 } 730 } 731 732 // Remove unconditional branch instr. 733 // Add dummy exit block iff there are multiple returns. 734 for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 735 It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 736 MachineBasicBlock *MBB = *It; 737 removeUnconditionalBranch(MBB); 738 removeRedundantConditionalBranch(MBB); 739 if (isReturnBlock(MBB)) { 740 RetBlks.push_back(MBB); 741 } 742 assert(MBB->succ_size() <= 2); 743 } 744 745 if (RetBlks.size() >= 2) { 746 addDummyExitBlock(RetBlks); 747 Changed = true; 748 } 749 750 return Changed; 751} 752 753bool AMDGPUCFGStructurizer::run() { 754 //Assume reducible CFG... 755 LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); 756 757#ifdef STRESSTEST 758 //Use the worse block ordering to test the algorithm. 759 ReverseVector(orderedBlks); 760#endif 761 762 LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 763 int NumIter = 0; 764 bool Finish = false; 765 MachineBasicBlock *MBB; 766 bool MakeProgress = false; 767 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 768 OrderedBlks.end()); 769 770 do { 771 ++NumIter; 772 LLVM_DEBUG(dbgs() << "numIter = " << NumIter 773 << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); 774 775 SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 776 OrderedBlks.begin(); 777 SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 778 OrderedBlks.end(); 779 780 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 781 It; 782 MachineBasicBlock *SccBeginMBB = nullptr; 783 int SccNumBlk = 0; // The number of active blocks, init to a 784 // maximum possible number. 785 int SccNumIter; // Number of iteration in this SCC. 786 787 while (It != E) { 788 MBB = *It; 789 790 if (!SccBeginMBB) { 791 SccBeginIter = It; 792 SccBeginMBB = MBB; 793 SccNumIter = 0; 794 SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 795 LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 796 dbgs() << "\n";); 797 } 798 799 if (!isRetiredBlock(MBB)) 800 patternMatch(MBB); 801 802 ++It; 803 804 bool ContNextScc = true; 805 if (It == E 806 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 807 // Just finish one scc. 808 ++SccNumIter; 809 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 810 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 811 LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 812 << ", sccNumIter = " << SccNumIter; 813 dbgs() << "doesn't make any progress\n";); 814 ContNextScc = true; 815 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 816 SccNumBlk = sccRemainedNumBlk; 817 It = SccBeginIter; 818 ContNextScc = false; 819 LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) 820 << "sccNumIter = " << SccNumIter << '\n';); 821 } else { 822 // Finish the current scc. 823 ContNextScc = true; 824 } 825 } else { 826 // Continue on next component in the current scc. 827 ContNextScc = false; 828 } 829 830 if (ContNextScc) 831 SccBeginMBB = nullptr; 832 } //while, "one iteration" over the function. 833 834 MachineBasicBlock *EntryMBB = 835 *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 836 if (EntryMBB->succ_size() == 0) { 837 Finish = true; 838 LLVM_DEBUG(dbgs() << "Reduce to one block\n";); 839 } else { 840 int NewnumRemainedBlk 841 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 842 // consider cloned blocks ?? 843 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 844 MakeProgress = true; 845 NumRemainedBlk = NewnumRemainedBlk; 846 } else { 847 MakeProgress = false; 848 LLVM_DEBUG(dbgs() << "No progress\n";); 849 } 850 } 851 } while (!Finish && MakeProgress); 852 853 // Misc wrap up to maintain the consistency of the Function representation. 854 wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 855 856 // Detach retired Block, release memory. 857 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 858 It != E; ++It) { 859 if ((*It).second && (*It).second->IsRetired) { 860 assert(((*It).first)->getNumber() != -1); 861 LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";); 862 (*It).first->eraseFromParent(); //Remove from the parent Function. 863 } 864 delete (*It).second; 865 } 866 BlockInfoMap.clear(); 867 LLInfoMap.clear(); 868 869 if (!Finish) { 870 LLVM_DEBUG(FuncRep->viewCFG()); 871 report_fatal_error("IRREDUCIBLE_CFG"); 872 } 873 874 return true; 875} 876 877void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 878 int SccNum = 0; 879 MachineBasicBlock *MBB; 880 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 881 ++It, ++SccNum) { 882 const std::vector<MachineBasicBlock *> &SccNext = *It; 883 for (std::vector<MachineBasicBlock *>::const_iterator 884 blockIter = SccNext.begin(), blockEnd = SccNext.end(); 885 blockIter != blockEnd; ++blockIter) { 886 MBB = *blockIter; 887 OrderedBlks.push_back(MBB); 888 recordSccnum(MBB, SccNum); 889 } 890 } 891 892 // walk through all the block in func to check for unreachable 893 for (auto *MBB : nodes(MF)) { 894 SccNum = getSCCNum(MBB); 895 if (SccNum == INVALIDSCCNUM) 896 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 897 } 898} 899 900int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 901 int NumMatch = 0; 902 int CurMatch; 903 904 LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); 905 906 while ((CurMatch = patternMatchGroup(MBB)) > 0) 907 NumMatch += CurMatch; 908 909 LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() 910 << ", numMatch = " << NumMatch << "\n";); 911 912 return NumMatch; 913} 914 915int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 916 int NumMatch = 0; 917 NumMatch += loopendPatternMatch(); 918 NumMatch += serialPatternMatch(MBB); 919 NumMatch += ifPatternMatch(MBB); 920 return NumMatch; 921} 922 923int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 924 if (MBB->succ_size() != 1) 925 return 0; 926 927 MachineBasicBlock *childBlk = *MBB->succ_begin(); 928 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 929 return 0; 930 931 mergeSerialBlock(MBB, childBlk); 932 ++numSerialPatternMatch; 933 return 1; 934} 935 936int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 937 //two edges 938 if (MBB->succ_size() != 2) 939 return 0; 940 if (hasBackEdge(MBB)) 941 return 0; 942 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 943 if (!BranchMI) 944 return 0; 945 946 assert(isCondBranch(BranchMI)); 947 int NumMatch = 0; 948 949 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 950 NumMatch += serialPatternMatch(TrueMBB); 951 NumMatch += ifPatternMatch(TrueMBB); 952 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 953 NumMatch += serialPatternMatch(FalseMBB); 954 NumMatch += ifPatternMatch(FalseMBB); 955 MachineBasicBlock *LandBlk; 956 int Cloned = 0; 957 958 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 959 // TODO: Simplify 960 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 961 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 962 // Diamond pattern 963 LandBlk = *TrueMBB->succ_begin(); 964 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 965 // Triangle pattern, false is empty 966 LandBlk = FalseMBB; 967 FalseMBB = nullptr; 968 } else if (FalseMBB->succ_size() == 1 969 && *FalseMBB->succ_begin() == TrueMBB) { 970 // Triangle pattern, true is empty 971 // We reverse the predicate to make a triangle, empty false pattern; 972 std::swap(TrueMBB, FalseMBB); 973 reversePredicateSetter(MBB->end(), *MBB); 974 LandBlk = FalseMBB; 975 FalseMBB = nullptr; 976 } else if (FalseMBB->succ_size() == 1 977 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 978 LandBlk = *FalseMBB->succ_begin(); 979 } else if (TrueMBB->succ_size() == 1 980 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 981 LandBlk = *TrueMBB->succ_begin(); 982 } else { 983 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 984 } 985 986 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 987 // new BB created for landBlk==NULL may introduce new challenge to the 988 // reduction process. 989 if (LandBlk && 990 ((TrueMBB && TrueMBB->pred_size() > 1) 991 || (FalseMBB && FalseMBB->pred_size() > 1))) { 992 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 993 } 994 995 if (TrueMBB && TrueMBB->pred_size() > 1) { 996 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 997 ++Cloned; 998 } 999 1000 if (FalseMBB && FalseMBB->pred_size() > 1) { 1001 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 1002 ++Cloned; 1003 } 1004 1005 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 1006 1007 ++numIfPatternMatch; 1008 1009 numClonedBlock += Cloned; 1010 1011 return 1 + Cloned + NumMatch; 1012} 1013 1014int AMDGPUCFGStructurizer::loopendPatternMatch() { 1015 std::deque<MachineLoop *> NestedLoops; 1016 for (auto &It: *MLI) 1017 for (MachineLoop *ML : depth_first(It)) 1018 NestedLoops.push_front(ML); 1019 1020 if (NestedLoops.empty()) 1021 return 0; 1022 1023 // Process nested loop outside->inside (we did push_front), 1024 // so "continue" to a outside loop won't be mistaken as "break" 1025 // of the current loop. 1026 int Num = 0; 1027 for (MachineLoop *ExaminedLoop : NestedLoops) { 1028 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 1029 continue; 1030 LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 1031 int NumBreak = mergeLoop(ExaminedLoop); 1032 if (NumBreak == -1) 1033 break; 1034 Num += NumBreak; 1035 } 1036 return Num; 1037} 1038 1039int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1040 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1041 MBBVector ExitingMBBs; 1042 LoopRep->getExitingBlocks(ExitingMBBs); 1043 assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1044 LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() 1045 << " exiting blocks\n";); 1046 // We assume a single ExitBlk 1047 MBBVector ExitBlks; 1048 LoopRep->getExitBlocks(ExitBlks); 1049 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1050 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1051 ExitBlkSet.insert(ExitBlks[i]); 1052 assert(ExitBlkSet.size() == 1); 1053 MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1054 assert(ExitBlk && "Loop has several exit block"); 1055 MBBVector LatchBlks; 1056 for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) 1057 if (LoopRep->contains(LB)) 1058 LatchBlks.push_back(LB); 1059 1060 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1061 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1062 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1063 settleLoopcontBlock(LatchBlks[i], LoopHeader); 1064 int Match = 0; 1065 do { 1066 Match = 0; 1067 Match += serialPatternMatch(LoopHeader); 1068 Match += ifPatternMatch(LoopHeader); 1069 } while (Match > 0); 1070 mergeLooplandBlock(LoopHeader, ExitBlk); 1071 MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1072 if (ParentLoop) 1073 MLI->changeLoopFor(LoopHeader, ParentLoop); 1074 else 1075 MLI->removeBlock(LoopHeader); 1076 Visited[LoopRep] = true; 1077 return 1; 1078} 1079 1080bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1081 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1082 if (Src1MBB->succ_size() == 0) { 1083 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1084 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1085 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1086 if (TheEntry) { 1087 LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" 1088 << Src1MBB->getNumber() << " src2 = BB" 1089 << Src2MBB->getNumber() << "\n";); 1090 return true; 1091 } 1092 } 1093 } 1094 return false; 1095} 1096 1097int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1098 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1099 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1100 if (Num == 0) { 1101 LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" 1102 << "\n";); 1103 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1104 } 1105 return Num; 1106} 1107 1108int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1109 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1110 int Num = 0; 1111 MachineBasicBlock *DownBlk; 1112 1113 //trueBlk could be the common post dominator 1114 DownBlk = TrueMBB; 1115 1116 LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1117 << " true = BB" << TrueMBB->getNumber() 1118 << ", numSucc=" << TrueMBB->succ_size() << " false = BB" 1119 << FalseMBB->getNumber() << "\n";); 1120 1121 while (DownBlk) { 1122 LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); 1123 1124 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1125 LLVM_DEBUG(dbgs() << " working\n";); 1126 1127 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1128 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1129 1130 numClonedBlock += Num; 1131 Num += serialPatternMatch(*HeadMBB->succ_begin()); 1132 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1133 Num += ifPatternMatch(HeadMBB); 1134 assert(Num > 0); 1135 1136 break; 1137 } 1138 LLVM_DEBUG(dbgs() << " not working\n";); 1139 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1140 } // walk down the postDomTree 1141 1142 return Num; 1143} 1144 1145#ifndef NDEBUG 1146void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1147 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1148 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1149 dbgs() << "head = BB" << HeadMBB->getNumber() 1150 << " size = " << HeadMBB->size(); 1151 if (Detail) { 1152 dbgs() << "\n"; 1153 HeadMBB->print(dbgs()); 1154 dbgs() << "\n"; 1155 } 1156 1157 if (TrueMBB) { 1158 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1159 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1160 if (Detail) { 1161 dbgs() << "\n"; 1162 TrueMBB->print(dbgs()); 1163 dbgs() << "\n"; 1164 } 1165 } 1166 if (FalseMBB) { 1167 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1168 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1169 if (Detail) { 1170 dbgs() << "\n"; 1171 FalseMBB->print(dbgs()); 1172 dbgs() << "\n"; 1173 } 1174 } 1175 if (LandMBB) { 1176 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1177 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1178 if (Detail) { 1179 dbgs() << "\n"; 1180 LandMBB->print(dbgs()); 1181 dbgs() << "\n"; 1182 } 1183 } 1184 1185 dbgs() << "\n"; 1186} 1187#endif 1188 1189int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1190 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1191 MachineBasicBlock **LandMBBPtr) { 1192 bool MigrateTrue = false; 1193 bool MigrateFalse = false; 1194 1195 MachineBasicBlock *LandBlk = *LandMBBPtr; 1196 1197 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1198 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1199 1200 if (TrueMBB == FalseMBB) 1201 return 0; 1202 1203 MigrateTrue = needMigrateBlock(TrueMBB); 1204 MigrateFalse = needMigrateBlock(FalseMBB); 1205 1206 if (!MigrateTrue && !MigrateFalse) 1207 return 0; 1208 1209 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1210 // have more than one predecessors. without doing this, its predecessor 1211 // rather than headBlk will have undefined value in initReg. 1212 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1213 MigrateTrue = true; 1214 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1215 MigrateFalse = true; 1216 1217 LLVM_DEBUG( 1218 dbgs() << "before improveSimpleJumpintoIf: "; 1219 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1220 1221 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1222 // 1223 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1224 // {initReg = 0; org falseBlk branch } 1225 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1226 // => org landBlk 1227 // if landBlk->pred_size() > 2, put the about if-else inside 1228 // if (initReg !=2) {...} 1229 // 1230 // add initReg = initVal to headBlk 1231 1232 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1233 if (!MigrateTrue || !MigrateFalse) { 1234 // XXX: We have an opportunity here to optimize the "branch into if" case 1235 // here. Branch into if looks like this: 1236 // entry 1237 // / | 1238 // diamond_head branch_from 1239 // / \ | 1240 // diamond_false diamond_true 1241 // \ / 1242 // done 1243 // 1244 // The diamond_head block begins the "if" and the diamond_true block 1245 // is the block being "branched into". 1246 // 1247 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1248 // and if MigrateFalse is true, then FalseBB is the block being 1249 // "branched into" 1250 // 1251 // Here is the pseudo code for how I think the optimization should work: 1252 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1253 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1254 // 3. Move the branch instruction from diamond_head into its own basic 1255 // block (new_block). 1256 // 4. Add an unconditional branch from diamond_head to new_block 1257 // 5. Replace the branch instruction in branch_from with an unconditional 1258 // branch to new_block. If branch_from has multiple predecessors, then 1259 // we need to replace the True/False block in the branch 1260 // instruction instead of replacing it. 1261 // 6. Change the condition of the branch instruction in new_block from 1262 // COND to (COND || GPR0) 1263 // 1264 // In order insert these MOV instruction, we will need to use the 1265 // RegisterScavenger. Usually liveness stops being tracked during 1266 // the late machine optimization passes, however if we implement 1267 // bool TargetRegisterInfo::requiresRegisterScavenging( 1268 // const MachineFunction &MF) 1269 // and have it return true, liveness will be tracked correctly 1270 // by generic optimization passes. We will also need to make sure that 1271 // all of our target-specific passes that run after regalloc and before 1272 // the CFGStructurizer track liveness and we will need to modify this pass 1273 // to correctly track liveness. 1274 // 1275 // After the above changes, the new CFG should look like this: 1276 // entry 1277 // / | 1278 // diamond_head branch_from 1279 // \ / 1280 // new_block 1281 // / | 1282 // diamond_false diamond_true 1283 // \ / 1284 // done 1285 // 1286 // Without this optimization, we are forced to duplicate the diamond_true 1287 // block and we will end up with a CFG like this: 1288 // 1289 // entry 1290 // / | 1291 // diamond_head branch_from 1292 // / \ | 1293 // diamond_false diamond_true diamond_true (duplicate) 1294 // \ / | 1295 // done --------------------| 1296 // 1297 // Duplicating diamond_true can be very costly especially if it has a 1298 // lot of instructions. 1299 return 0; 1300 } 1301 1302 int NumNewBlk = 0; 1303 1304 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1305 1306 //insert R600::ENDIF to avoid special case "input landBlk == NULL" 1307 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); 1308 1309 if (LandBlkHasOtherPred) { 1310 report_fatal_error("Extra register needed to handle CFG"); 1311 Register CmpResReg = 1312 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1313 report_fatal_error("Extra compare instruction needed to handle CFG"); 1314 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, 1315 CmpResReg, DebugLoc()); 1316 } 1317 1318 // XXX: We are running this after RA, so creating virtual registers will 1319 // cause an assertion failure in the PostRA scheduling pass. 1320 Register InitReg = 1321 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1322 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, 1323 DebugLoc()); 1324 1325 if (MigrateTrue) { 1326 migrateInstruction(TrueMBB, LandBlk, I); 1327 // need to uncondionally insert the assignment to ensure a path from its 1328 // predecessor rather than headBlk has valid value in initReg if 1329 // (initVal != 1). 1330 report_fatal_error("Extra register needed to handle CFG"); 1331 } 1332 insertInstrBefore(I, R600::ELSE); 1333 1334 if (MigrateFalse) { 1335 migrateInstruction(FalseMBB, LandBlk, I); 1336 // need to uncondionally insert the assignment to ensure a path from its 1337 // predecessor rather than headBlk has valid value in initReg if 1338 // (initVal != 0) 1339 report_fatal_error("Extra register needed to handle CFG"); 1340 } 1341 1342 if (LandBlkHasOtherPred) { 1343 // add endif 1344 insertInstrBefore(I, R600::ENDIF); 1345 1346 // put initReg = 2 to other predecessors of landBlk 1347 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1348 PE = LandBlk->pred_end(); PI != PE; ++PI) { 1349 MachineBasicBlock *MBB = *PI; 1350 if (MBB != TrueMBB && MBB != FalseMBB) 1351 report_fatal_error("Extra register needed to handle CFG"); 1352 } 1353 } 1354 LLVM_DEBUG( 1355 dbgs() << "result from improveSimpleJumpintoIf: "; 1356 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1357 1358 // update landBlk 1359 *LandMBBPtr = LandBlk; 1360 1361 return NumNewBlk; 1362} 1363 1364void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1365 MachineBasicBlock *SrcMBB) { 1366 LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" 1367 << SrcMBB->getNumber() << "\n";); 1368 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1369 1370 DstMBB->removeSuccessor(SrcMBB, true); 1371 cloneSuccessorList(DstMBB, SrcMBB); 1372 1373 removeSuccessor(SrcMBB); 1374 MLI->removeBlock(SrcMBB); 1375 retireBlock(SrcMBB); 1376} 1377 1378void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1379 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1380 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1381 assert (TrueMBB); 1382 LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; 1383 if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() 1384 << " } else "; 1385 dbgs() << "{ "; if (FalseMBB) { 1386 dbgs() << "BB" << FalseMBB->getNumber(); 1387 } dbgs() << " }\n "; 1388 dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { 1389 dbgs() << "BB" << LandMBB->getNumber(); 1390 } dbgs() << "\n";); 1391 1392 int OldOpcode = BranchMI->getOpcode(); 1393 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1394 1395// transform to 1396// if cond 1397// trueBlk 1398// else 1399// falseBlk 1400// endif 1401// landBlk 1402 1403 MachineBasicBlock::iterator I = BranchMI; 1404 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1405 BranchDL); 1406 1407 if (TrueMBB) { 1408 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1409 MBB->removeSuccessor(TrueMBB, true); 1410 if (LandMBB && TrueMBB->succ_size()!=0) 1411 TrueMBB->removeSuccessor(LandMBB, true); 1412 retireBlock(TrueMBB); 1413 MLI->removeBlock(TrueMBB); 1414 } 1415 1416 if (FalseMBB) { 1417 insertInstrBefore(I, R600::ELSE); 1418 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1419 FalseMBB->end()); 1420 MBB->removeSuccessor(FalseMBB, true); 1421 if (LandMBB && FalseMBB->succ_size() != 0) 1422 FalseMBB->removeSuccessor(LandMBB, true); 1423 retireBlock(FalseMBB); 1424 MLI->removeBlock(FalseMBB); 1425 } 1426 insertInstrBefore(I, R600::ENDIF); 1427 1428 BranchMI->eraseFromParent(); 1429 1430 if (LandMBB && TrueMBB && FalseMBB) 1431 MBB->addSuccessor(LandMBB); 1432} 1433 1434void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1435 MachineBasicBlock *LandMBB) { 1436 LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1437 << " land = BB" << LandMBB->getNumber() << "\n";); 1438 1439 insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); 1440 insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); 1441 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1442} 1443 1444void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1445 MachineBasicBlock *LandMBB) { 1446 LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" 1447 << ExitingMBB->getNumber() << " land = BB" 1448 << LandMBB->getNumber() << "\n";); 1449 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1450 assert(BranchMI && isCondBranch(BranchMI)); 1451 DebugLoc DL = BranchMI->getDebugLoc(); 1452 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1453 MachineBasicBlock::iterator I = BranchMI; 1454 if (TrueBranch != LandMBB) 1455 reversePredicateSetter(I, *I->getParent()); 1456 insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); 1457 insertInstrBefore(I, R600::BREAK); 1458 insertInstrBefore(I, R600::ENDIF); 1459 //now branchInst can be erase safely 1460 BranchMI->eraseFromParent(); 1461 //now take care of successors, retire blocks 1462 ExitingMBB->removeSuccessor(LandMBB, true); 1463} 1464 1465void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1466 MachineBasicBlock *ContMBB) { 1467 LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1468 << ContingMBB->getNumber() << ", cont = BB" 1469 << ContMBB->getNumber() << "\n";); 1470 1471 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1472 if (MI) { 1473 assert(isCondBranch(MI)); 1474 MachineBasicBlock::iterator I = MI; 1475 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1476 int OldOpcode = MI->getOpcode(); 1477 DebugLoc DL = MI->getDebugLoc(); 1478 1479 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1480 1481 if (!UseContinueLogical) { 1482 int BranchOpcode = 1483 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1484 getBranchZeroOpcode(OldOpcode); 1485 insertCondBranchBefore(I, BranchOpcode, DL); 1486 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1487 insertInstrEnd(ContingMBB, R600::CONTINUE, DL); 1488 insertInstrEnd(ContingMBB, R600::ENDIF, DL); 1489 } else { 1490 int BranchOpcode = 1491 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1492 getContinueZeroOpcode(OldOpcode); 1493 insertCondBranchBefore(I, BranchOpcode, DL); 1494 } 1495 1496 MI->eraseFromParent(); 1497 } else { 1498 // if we've arrived here then we've already erased the branch instruction 1499 // travel back up the basic block to see the last reference of our debug 1500 // location we've just inserted that reference here so it should be 1501 // representative insertEnd to ensure phi-moves, if exist, go before the 1502 // continue-instr. 1503 insertInstrEnd(ContingMBB, R600::CONTINUE, 1504 getLastDebugLocInBB(ContingMBB)); 1505 } 1506} 1507 1508int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1509 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1510 int Cloned = 0; 1511 assert(PreMBB->isSuccessor(SrcMBB)); 1512 while (SrcMBB && SrcMBB != DstMBB) { 1513 assert(SrcMBB->succ_size() == 1); 1514 if (SrcMBB->pred_size() > 1) { 1515 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1516 ++Cloned; 1517 } 1518 1519 PreMBB = SrcMBB; 1520 SrcMBB = *SrcMBB->succ_begin(); 1521 } 1522 1523 return Cloned; 1524} 1525 1526MachineBasicBlock * 1527AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1528 MachineBasicBlock *PredMBB) { 1529 assert(PredMBB->isSuccessor(MBB) && 1530 "succBlk is not a prececessor of curBlk"); 1531 1532 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1533 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1534 //srcBlk, oldBlk, newBlk 1535 1536 PredMBB->replaceSuccessor(MBB, CloneMBB); 1537 1538 // add all successor to cloneBlk 1539 cloneSuccessorList(CloneMBB, MBB); 1540 1541 numClonedInstr += MBB->size(); 1542 1543 LLVM_DEBUG(dbgs() << "Cloned block: " 1544 << "BB" << MBB->getNumber() << "size " << MBB->size() 1545 << "\n";); 1546 1547 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1548 1549 return CloneMBB; 1550} 1551 1552void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1553 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1554 MachineBasicBlock::iterator SpliceEnd; 1555 //look for the input branchinstr, not the AMDGPU branchinstr 1556 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1557 if (!BranchMI) { 1558 LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); 1559 SpliceEnd = SrcMBB->end(); 1560 } else { 1561 LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1562 SpliceEnd = BranchMI; 1563 } 1564 LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " 1565 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1566 << "\n";); 1567 1568 //splice insert before insertPos 1569 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1570 1571 LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " 1572 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1573 << '\n';); 1574} 1575 1576MachineBasicBlock * 1577AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1578 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1579 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1580 1581 if (!LoopHeader || !LoopLatch) 1582 return nullptr; 1583 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1584 // Is LoopRep an infinite loop ? 1585 if (!BranchMI || !isUncondBranch(BranchMI)) 1586 return nullptr; 1587 1588 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1589 FuncRep->push_back(DummyExitBlk); //insert to function 1590 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1591 LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1592 LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); 1593 Ctx.emitError("Extra register needed to handle CFG"); 1594 return nullptr; 1595} 1596 1597void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1598 MachineInstr *BranchMI; 1599 1600 // I saw two unconditional branch in one basic block in example 1601 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1602 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1603 && isUncondBranch(BranchMI)) { 1604 LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1605 BranchMI->eraseFromParent(); 1606 } 1607} 1608 1609void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1610 MachineBasicBlock *MBB) { 1611 if (MBB->succ_size() != 2) 1612 return; 1613 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1614 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1615 if (MBB1 != MBB2) 1616 return; 1617 1618 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1619 assert(BranchMI && isCondBranch(BranchMI)); 1620 LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1621 BranchMI->eraseFromParent(); 1622 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1623 MBB->removeSuccessor(MBB1, true); 1624} 1625 1626void AMDGPUCFGStructurizer::addDummyExitBlock( 1627 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1628 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1629 FuncRep->push_back(DummyExitBlk); //insert to function 1630 insertInstrEnd(DummyExitBlk, R600::RETURN); 1631 1632 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 1633 E = RetMBB.end(); It != E; ++It) { 1634 MachineBasicBlock *MBB = *It; 1635 MachineInstr *MI = getReturnInstr(MBB); 1636 if (MI) 1637 MI->eraseFromParent(); 1638 MBB->addSuccessor(DummyExitBlk); 1639 LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1640 << " successors\n";); 1641 } 1642 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1643} 1644 1645void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1646 while (MBB->succ_size()) 1647 MBB->removeSuccessor(*MBB->succ_begin()); 1648} 1649 1650void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1651 int SccNum) { 1652 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1653 if (!srcBlkInfo) 1654 srcBlkInfo = new BlockInformation(); 1655 srcBlkInfo->SccNum = SccNum; 1656} 1657 1658void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1659 LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); 1660 1661 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1662 1663 if (!SrcBlkInfo) 1664 SrcBlkInfo = new BlockInformation(); 1665 1666 SrcBlkInfo->IsRetired = true; 1667 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1668 && "can't retire block yet"); 1669} 1670 1671INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1672 "AMDGPU CFG Structurizer", false, false) 1673INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1674INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1675INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1676INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1677 "AMDGPU CFG Structurizer", false, false) 1678 1679FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1680 return new AMDGPUCFGStructurizer(); 1681} 1682