1327952Sdim//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// 2284677Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6284677Sdim// 7284677Sdim//==-----------------------------------------------------------------------===// 8284677Sdim 9284677Sdim#include "AMDGPU.h" 10284677Sdim#include "AMDGPUSubtarget.h" 11360784Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12284677Sdim#include "R600InstrInfo.h" 13321369Sdim#include "R600RegisterInfo.h" 14284677Sdim#include "llvm/ADT/DepthFirstIterator.h" 15284677Sdim#include "llvm/ADT/SCCIterator.h" 16321369Sdim#include "llvm/ADT/SmallPtrSet.h" 17284677Sdim#include "llvm/ADT/SmallVector.h" 18284677Sdim#include "llvm/ADT/Statistic.h" 19321369Sdim#include "llvm/ADT/StringRef.h" 20321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 21284677Sdim#include "llvm/CodeGen/MachineDominators.h" 22284677Sdim#include "llvm/CodeGen/MachineFunction.h" 23284677Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 24321369Sdim#include "llvm/CodeGen/MachineInstr.h" 25284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 26284677Sdim#include "llvm/CodeGen/MachineJumpTableInfo.h" 27284677Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 28321369Sdim#include "llvm/CodeGen/MachineOperand.h" 29284677Sdim#include "llvm/CodeGen/MachinePostDominators.h" 30284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 31321369Sdim#include "llvm/IR/DebugLoc.h" 32321369Sdim#include "llvm/IR/LLVMContext.h" 33360784Sdim#include "llvm/InitializePasses.h" 34321369Sdim#include "llvm/Pass.h" 35284677Sdim#include "llvm/Support/Debug.h" 36321369Sdim#include "llvm/Support/ErrorHandling.h" 37341825Sdim#include "llvm/Support/MachineValueType.h" 38284677Sdim#include "llvm/Support/raw_ostream.h" 39321369Sdim#include <cassert> 40321369Sdim#include <cstddef> 41284677Sdim#include <deque> 42321369Sdim#include <iterator> 43321369Sdim#include <map> 44321369Sdim#include <utility> 45321369Sdim#include <vector> 46284677Sdim 47284677Sdimusing namespace llvm; 48284677Sdim 49284677Sdim#define DEBUG_TYPE "structcfg" 50284677Sdim 51284677Sdim#define DEFAULT_VEC_SLOTS 8 52284677Sdim 53284677Sdim// TODO: move-begin. 54284677Sdim 55284677Sdim//===----------------------------------------------------------------------===// 56284677Sdim// 57284677Sdim// Statistics for CFGStructurizer. 58284677Sdim// 59284677Sdim//===----------------------------------------------------------------------===// 60284677Sdim 61284677SdimSTATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 62284677Sdim "matched"); 63284677SdimSTATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 64284677Sdim "matched"); 65284677SdimSTATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 66284677SdimSTATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 67284677Sdim 68284677Sdimnamespace llvm { 69321369Sdim 70327952Sdimvoid initializeAMDGPUCFGStructurizerPass(PassRegistry &); 71284677Sdim 72321369Sdim} // end namespace llvm 73321369Sdim 74321369Sdimnamespace { 75321369Sdim 76284677Sdim//===----------------------------------------------------------------------===// 77284677Sdim// 78284677Sdim// Miscellaneous utility for CFGStructurizer. 79284677Sdim// 80284677Sdim//===----------------------------------------------------------------------===// 81321369Sdim 82341825Sdim#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); 83284677Sdim 84341825Sdim#define SHOWNEWBLK(b, msg) \ 85341825Sdim LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 86341825Sdim dbgs() << "\n";); 87284677Sdim 88341825Sdim#define SHOWBLK_DETAIL(b, msg) \ 89341825Sdim LLVM_DEBUG(if (b) { \ 90341825Sdim dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 91341825Sdim b->print(dbgs()); \ 92341825Sdim dbgs() << "\n"; \ 93341825Sdim }); 94284677Sdim 95284677Sdim#define INVALIDSCCNUM -1 96284677Sdim 97284677Sdim//===----------------------------------------------------------------------===// 98284677Sdim// 99284677Sdim// supporting data structure for CFGStructurizer 100284677Sdim// 101284677Sdim//===----------------------------------------------------------------------===// 102284677Sdim 103284677Sdimclass BlockInformation { 104284677Sdimpublic: 105321369Sdim bool IsRetired = false; 106321369Sdim int SccNum = INVALIDSCCNUM; 107321369Sdim 108321369Sdim BlockInformation() = default; 109284677Sdim}; 110284677Sdim 111284677Sdim//===----------------------------------------------------------------------===// 112284677Sdim// 113284677Sdim// CFGStructurizer 114284677Sdim// 115284677Sdim//===----------------------------------------------------------------------===// 116284677Sdim 117284677Sdimclass AMDGPUCFGStructurizer : public MachineFunctionPass { 118284677Sdimpublic: 119327952Sdim using MBBVector = SmallVector<MachineBasicBlock *, 32>; 120327952Sdim using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; 121327952Sdim using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; 122284677Sdim 123284677Sdim enum PathToKind { 124284677Sdim Not_SinglePath = 0, 125284677Sdim SinglePath_InPath = 1, 126284677Sdim SinglePath_NotInPath = 2 127284677Sdim }; 128284677Sdim 129284677Sdim static char ID; 130284677Sdim 131321369Sdim AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { 132284677Sdim initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); 133284677Sdim } 134284677Sdim 135314564Sdim StringRef getPassName() const override { 136284677Sdim return "AMDGPU Control Flow Graph structurizer Pass"; 137284677Sdim } 138284677Sdim 139284677Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 140284677Sdim AU.addRequired<MachineDominatorTree>(); 141284677Sdim AU.addRequired<MachinePostDominatorTree>(); 142284677Sdim AU.addRequired<MachineLoopInfo>(); 143314564Sdim MachineFunctionPass::getAnalysisUsage(AU); 144284677Sdim } 145284677Sdim 146284677Sdim /// Perform the CFG structurization 147284677Sdim bool run(); 148284677Sdim 149284677Sdim /// Perform the CFG preparation 150284677Sdim /// This step will remove every unconditionnal/dead jump instructions and make 151284677Sdim /// sure all loops have an exit block 152284677Sdim bool prepare(); 153284677Sdim 154284677Sdim bool runOnMachineFunction(MachineFunction &MF) override { 155309124Sdim TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); 156284677Sdim TRI = &TII->getRegisterInfo(); 157341825Sdim LLVM_DEBUG(MF.dump();); 158284677Sdim OrderedBlks.clear(); 159284677Sdim Visited.clear(); 160284677Sdim FuncRep = &MF; 161284677Sdim MLI = &getAnalysis<MachineLoopInfo>(); 162341825Sdim LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 163284677Sdim MDT = &getAnalysis<MachineDominatorTree>(); 164341825Sdim LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); 165284677Sdim PDT = &getAnalysis<MachinePostDominatorTree>(); 166341825Sdim LLVM_DEBUG(PDT->print(dbgs());); 167284677Sdim prepare(); 168284677Sdim run(); 169341825Sdim LLVM_DEBUG(MF.dump();); 170284677Sdim return true; 171284677Sdim } 172284677Sdim 173284677Sdimprotected: 174284677Sdim MachineDominatorTree *MDT; 175284677Sdim MachinePostDominatorTree *PDT; 176284677Sdim MachineLoopInfo *MLI; 177321369Sdim const R600InstrInfo *TII = nullptr; 178321369Sdim const R600RegisterInfo *TRI = nullptr; 179284677Sdim 180284677Sdim // PRINT FUNCTIONS 181284677Sdim /// Print the ordered Blocks. 182284677Sdim void printOrderedBlocks() const { 183284677Sdim size_t i = 0; 184284677Sdim for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 185284677Sdim iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 186284677Sdim dbgs() << "BB" << (*iterBlk)->getNumber(); 187284677Sdim dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 188284677Sdim if (i != 0 && i % 10 == 0) { 189284677Sdim dbgs() << "\n"; 190284677Sdim } else { 191284677Sdim dbgs() << " "; 192284677Sdim } 193284677Sdim } 194284677Sdim } 195321369Sdim 196284677Sdim static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 197284677Sdim for (MachineLoop::iterator iter = LoopInfo.begin(), 198284677Sdim iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 199284677Sdim (*iter)->print(dbgs(), 0); 200284677Sdim } 201284677Sdim } 202284677Sdim 203284677Sdim // UTILITY FUNCTIONS 204284677Sdim int getSCCNum(MachineBasicBlock *MBB) const; 205284677Sdim MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 206284677Sdim bool hasBackEdge(MachineBasicBlock *MBB) const; 207284677Sdim bool isRetiredBlock(MachineBasicBlock *MBB) const; 208284677Sdim bool isActiveLoophead(MachineBasicBlock *MBB) const; 209284677Sdim PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 210284677Sdim bool AllowSideEntry = true) const; 211284677Sdim int countActiveBlock(MBBVector::const_iterator It, 212284677Sdim MBBVector::const_iterator E) const; 213284677Sdim bool needMigrateBlock(MachineBasicBlock *MBB) const; 214284677Sdim 215284677Sdim // Utility Functions 216314564Sdim void reversePredicateSetter(MachineBasicBlock::iterator I, 217314564Sdim MachineBasicBlock &MBB); 218284677Sdim /// Compute the reversed DFS post order of Blocks 219284677Sdim void orderBlocks(MachineFunction *MF); 220284677Sdim 221284677Sdim // Function originally from CFGStructTraits 222284677Sdim void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 223309124Sdim const DebugLoc &DL = DebugLoc()); 224284677Sdim MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 225309124Sdim const DebugLoc &DL = DebugLoc()); 226284677Sdim MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 227284677Sdim void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 228309124Sdim const DebugLoc &DL); 229284677Sdim void insertCondBranchBefore(MachineBasicBlock *MBB, 230309124Sdim MachineBasicBlock::iterator I, int NewOpcode, 231309124Sdim int RegNum, const DebugLoc &DL); 232327952Sdim 233284677Sdim static int getBranchNzeroOpcode(int OldOpcode); 234284677Sdim static int getBranchZeroOpcode(int OldOpcode); 235284677Sdim static int getContinueNzeroOpcode(int OldOpcode); 236284677Sdim static int getContinueZeroOpcode(int OldOpcode); 237284677Sdim static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 238284677Sdim static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 239284677Sdim static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 240284677Sdim MachineInstr *MI); 241284677Sdim static bool isCondBranch(MachineInstr *MI); 242284677Sdim static bool isUncondBranch(MachineInstr *MI); 243284677Sdim static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 244284677Sdim static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 245327952Sdim 246284677Sdim /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 247284677Sdim /// 248284677Sdim /// BB with backward-edge could have move instructions after the branch 249284677Sdim /// instruction. Such move instruction "belong to" the loop backward-edge. 250284677Sdim MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 251327952Sdim 252284677Sdim static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 253284677Sdim static bool isReturnBlock(MachineBasicBlock *MBB); 254284677Sdim static void cloneSuccessorList(MachineBasicBlock *DstMBB, 255327952Sdim MachineBasicBlock *SrcMBB); 256284677Sdim static MachineBasicBlock *clone(MachineBasicBlock *MBB); 257327952Sdim 258284677Sdim /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 259284677Sdim /// because the AMDGPU instruction is not recognized as terminator fix this 260284677Sdim /// and retire this routine 261284677Sdim void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 262284677Sdim MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 263327952Sdim 264284677Sdim static void wrapup(MachineBasicBlock *MBB); 265284677Sdim 266284677Sdim int patternMatch(MachineBasicBlock *MBB); 267284677Sdim int patternMatchGroup(MachineBasicBlock *MBB); 268284677Sdim int serialPatternMatch(MachineBasicBlock *MBB); 269284677Sdim int ifPatternMatch(MachineBasicBlock *MBB); 270284677Sdim int loopendPatternMatch(); 271284677Sdim int mergeLoop(MachineLoop *LoopRep); 272284677Sdim 273284677Sdim /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 274284677Sdim /// the same loop with LoopLandInfo without explicitly keeping track of 275284677Sdim /// loopContBlks and loopBreakBlks, this is a method to get the information. 276284677Sdim bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 277284677Sdim MachineBasicBlock *Src2MBB); 278284677Sdim int handleJumpintoIf(MachineBasicBlock *HeadMBB, 279284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 280284677Sdim int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 281284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 282284677Sdim int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 283284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 284284677Sdim MachineBasicBlock **LandMBBPtr); 285284677Sdim void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 286284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 287284677Sdim MachineBasicBlock *LandMBB, bool Detail = false); 288284677Sdim int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 289284677Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 290284677Sdim void mergeSerialBlock(MachineBasicBlock *DstMBB, 291284677Sdim MachineBasicBlock *SrcMBB); 292284677Sdim 293284677Sdim void mergeIfthenelseBlock(MachineInstr *BranchMI, 294284677Sdim MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 295284677Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 296284677Sdim void mergeLooplandBlock(MachineBasicBlock *DstMBB, 297284677Sdim MachineBasicBlock *LandMBB); 298284677Sdim void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 299284677Sdim MachineBasicBlock *LandMBB); 300284677Sdim void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 301284677Sdim MachineBasicBlock *ContMBB); 302327952Sdim 303284677Sdim /// normalizeInfiniteLoopExit change 304284677Sdim /// B1: 305284677Sdim /// uncond_br LoopHeader 306284677Sdim /// 307284677Sdim /// to 308284677Sdim /// B1: 309284677Sdim /// cond_br 1 LoopHeader dummyExit 310284677Sdim /// and return the newly added dummy exit block 311284677Sdim MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 312284677Sdim void removeUnconditionalBranch(MachineBasicBlock *MBB); 313327952Sdim 314284677Sdim /// Remove duplicate branches instructions in a block. 315284677Sdim /// For instance 316284677Sdim /// B0: 317284677Sdim /// cond_br X B1 B2 318284677Sdim /// cond_br X B1 B2 319284677Sdim /// is transformed to 320284677Sdim /// B0: 321284677Sdim /// cond_br X B1 B2 322284677Sdim void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 323327952Sdim 324284677Sdim void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 325284677Sdim void removeSuccessor(MachineBasicBlock *MBB); 326284677Sdim MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 327284677Sdim MachineBasicBlock *PredMBB); 328284677Sdim void migrateInstruction(MachineBasicBlock *SrcMBB, 329284677Sdim MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 330284677Sdim void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 331284677Sdim void retireBlock(MachineBasicBlock *MBB); 332284677Sdim 333284677Sdimprivate: 334284677Sdim MBBInfoMap BlockInfoMap; 335284677Sdim LoopLandInfoMap LLInfoMap; 336284677Sdim std::map<MachineLoop *, bool> Visited; 337284677Sdim MachineFunction *FuncRep; 338284677Sdim SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 339284677Sdim}; 340284677Sdim 341327952Sdim} // end anonymous namespace 342327952Sdim 343321369Sdimchar AMDGPUCFGStructurizer::ID = 0; 344321369Sdim 345284677Sdimint AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 346284677Sdim MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 347284677Sdim if (It == BlockInfoMap.end()) 348284677Sdim return INVALIDSCCNUM; 349284677Sdim return (*It).second->SccNum; 350284677Sdim} 351284677Sdim 352284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 353284677Sdim const { 354284677Sdim LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 355284677Sdim if (It == LLInfoMap.end()) 356284677Sdim return nullptr; 357284677Sdim return (*It).second; 358284677Sdim} 359284677Sdim 360284677Sdimbool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 361284677Sdim MachineLoop *LoopRep = MLI->getLoopFor(MBB); 362284677Sdim if (!LoopRep) 363284677Sdim return false; 364284677Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 365284677Sdim return MBB->isSuccessor(LoopHeader); 366284677Sdim} 367284677Sdim 368284677Sdimbool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 369284677Sdim MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 370284677Sdim if (It == BlockInfoMap.end()) 371284677Sdim return false; 372284677Sdim return (*It).second->IsRetired; 373284677Sdim} 374284677Sdim 375284677Sdimbool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 376284677Sdim MachineLoop *LoopRep = MLI->getLoopFor(MBB); 377284677Sdim while (LoopRep && LoopRep->getHeader() == MBB) { 378284677Sdim MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 379284677Sdim if(!LoopLand) 380284677Sdim return true; 381284677Sdim if (!isRetiredBlock(LoopLand)) 382284677Sdim return true; 383284677Sdim LoopRep = LoopRep->getParentLoop(); 384284677Sdim } 385284677Sdim return false; 386284677Sdim} 387321369Sdim 388284677SdimAMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 389284677Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 390284677Sdim bool AllowSideEntry) const { 391284677Sdim assert(DstMBB); 392284677Sdim if (SrcMBB == DstMBB) 393284677Sdim return SinglePath_InPath; 394284677Sdim while (SrcMBB && SrcMBB->succ_size() == 1) { 395284677Sdim SrcMBB = *SrcMBB->succ_begin(); 396284677Sdim if (SrcMBB == DstMBB) 397284677Sdim return SinglePath_InPath; 398284677Sdim if (!AllowSideEntry && SrcMBB->pred_size() > 1) 399284677Sdim return Not_SinglePath; 400284677Sdim } 401284677Sdim if (SrcMBB && SrcMBB->succ_size()==0) 402284677Sdim return SinglePath_NotInPath; 403284677Sdim return Not_SinglePath; 404284677Sdim} 405284677Sdim 406284677Sdimint AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 407284677Sdim MBBVector::const_iterator E) const { 408284677Sdim int Count = 0; 409284677Sdim while (It != E) { 410284677Sdim if (!isRetiredBlock(*It)) 411284677Sdim ++Count; 412284677Sdim ++It; 413284677Sdim } 414284677Sdim return Count; 415284677Sdim} 416284677Sdim 417284677Sdimbool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 418284677Sdim unsigned BlockSizeThreshold = 30; 419284677Sdim unsigned CloneInstrThreshold = 100; 420284677Sdim bool MultiplePreds = MBB && (MBB->pred_size() > 1); 421284677Sdim 422284677Sdim if(!MultiplePreds) 423284677Sdim return false; 424284677Sdim unsigned BlkSize = MBB->size(); 425284677Sdim return ((BlkSize > BlockSizeThreshold) && 426284677Sdim (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 427284677Sdim} 428284677Sdim 429284677Sdimvoid AMDGPUCFGStructurizer::reversePredicateSetter( 430314564Sdim MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { 431314564Sdim assert(I.isValid() && "Expected valid iterator"); 432309124Sdim for (;; --I) { 433314564Sdim if (I == MBB.end()) 434314564Sdim continue; 435341825Sdim if (I->getOpcode() == R600::PRED_X) { 436314564Sdim switch (I->getOperand(2).getImm()) { 437341825Sdim case R600::PRED_SETE_INT: 438341825Sdim I->getOperand(2).setImm(R600::PRED_SETNE_INT); 439284677Sdim return; 440341825Sdim case R600::PRED_SETNE_INT: 441341825Sdim I->getOperand(2).setImm(R600::PRED_SETE_INT); 442284677Sdim return; 443341825Sdim case R600::PRED_SETE: 444341825Sdim I->getOperand(2).setImm(R600::PRED_SETNE); 445284677Sdim return; 446341825Sdim case R600::PRED_SETNE: 447341825Sdim I->getOperand(2).setImm(R600::PRED_SETE); 448284677Sdim return; 449284677Sdim default: 450284677Sdim llvm_unreachable("PRED_X Opcode invalid!"); 451284677Sdim } 452284677Sdim } 453284677Sdim } 454284677Sdim} 455284677Sdim 456284677Sdimvoid AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 457309124Sdim int NewOpcode, const DebugLoc &DL) { 458309124Sdim MachineInstr *MI = 459309124Sdim MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 460284677Sdim MBB->push_back(MI); 461284677Sdim //assume the instruction doesn't take any reg operand ... 462284677Sdim SHOWNEWINSTR(MI); 463284677Sdim} 464284677Sdim 465284677SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 466309124Sdim int NewOpcode, 467309124Sdim const DebugLoc &DL) { 468284677Sdim MachineInstr *MI = 469284677Sdim MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 470284677Sdim if (MBB->begin() != MBB->end()) 471284677Sdim MBB->insert(MBB->begin(), MI); 472284677Sdim else 473284677Sdim MBB->push_back(MI); 474284677Sdim SHOWNEWINSTR(MI); 475284677Sdim return MI; 476284677Sdim} 477284677Sdim 478284677SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 479284677Sdim MachineBasicBlock::iterator I, int NewOpcode) { 480284677Sdim MachineInstr *OldMI = &(*I); 481284677Sdim MachineBasicBlock *MBB = OldMI->getParent(); 482284677Sdim MachineInstr *NewMBB = 483284677Sdim MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 484284677Sdim MBB->insert(I, NewMBB); 485284677Sdim //assume the instruction doesn't take any reg operand ... 486284677Sdim SHOWNEWINSTR(NewMBB); 487284677Sdim return NewMBB; 488284677Sdim} 489284677Sdim 490284677Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore( 491309124Sdim MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { 492284677Sdim MachineInstr *OldMI = &(*I); 493284677Sdim MachineBasicBlock *MBB = OldMI->getParent(); 494284677Sdim MachineFunction *MF = MBB->getParent(); 495284677Sdim MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 496284677Sdim MBB->insert(I, NewMI); 497284677Sdim MachineInstrBuilder MIB(*MF, NewMI); 498284677Sdim MIB.addReg(OldMI->getOperand(1).getReg(), false); 499284677Sdim SHOWNEWINSTR(NewMI); 500284677Sdim //erase later oldInstr->eraseFromParent(); 501284677Sdim} 502284677Sdim 503309124Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore( 504309124Sdim MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, 505309124Sdim int RegNum, const DebugLoc &DL) { 506284677Sdim MachineFunction *MF = blk->getParent(); 507284677Sdim MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 508284677Sdim //insert before 509284677Sdim blk->insert(I, NewInstr); 510284677Sdim MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 511284677Sdim SHOWNEWINSTR(NewInstr); 512284677Sdim} 513284677Sdim 514284677Sdimint AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 515284677Sdim switch(OldOpcode) { 516341825Sdim case R600::JUMP_COND: 517341825Sdim case R600::JUMP: return R600::IF_PREDICATE_SET; 518341825Sdim case R600::BRANCH_COND_i32: 519341825Sdim case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; 520284677Sdim default: llvm_unreachable("internal error"); 521284677Sdim } 522284677Sdim return -1; 523284677Sdim} 524284677Sdim 525284677Sdimint AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 526284677Sdim switch(OldOpcode) { 527341825Sdim case R600::JUMP_COND: 528341825Sdim case R600::JUMP: return R600::IF_PREDICATE_SET; 529341825Sdim case R600::BRANCH_COND_i32: 530341825Sdim case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; 531284677Sdim default: llvm_unreachable("internal error"); 532284677Sdim } 533284677Sdim return -1; 534284677Sdim} 535284677Sdim 536284677Sdimint AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 537284677Sdim switch(OldOpcode) { 538341825Sdim case R600::JUMP_COND: 539341825Sdim case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; 540284677Sdim default: llvm_unreachable("internal error"); 541327952Sdim } 542284677Sdim return -1; 543284677Sdim} 544284677Sdim 545284677Sdimint AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 546284677Sdim switch(OldOpcode) { 547341825Sdim case R600::JUMP_COND: 548341825Sdim case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; 549284677Sdim default: llvm_unreachable("internal error"); 550284677Sdim } 551284677Sdim return -1; 552284677Sdim} 553284677Sdim 554284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 555284677Sdim return MI->getOperand(0).getMBB(); 556284677Sdim} 557284677Sdim 558284677Sdimvoid AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 559284677Sdim MachineBasicBlock *MBB) { 560284677Sdim MI->getOperand(0).setMBB(MBB); 561284677Sdim} 562284677Sdim 563284677SdimMachineBasicBlock * 564284677SdimAMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 565284677Sdim MachineInstr *MI) { 566284677Sdim assert(MBB->succ_size() == 2); 567284677Sdim MachineBasicBlock *TrueBranch = getTrueBranch(MI); 568284677Sdim MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 569284677Sdim MachineBasicBlock::succ_iterator Next = It; 570284677Sdim ++Next; 571284677Sdim return (*It == TrueBranch) ? *Next : *It; 572284677Sdim} 573284677Sdim 574284677Sdimbool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 575284677Sdim switch (MI->getOpcode()) { 576341825Sdim case R600::JUMP_COND: 577341825Sdim case R600::BRANCH_COND_i32: 578341825Sdim case R600::BRANCH_COND_f32: return true; 579284677Sdim default: 580284677Sdim return false; 581284677Sdim } 582284677Sdim return false; 583284677Sdim} 584284677Sdim 585284677Sdimbool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 586284677Sdim switch (MI->getOpcode()) { 587341825Sdim case R600::JUMP: 588341825Sdim case R600::BRANCH: 589284677Sdim return true; 590284677Sdim default: 591284677Sdim return false; 592284677Sdim } 593284677Sdim return false; 594284677Sdim} 595284677Sdim 596284677SdimDebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 597284677Sdim //get DebugLoc from the first MachineBasicBlock instruction with debug info 598284677Sdim DebugLoc DL; 599284677Sdim for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 600284677Sdim ++It) { 601284677Sdim MachineInstr *instr = &(*It); 602284677Sdim if (instr->getDebugLoc()) 603284677Sdim DL = instr->getDebugLoc(); 604284677Sdim } 605284677Sdim return DL; 606284677Sdim} 607284677Sdim 608284677SdimMachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 609284677Sdim MachineBasicBlock *MBB) { 610284677Sdim MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 611284677Sdim MachineInstr *MI = &*It; 612284677Sdim if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 613284677Sdim return MI; 614284677Sdim return nullptr; 615284677Sdim} 616284677Sdim 617284677SdimMachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 618284677Sdim MachineBasicBlock *MBB) { 619284677Sdim for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 620284677Sdim It != E; ++It) { 621284677Sdim // FIXME: Simplify 622284677Sdim MachineInstr *MI = &*It; 623284677Sdim if (MI) { 624284677Sdim if (isCondBranch(MI) || isUncondBranch(MI)) 625284677Sdim return MI; 626284677Sdim else if (!TII->isMov(MI->getOpcode())) 627284677Sdim break; 628284677Sdim } 629284677Sdim } 630284677Sdim return nullptr; 631284677Sdim} 632284677Sdim 633284677SdimMachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 634284677Sdim MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 635284677Sdim if (It != MBB->rend()) { 636284677Sdim MachineInstr *instr = &(*It); 637341825Sdim if (instr->getOpcode() == R600::RETURN) 638284677Sdim return instr; 639284677Sdim } 640284677Sdim return nullptr; 641284677Sdim} 642284677Sdim 643284677Sdimbool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 644284677Sdim MachineInstr *MI = getReturnInstr(MBB); 645284677Sdim bool IsReturn = (MBB->succ_size() == 0); 646284677Sdim if (MI) 647284677Sdim assert(IsReturn); 648284677Sdim else if (IsReturn) 649341825Sdim LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() 650341825Sdim << " is return block without RETURN instr\n";); 651284677Sdim return IsReturn; 652284677Sdim} 653284677Sdim 654284677Sdimvoid AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 655284677Sdim MachineBasicBlock *SrcMBB) { 656284677Sdim for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 657284677Sdim iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 658284677Sdim DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 659284677Sdim} 660284677Sdim 661284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 662284677Sdim MachineFunction *Func = MBB->getParent(); 663284677Sdim MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 664284677Sdim Func->push_back(NewMBB); //insert to function 665309124Sdim for (const MachineInstr &It : *MBB) 666309124Sdim NewMBB->push_back(Func->CloneMachineInstr(&It)); 667284677Sdim return NewMBB; 668284677Sdim} 669284677Sdim 670284677Sdimvoid AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 671284677Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 672284677Sdim MachineBasicBlock *NewBlk) { 673284677Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 674284677Sdim if (BranchMI && isCondBranch(BranchMI) && 675284677Sdim getTrueBranch(BranchMI) == OldMBB) 676284677Sdim setTrueBranch(BranchMI, NewBlk); 677284677Sdim} 678284677Sdim 679284677Sdimvoid AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 680284677Sdim assert((!MBB->getParent()->getJumpTableInfo() 681284677Sdim || MBB->getParent()->getJumpTableInfo()->isEmpty()) 682284677Sdim && "found a jump table"); 683284677Sdim 684284677Sdim //collect continue right before endloop 685284677Sdim SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 686284677Sdim MachineBasicBlock::iterator Pre = MBB->begin(); 687284677Sdim MachineBasicBlock::iterator E = MBB->end(); 688284677Sdim MachineBasicBlock::iterator It = Pre; 689284677Sdim while (It != E) { 690341825Sdim if (Pre->getOpcode() == R600::CONTINUE 691341825Sdim && It->getOpcode() == R600::ENDLOOP) 692309124Sdim ContInstr.push_back(&*Pre); 693284677Sdim Pre = It; 694284677Sdim ++It; 695284677Sdim } 696284677Sdim 697284677Sdim //delete continue right before endloop 698284677Sdim for (unsigned i = 0; i < ContInstr.size(); ++i) 699284677Sdim ContInstr[i]->eraseFromParent(); 700284677Sdim 701284677Sdim // TODO to fix up jump table so later phase won't be confused. if 702284677Sdim // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 703284677Sdim // there isn't such an interface yet. alternatively, replace all the other 704284677Sdim // blocks in the jump table with the entryBlk //} 705284677Sdim} 706284677Sdim 707284677Sdimbool AMDGPUCFGStructurizer::prepare() { 708284677Sdim bool Changed = false; 709284677Sdim 710284677Sdim //FIXME: if not reducible flow graph, make it so ??? 711284677Sdim 712341825Sdim LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 713284677Sdim 714284677Sdim orderBlocks(FuncRep); 715284677Sdim 716284677Sdim SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 717284677Sdim 718284677Sdim // Add an ExitBlk to loop that don't have one 719284677Sdim for (MachineLoopInfo::iterator It = MLI->begin(), 720284677Sdim E = MLI->end(); It != E; ++It) { 721284677Sdim MachineLoop *LoopRep = (*It); 722284677Sdim MBBVector ExitingMBBs; 723284677Sdim LoopRep->getExitingBlocks(ExitingMBBs); 724284677Sdim 725284677Sdim if (ExitingMBBs.size() == 0) { 726284677Sdim MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 727284677Sdim if (DummyExitBlk) 728284677Sdim RetBlks.push_back(DummyExitBlk); 729284677Sdim } 730284677Sdim } 731284677Sdim 732284677Sdim // Remove unconditional branch instr. 733284677Sdim // Add dummy exit block iff there are multiple returns. 734284677Sdim for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 735284677Sdim It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 736284677Sdim MachineBasicBlock *MBB = *It; 737284677Sdim removeUnconditionalBranch(MBB); 738284677Sdim removeRedundantConditionalBranch(MBB); 739284677Sdim if (isReturnBlock(MBB)) { 740284677Sdim RetBlks.push_back(MBB); 741284677Sdim } 742284677Sdim assert(MBB->succ_size() <= 2); 743284677Sdim } 744284677Sdim 745284677Sdim if (RetBlks.size() >= 2) { 746284677Sdim addDummyExitBlock(RetBlks); 747284677Sdim Changed = true; 748284677Sdim } 749284677Sdim 750284677Sdim return Changed; 751284677Sdim} 752284677Sdim 753284677Sdimbool AMDGPUCFGStructurizer::run() { 754284677Sdim //Assume reducible CFG... 755341825Sdim LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); 756284677Sdim 757284677Sdim#ifdef STRESSTEST 758284677Sdim //Use the worse block ordering to test the algorithm. 759284677Sdim ReverseVector(orderedBlks); 760284677Sdim#endif 761284677Sdim 762341825Sdim LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 763284677Sdim int NumIter = 0; 764284677Sdim bool Finish = false; 765284677Sdim MachineBasicBlock *MBB; 766284677Sdim bool MakeProgress = false; 767284677Sdim int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 768284677Sdim OrderedBlks.end()); 769284677Sdim 770284677Sdim do { 771284677Sdim ++NumIter; 772341825Sdim LLVM_DEBUG(dbgs() << "numIter = " << NumIter 773341825Sdim << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); 774284677Sdim 775284677Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 776284677Sdim OrderedBlks.begin(); 777284677Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 778284677Sdim OrderedBlks.end(); 779284677Sdim 780284677Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 781284677Sdim It; 782284677Sdim MachineBasicBlock *SccBeginMBB = nullptr; 783284677Sdim int SccNumBlk = 0; // The number of active blocks, init to a 784284677Sdim // maximum possible number. 785284677Sdim int SccNumIter; // Number of iteration in this SCC. 786284677Sdim 787284677Sdim while (It != E) { 788284677Sdim MBB = *It; 789284677Sdim 790284677Sdim if (!SccBeginMBB) { 791284677Sdim SccBeginIter = It; 792284677Sdim SccBeginMBB = MBB; 793284677Sdim SccNumIter = 0; 794284677Sdim SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 795341825Sdim LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 796341825Sdim dbgs() << "\n";); 797284677Sdim } 798284677Sdim 799284677Sdim if (!isRetiredBlock(MBB)) 800284677Sdim patternMatch(MBB); 801284677Sdim 802284677Sdim ++It; 803284677Sdim 804284677Sdim bool ContNextScc = true; 805284677Sdim if (It == E 806284677Sdim || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 807284677Sdim // Just finish one scc. 808284677Sdim ++SccNumIter; 809284677Sdim int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 810284677Sdim if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 811341825Sdim LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 812341825Sdim << ", sccNumIter = " << SccNumIter; 813341825Sdim dbgs() << "doesn't make any progress\n";); 814284677Sdim ContNextScc = true; 815284677Sdim } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 816284677Sdim SccNumBlk = sccRemainedNumBlk; 817284677Sdim It = SccBeginIter; 818284677Sdim ContNextScc = false; 819341825Sdim LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) 820341825Sdim << "sccNumIter = " << SccNumIter << '\n';); 821284677Sdim } else { 822284677Sdim // Finish the current scc. 823284677Sdim ContNextScc = true; 824284677Sdim } 825284677Sdim } else { 826284677Sdim // Continue on next component in the current scc. 827284677Sdim ContNextScc = false; 828284677Sdim } 829284677Sdim 830284677Sdim if (ContNextScc) 831284677Sdim SccBeginMBB = nullptr; 832284677Sdim } //while, "one iteration" over the function. 833284677Sdim 834284677Sdim MachineBasicBlock *EntryMBB = 835314564Sdim *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 836284677Sdim if (EntryMBB->succ_size() == 0) { 837284677Sdim Finish = true; 838341825Sdim LLVM_DEBUG(dbgs() << "Reduce to one block\n";); 839284677Sdim } else { 840284677Sdim int NewnumRemainedBlk 841284677Sdim = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 842284677Sdim // consider cloned blocks ?? 843284677Sdim if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 844284677Sdim MakeProgress = true; 845284677Sdim NumRemainedBlk = NewnumRemainedBlk; 846284677Sdim } else { 847284677Sdim MakeProgress = false; 848341825Sdim LLVM_DEBUG(dbgs() << "No progress\n";); 849284677Sdim } 850284677Sdim } 851284677Sdim } while (!Finish && MakeProgress); 852284677Sdim 853284677Sdim // Misc wrap up to maintain the consistency of the Function representation. 854314564Sdim wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 855284677Sdim 856284677Sdim // Detach retired Block, release memory. 857284677Sdim for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 858284677Sdim It != E; ++It) { 859284677Sdim if ((*It).second && (*It).second->IsRetired) { 860284677Sdim assert(((*It).first)->getNumber() != -1); 861341825Sdim LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";); 862284677Sdim (*It).first->eraseFromParent(); //Remove from the parent Function. 863284677Sdim } 864284677Sdim delete (*It).second; 865284677Sdim } 866284677Sdim BlockInfoMap.clear(); 867284677Sdim LLInfoMap.clear(); 868284677Sdim 869284677Sdim if (!Finish) { 870341825Sdim LLVM_DEBUG(FuncRep->viewCFG()); 871309124Sdim report_fatal_error("IRREDUCIBLE_CFG"); 872284677Sdim } 873284677Sdim 874284677Sdim return true; 875284677Sdim} 876284677Sdim 877284677Sdimvoid AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 878284677Sdim int SccNum = 0; 879284677Sdim MachineBasicBlock *MBB; 880284677Sdim for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 881284677Sdim ++It, ++SccNum) { 882284677Sdim const std::vector<MachineBasicBlock *> &SccNext = *It; 883284677Sdim for (std::vector<MachineBasicBlock *>::const_iterator 884284677Sdim blockIter = SccNext.begin(), blockEnd = SccNext.end(); 885284677Sdim blockIter != blockEnd; ++blockIter) { 886284677Sdim MBB = *blockIter; 887284677Sdim OrderedBlks.push_back(MBB); 888284677Sdim recordSccnum(MBB, SccNum); 889284677Sdim } 890284677Sdim } 891284677Sdim 892321369Sdim // walk through all the block in func to check for unreachable 893321369Sdim for (auto *MBB : nodes(MF)) { 894284677Sdim SccNum = getSCCNum(MBB); 895284677Sdim if (SccNum == INVALIDSCCNUM) 896284677Sdim dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 897284677Sdim } 898284677Sdim} 899284677Sdim 900284677Sdimint AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 901284677Sdim int NumMatch = 0; 902284677Sdim int CurMatch; 903284677Sdim 904341825Sdim LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); 905284677Sdim 906284677Sdim while ((CurMatch = patternMatchGroup(MBB)) > 0) 907284677Sdim NumMatch += CurMatch; 908284677Sdim 909341825Sdim LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() 910341825Sdim << ", numMatch = " << NumMatch << "\n";); 911284677Sdim 912284677Sdim return NumMatch; 913284677Sdim} 914284677Sdim 915284677Sdimint AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 916284677Sdim int NumMatch = 0; 917284677Sdim NumMatch += loopendPatternMatch(); 918284677Sdim NumMatch += serialPatternMatch(MBB); 919284677Sdim NumMatch += ifPatternMatch(MBB); 920284677Sdim return NumMatch; 921284677Sdim} 922284677Sdim 923284677Sdimint AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 924284677Sdim if (MBB->succ_size() != 1) 925284677Sdim return 0; 926284677Sdim 927284677Sdim MachineBasicBlock *childBlk = *MBB->succ_begin(); 928284677Sdim if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 929284677Sdim return 0; 930284677Sdim 931284677Sdim mergeSerialBlock(MBB, childBlk); 932284677Sdim ++numSerialPatternMatch; 933284677Sdim return 1; 934284677Sdim} 935284677Sdim 936284677Sdimint AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 937284677Sdim //two edges 938284677Sdim if (MBB->succ_size() != 2) 939284677Sdim return 0; 940284677Sdim if (hasBackEdge(MBB)) 941284677Sdim return 0; 942284677Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 943284677Sdim if (!BranchMI) 944284677Sdim return 0; 945284677Sdim 946284677Sdim assert(isCondBranch(BranchMI)); 947284677Sdim int NumMatch = 0; 948284677Sdim 949284677Sdim MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 950284677Sdim NumMatch += serialPatternMatch(TrueMBB); 951284677Sdim NumMatch += ifPatternMatch(TrueMBB); 952284677Sdim MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 953284677Sdim NumMatch += serialPatternMatch(FalseMBB); 954284677Sdim NumMatch += ifPatternMatch(FalseMBB); 955284677Sdim MachineBasicBlock *LandBlk; 956284677Sdim int Cloned = 0; 957284677Sdim 958284677Sdim assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 959284677Sdim // TODO: Simplify 960284677Sdim if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 961284677Sdim && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 962284677Sdim // Diamond pattern 963284677Sdim LandBlk = *TrueMBB->succ_begin(); 964284677Sdim } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 965284677Sdim // Triangle pattern, false is empty 966284677Sdim LandBlk = FalseMBB; 967284677Sdim FalseMBB = nullptr; 968284677Sdim } else if (FalseMBB->succ_size() == 1 969284677Sdim && *FalseMBB->succ_begin() == TrueMBB) { 970284677Sdim // Triangle pattern, true is empty 971284677Sdim // We reverse the predicate to make a triangle, empty false pattern; 972284677Sdim std::swap(TrueMBB, FalseMBB); 973314564Sdim reversePredicateSetter(MBB->end(), *MBB); 974284677Sdim LandBlk = FalseMBB; 975284677Sdim FalseMBB = nullptr; 976284677Sdim } else if (FalseMBB->succ_size() == 1 977284677Sdim && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 978284677Sdim LandBlk = *FalseMBB->succ_begin(); 979284677Sdim } else if (TrueMBB->succ_size() == 1 980284677Sdim && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 981284677Sdim LandBlk = *TrueMBB->succ_begin(); 982284677Sdim } else { 983284677Sdim return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 984284677Sdim } 985284677Sdim 986284677Sdim // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 987284677Sdim // new BB created for landBlk==NULL may introduce new challenge to the 988284677Sdim // reduction process. 989284677Sdim if (LandBlk && 990284677Sdim ((TrueMBB && TrueMBB->pred_size() > 1) 991284677Sdim || (FalseMBB && FalseMBB->pred_size() > 1))) { 992284677Sdim Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 993284677Sdim } 994284677Sdim 995284677Sdim if (TrueMBB && TrueMBB->pred_size() > 1) { 996284677Sdim TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 997284677Sdim ++Cloned; 998284677Sdim } 999284677Sdim 1000284677Sdim if (FalseMBB && FalseMBB->pred_size() > 1) { 1001284677Sdim FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 1002284677Sdim ++Cloned; 1003284677Sdim } 1004284677Sdim 1005284677Sdim mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 1006284677Sdim 1007284677Sdim ++numIfPatternMatch; 1008284677Sdim 1009284677Sdim numClonedBlock += Cloned; 1010284677Sdim 1011284677Sdim return 1 + Cloned + NumMatch; 1012284677Sdim} 1013284677Sdim 1014284677Sdimint AMDGPUCFGStructurizer::loopendPatternMatch() { 1015284677Sdim std::deque<MachineLoop *> NestedLoops; 1016284677Sdim for (auto &It: *MLI) 1017284677Sdim for (MachineLoop *ML : depth_first(It)) 1018284677Sdim NestedLoops.push_front(ML); 1019284677Sdim 1020321369Sdim if (NestedLoops.empty()) 1021284677Sdim return 0; 1022284677Sdim 1023284677Sdim // Process nested loop outside->inside (we did push_front), 1024284677Sdim // so "continue" to a outside loop won't be mistaken as "break" 1025284677Sdim // of the current loop. 1026284677Sdim int Num = 0; 1027284677Sdim for (MachineLoop *ExaminedLoop : NestedLoops) { 1028284677Sdim if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 1029284677Sdim continue; 1030341825Sdim LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 1031284677Sdim int NumBreak = mergeLoop(ExaminedLoop); 1032284677Sdim if (NumBreak == -1) 1033284677Sdim break; 1034284677Sdim Num += NumBreak; 1035284677Sdim } 1036284677Sdim return Num; 1037284677Sdim} 1038284677Sdim 1039284677Sdimint AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1040284677Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1041284677Sdim MBBVector ExitingMBBs; 1042284677Sdim LoopRep->getExitingBlocks(ExitingMBBs); 1043284677Sdim assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1044341825Sdim LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() 1045341825Sdim << " exiting blocks\n";); 1046284677Sdim // We assume a single ExitBlk 1047284677Sdim MBBVector ExitBlks; 1048284677Sdim LoopRep->getExitBlocks(ExitBlks); 1049284677Sdim SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1050284677Sdim for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1051284677Sdim ExitBlkSet.insert(ExitBlks[i]); 1052284677Sdim assert(ExitBlkSet.size() == 1); 1053284677Sdim MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1054284677Sdim assert(ExitBlk && "Loop has several exit block"); 1055284677Sdim MBBVector LatchBlks; 1056321369Sdim for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) 1057321369Sdim if (LoopRep->contains(LB)) 1058321369Sdim LatchBlks.push_back(LB); 1059284677Sdim 1060284677Sdim for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1061284677Sdim mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1062284677Sdim for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1063284677Sdim settleLoopcontBlock(LatchBlks[i], LoopHeader); 1064284677Sdim int Match = 0; 1065284677Sdim do { 1066284677Sdim Match = 0; 1067284677Sdim Match += serialPatternMatch(LoopHeader); 1068284677Sdim Match += ifPatternMatch(LoopHeader); 1069284677Sdim } while (Match > 0); 1070284677Sdim mergeLooplandBlock(LoopHeader, ExitBlk); 1071284677Sdim MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1072284677Sdim if (ParentLoop) 1073284677Sdim MLI->changeLoopFor(LoopHeader, ParentLoop); 1074284677Sdim else 1075284677Sdim MLI->removeBlock(LoopHeader); 1076284677Sdim Visited[LoopRep] = true; 1077284677Sdim return 1; 1078284677Sdim} 1079284677Sdim 1080284677Sdimbool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1081284677Sdim MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1082284677Sdim if (Src1MBB->succ_size() == 0) { 1083284677Sdim MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1084284677Sdim if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1085284677Sdim MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1086284677Sdim if (TheEntry) { 1087341825Sdim LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" 1088341825Sdim << Src1MBB->getNumber() << " src2 = BB" 1089341825Sdim << Src2MBB->getNumber() << "\n";); 1090284677Sdim return true; 1091284677Sdim } 1092284677Sdim } 1093284677Sdim } 1094284677Sdim return false; 1095284677Sdim} 1096284677Sdim 1097284677Sdimint AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1098284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1099284677Sdim int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1100284677Sdim if (Num == 0) { 1101341825Sdim LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" 1102341825Sdim << "\n";); 1103284677Sdim Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1104284677Sdim } 1105284677Sdim return Num; 1106284677Sdim} 1107284677Sdim 1108284677Sdimint AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1109284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1110284677Sdim int Num = 0; 1111284677Sdim MachineBasicBlock *DownBlk; 1112284677Sdim 1113284677Sdim //trueBlk could be the common post dominator 1114284677Sdim DownBlk = TrueMBB; 1115284677Sdim 1116341825Sdim LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1117341825Sdim << " true = BB" << TrueMBB->getNumber() 1118341825Sdim << ", numSucc=" << TrueMBB->succ_size() << " false = BB" 1119341825Sdim << FalseMBB->getNumber() << "\n";); 1120284677Sdim 1121284677Sdim while (DownBlk) { 1122341825Sdim LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); 1123284677Sdim 1124284677Sdim if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1125341825Sdim LLVM_DEBUG(dbgs() << " working\n";); 1126284677Sdim 1127284677Sdim Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1128284677Sdim Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1129284677Sdim 1130284677Sdim numClonedBlock += Num; 1131284677Sdim Num += serialPatternMatch(*HeadMBB->succ_begin()); 1132284677Sdim Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1133284677Sdim Num += ifPatternMatch(HeadMBB); 1134284677Sdim assert(Num > 0); 1135284677Sdim 1136284677Sdim break; 1137284677Sdim } 1138341825Sdim LLVM_DEBUG(dbgs() << " not working\n";); 1139284677Sdim DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1140284677Sdim } // walk down the postDomTree 1141284677Sdim 1142284677Sdim return Num; 1143284677Sdim} 1144284677Sdim 1145327952Sdim#ifndef NDEBUG 1146284677Sdimvoid AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1147284677Sdim MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1148284677Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1149284677Sdim dbgs() << "head = BB" << HeadMBB->getNumber() 1150284677Sdim << " size = " << HeadMBB->size(); 1151284677Sdim if (Detail) { 1152284677Sdim dbgs() << "\n"; 1153284677Sdim HeadMBB->print(dbgs()); 1154284677Sdim dbgs() << "\n"; 1155284677Sdim } 1156284677Sdim 1157284677Sdim if (TrueMBB) { 1158284677Sdim dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1159284677Sdim << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1160284677Sdim if (Detail) { 1161284677Sdim dbgs() << "\n"; 1162284677Sdim TrueMBB->print(dbgs()); 1163284677Sdim dbgs() << "\n"; 1164284677Sdim } 1165284677Sdim } 1166284677Sdim if (FalseMBB) { 1167284677Sdim dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1168284677Sdim << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1169284677Sdim if (Detail) { 1170284677Sdim dbgs() << "\n"; 1171284677Sdim FalseMBB->print(dbgs()); 1172284677Sdim dbgs() << "\n"; 1173284677Sdim } 1174284677Sdim } 1175284677Sdim if (LandMBB) { 1176284677Sdim dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1177284677Sdim << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1178284677Sdim if (Detail) { 1179284677Sdim dbgs() << "\n"; 1180284677Sdim LandMBB->print(dbgs()); 1181284677Sdim dbgs() << "\n"; 1182284677Sdim } 1183284677Sdim } 1184284677Sdim 1185321369Sdim dbgs() << "\n"; 1186284677Sdim} 1187327952Sdim#endif 1188284677Sdim 1189284677Sdimint AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1190284677Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1191284677Sdim MachineBasicBlock **LandMBBPtr) { 1192284677Sdim bool MigrateTrue = false; 1193284677Sdim bool MigrateFalse = false; 1194284677Sdim 1195284677Sdim MachineBasicBlock *LandBlk = *LandMBBPtr; 1196284677Sdim 1197284677Sdim assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1198284677Sdim && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1199284677Sdim 1200284677Sdim if (TrueMBB == FalseMBB) 1201284677Sdim return 0; 1202284677Sdim 1203284677Sdim MigrateTrue = needMigrateBlock(TrueMBB); 1204284677Sdim MigrateFalse = needMigrateBlock(FalseMBB); 1205284677Sdim 1206284677Sdim if (!MigrateTrue && !MigrateFalse) 1207284677Sdim return 0; 1208284677Sdim 1209284677Sdim // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1210284677Sdim // have more than one predecessors. without doing this, its predecessor 1211284677Sdim // rather than headBlk will have undefined value in initReg. 1212284677Sdim if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1213284677Sdim MigrateTrue = true; 1214284677Sdim if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1215284677Sdim MigrateFalse = true; 1216284677Sdim 1217341825Sdim LLVM_DEBUG( 1218341825Sdim dbgs() << "before improveSimpleJumpintoIf: "; 1219341825Sdim showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1220284677Sdim 1221284677Sdim // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1222284677Sdim // 1223284677Sdim // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1224284677Sdim // {initReg = 0; org falseBlk branch } 1225284677Sdim // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1226284677Sdim // => org landBlk 1227284677Sdim // if landBlk->pred_size() > 2, put the about if-else inside 1228284677Sdim // if (initReg !=2) {...} 1229284677Sdim // 1230284677Sdim // add initReg = initVal to headBlk 1231284677Sdim 1232284677Sdim const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1233284677Sdim if (!MigrateTrue || !MigrateFalse) { 1234284677Sdim // XXX: We have an opportunity here to optimize the "branch into if" case 1235284677Sdim // here. Branch into if looks like this: 1236284677Sdim // entry 1237284677Sdim // / | 1238284677Sdim // diamond_head branch_from 1239284677Sdim // / \ | 1240284677Sdim // diamond_false diamond_true 1241284677Sdim // \ / 1242284677Sdim // done 1243284677Sdim // 1244284677Sdim // The diamond_head block begins the "if" and the diamond_true block 1245284677Sdim // is the block being "branched into". 1246284677Sdim // 1247284677Sdim // If MigrateTrue is true, then TrueBB is the block being "branched into" 1248284677Sdim // and if MigrateFalse is true, then FalseBB is the block being 1249284677Sdim // "branched into" 1250296417Sdim // 1251284677Sdim // Here is the pseudo code for how I think the optimization should work: 1252284677Sdim // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1253284677Sdim // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1254284677Sdim // 3. Move the branch instruction from diamond_head into its own basic 1255284677Sdim // block (new_block). 1256284677Sdim // 4. Add an unconditional branch from diamond_head to new_block 1257284677Sdim // 5. Replace the branch instruction in branch_from with an unconditional 1258284677Sdim // branch to new_block. If branch_from has multiple predecessors, then 1259284677Sdim // we need to replace the True/False block in the branch 1260284677Sdim // instruction instead of replacing it. 1261284677Sdim // 6. Change the condition of the branch instruction in new_block from 1262284677Sdim // COND to (COND || GPR0) 1263284677Sdim // 1264284677Sdim // In order insert these MOV instruction, we will need to use the 1265284677Sdim // RegisterScavenger. Usually liveness stops being tracked during 1266284677Sdim // the late machine optimization passes, however if we implement 1267284677Sdim // bool TargetRegisterInfo::requiresRegisterScavenging( 1268284677Sdim // const MachineFunction &MF) 1269296417Sdim // and have it return true, liveness will be tracked correctly 1270284677Sdim // by generic optimization passes. We will also need to make sure that 1271284677Sdim // all of our target-specific passes that run after regalloc and before 1272284677Sdim // the CFGStructurizer track liveness and we will need to modify this pass 1273284677Sdim // to correctly track liveness. 1274284677Sdim // 1275284677Sdim // After the above changes, the new CFG should look like this: 1276284677Sdim // entry 1277284677Sdim // / | 1278284677Sdim // diamond_head branch_from 1279284677Sdim // \ / 1280284677Sdim // new_block 1281284677Sdim // / | 1282284677Sdim // diamond_false diamond_true 1283284677Sdim // \ / 1284284677Sdim // done 1285284677Sdim // 1286284677Sdim // Without this optimization, we are forced to duplicate the diamond_true 1287284677Sdim // block and we will end up with a CFG like this: 1288284677Sdim // 1289284677Sdim // entry 1290284677Sdim // / | 1291284677Sdim // diamond_head branch_from 1292284677Sdim // / \ | 1293284677Sdim // diamond_false diamond_true diamond_true (duplicate) 1294284677Sdim // \ / | 1295284677Sdim // done --------------------| 1296284677Sdim // 1297284677Sdim // Duplicating diamond_true can be very costly especially if it has a 1298284677Sdim // lot of instructions. 1299284677Sdim return 0; 1300284677Sdim } 1301284677Sdim 1302284677Sdim int NumNewBlk = 0; 1303284677Sdim 1304284677Sdim bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1305284677Sdim 1306341825Sdim //insert R600::ENDIF to avoid special case "input landBlk == NULL" 1307341825Sdim MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); 1308284677Sdim 1309284677Sdim if (LandBlkHasOtherPred) { 1310309124Sdim report_fatal_error("Extra register needed to handle CFG"); 1311360784Sdim Register CmpResReg = 1312360784Sdim HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1313309124Sdim report_fatal_error("Extra compare instruction needed to handle CFG"); 1314341825Sdim insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, 1315284677Sdim CmpResReg, DebugLoc()); 1316284677Sdim } 1317284677Sdim 1318284677Sdim // XXX: We are running this after RA, so creating virtual registers will 1319284677Sdim // cause an assertion failure in the PostRA scheduling pass. 1320360784Sdim Register InitReg = 1321360784Sdim HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1322341825Sdim insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, 1323284677Sdim DebugLoc()); 1324284677Sdim 1325284677Sdim if (MigrateTrue) { 1326284677Sdim migrateInstruction(TrueMBB, LandBlk, I); 1327284677Sdim // need to uncondionally insert the assignment to ensure a path from its 1328284677Sdim // predecessor rather than headBlk has valid value in initReg if 1329284677Sdim // (initVal != 1). 1330309124Sdim report_fatal_error("Extra register needed to handle CFG"); 1331284677Sdim } 1332341825Sdim insertInstrBefore(I, R600::ELSE); 1333284677Sdim 1334284677Sdim if (MigrateFalse) { 1335284677Sdim migrateInstruction(FalseMBB, LandBlk, I); 1336284677Sdim // need to uncondionally insert the assignment to ensure a path from its 1337284677Sdim // predecessor rather than headBlk has valid value in initReg if 1338284677Sdim // (initVal != 0) 1339309124Sdim report_fatal_error("Extra register needed to handle CFG"); 1340284677Sdim } 1341284677Sdim 1342284677Sdim if (LandBlkHasOtherPred) { 1343284677Sdim // add endif 1344341825Sdim insertInstrBefore(I, R600::ENDIF); 1345284677Sdim 1346284677Sdim // put initReg = 2 to other predecessors of landBlk 1347284677Sdim for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1348284677Sdim PE = LandBlk->pred_end(); PI != PE; ++PI) { 1349284677Sdim MachineBasicBlock *MBB = *PI; 1350284677Sdim if (MBB != TrueMBB && MBB != FalseMBB) 1351309124Sdim report_fatal_error("Extra register needed to handle CFG"); 1352284677Sdim } 1353284677Sdim } 1354341825Sdim LLVM_DEBUG( 1355341825Sdim dbgs() << "result from improveSimpleJumpintoIf: "; 1356341825Sdim showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1357284677Sdim 1358284677Sdim // update landBlk 1359284677Sdim *LandMBBPtr = LandBlk; 1360284677Sdim 1361284677Sdim return NumNewBlk; 1362284677Sdim} 1363284677Sdim 1364284677Sdimvoid AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1365284677Sdim MachineBasicBlock *SrcMBB) { 1366341825Sdim LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" 1367341825Sdim << SrcMBB->getNumber() << "\n";); 1368284677Sdim DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1369284677Sdim 1370296417Sdim DstMBB->removeSuccessor(SrcMBB, true); 1371284677Sdim cloneSuccessorList(DstMBB, SrcMBB); 1372284677Sdim 1373284677Sdim removeSuccessor(SrcMBB); 1374284677Sdim MLI->removeBlock(SrcMBB); 1375284677Sdim retireBlock(SrcMBB); 1376284677Sdim} 1377284677Sdim 1378284677Sdimvoid AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1379284677Sdim MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1380284677Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1381284677Sdim assert (TrueMBB); 1382341825Sdim LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; 1383341825Sdim if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() 1384341825Sdim << " } else "; 1385341825Sdim dbgs() << "{ "; if (FalseMBB) { 1386341825Sdim dbgs() << "BB" << FalseMBB->getNumber(); 1387341825Sdim } dbgs() << " }\n "; 1388341825Sdim dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { 1389341825Sdim dbgs() << "BB" << LandMBB->getNumber(); 1390341825Sdim } dbgs() << "\n";); 1391284677Sdim 1392284677Sdim int OldOpcode = BranchMI->getOpcode(); 1393284677Sdim DebugLoc BranchDL = BranchMI->getDebugLoc(); 1394284677Sdim 1395284677Sdim// transform to 1396284677Sdim// if cond 1397284677Sdim// trueBlk 1398284677Sdim// else 1399284677Sdim// falseBlk 1400284677Sdim// endif 1401284677Sdim// landBlk 1402284677Sdim 1403284677Sdim MachineBasicBlock::iterator I = BranchMI; 1404284677Sdim insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1405284677Sdim BranchDL); 1406284677Sdim 1407284677Sdim if (TrueMBB) { 1408284677Sdim MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1409296417Sdim MBB->removeSuccessor(TrueMBB, true); 1410284677Sdim if (LandMBB && TrueMBB->succ_size()!=0) 1411296417Sdim TrueMBB->removeSuccessor(LandMBB, true); 1412284677Sdim retireBlock(TrueMBB); 1413284677Sdim MLI->removeBlock(TrueMBB); 1414284677Sdim } 1415284677Sdim 1416284677Sdim if (FalseMBB) { 1417341825Sdim insertInstrBefore(I, R600::ELSE); 1418284677Sdim MBB->splice(I, FalseMBB, FalseMBB->begin(), 1419284677Sdim FalseMBB->end()); 1420296417Sdim MBB->removeSuccessor(FalseMBB, true); 1421284677Sdim if (LandMBB && FalseMBB->succ_size() != 0) 1422296417Sdim FalseMBB->removeSuccessor(LandMBB, true); 1423284677Sdim retireBlock(FalseMBB); 1424284677Sdim MLI->removeBlock(FalseMBB); 1425284677Sdim } 1426341825Sdim insertInstrBefore(I, R600::ENDIF); 1427284677Sdim 1428284677Sdim BranchMI->eraseFromParent(); 1429284677Sdim 1430284677Sdim if (LandMBB && TrueMBB && FalseMBB) 1431284677Sdim MBB->addSuccessor(LandMBB); 1432284677Sdim} 1433284677Sdim 1434284677Sdimvoid AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1435284677Sdim MachineBasicBlock *LandMBB) { 1436341825Sdim LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1437341825Sdim << " land = BB" << LandMBB->getNumber() << "\n";); 1438284677Sdim 1439341825Sdim insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); 1440341825Sdim insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); 1441296417Sdim DstBlk->replaceSuccessor(DstBlk, LandMBB); 1442284677Sdim} 1443284677Sdim 1444284677Sdimvoid AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1445284677Sdim MachineBasicBlock *LandMBB) { 1446341825Sdim LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" 1447341825Sdim << ExitingMBB->getNumber() << " land = BB" 1448341825Sdim << LandMBB->getNumber() << "\n";); 1449284677Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1450284677Sdim assert(BranchMI && isCondBranch(BranchMI)); 1451284677Sdim DebugLoc DL = BranchMI->getDebugLoc(); 1452284677Sdim MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1453284677Sdim MachineBasicBlock::iterator I = BranchMI; 1454284677Sdim if (TrueBranch != LandMBB) 1455314564Sdim reversePredicateSetter(I, *I->getParent()); 1456341825Sdim insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); 1457341825Sdim insertInstrBefore(I, R600::BREAK); 1458341825Sdim insertInstrBefore(I, R600::ENDIF); 1459284677Sdim //now branchInst can be erase safely 1460284677Sdim BranchMI->eraseFromParent(); 1461284677Sdim //now take care of successors, retire blocks 1462296417Sdim ExitingMBB->removeSuccessor(LandMBB, true); 1463284677Sdim} 1464284677Sdim 1465284677Sdimvoid AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1466284677Sdim MachineBasicBlock *ContMBB) { 1467341825Sdim LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1468341825Sdim << ContingMBB->getNumber() << ", cont = BB" 1469341825Sdim << ContMBB->getNumber() << "\n";); 1470284677Sdim 1471284677Sdim MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1472284677Sdim if (MI) { 1473284677Sdim assert(isCondBranch(MI)); 1474284677Sdim MachineBasicBlock::iterator I = MI; 1475284677Sdim MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1476284677Sdim int OldOpcode = MI->getOpcode(); 1477284677Sdim DebugLoc DL = MI->getDebugLoc(); 1478284677Sdim 1479284677Sdim bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1480284677Sdim 1481284677Sdim if (!UseContinueLogical) { 1482284677Sdim int BranchOpcode = 1483284677Sdim TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1484284677Sdim getBranchZeroOpcode(OldOpcode); 1485284677Sdim insertCondBranchBefore(I, BranchOpcode, DL); 1486284677Sdim // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1487341825Sdim insertInstrEnd(ContingMBB, R600::CONTINUE, DL); 1488341825Sdim insertInstrEnd(ContingMBB, R600::ENDIF, DL); 1489284677Sdim } else { 1490284677Sdim int BranchOpcode = 1491284677Sdim TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1492284677Sdim getContinueZeroOpcode(OldOpcode); 1493284677Sdim insertCondBranchBefore(I, BranchOpcode, DL); 1494284677Sdim } 1495284677Sdim 1496284677Sdim MI->eraseFromParent(); 1497284677Sdim } else { 1498284677Sdim // if we've arrived here then we've already erased the branch instruction 1499284677Sdim // travel back up the basic block to see the last reference of our debug 1500284677Sdim // location we've just inserted that reference here so it should be 1501284677Sdim // representative insertEnd to ensure phi-moves, if exist, go before the 1502284677Sdim // continue-instr. 1503341825Sdim insertInstrEnd(ContingMBB, R600::CONTINUE, 1504284677Sdim getLastDebugLocInBB(ContingMBB)); 1505284677Sdim } 1506284677Sdim} 1507284677Sdim 1508284677Sdimint AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1509284677Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1510284677Sdim int Cloned = 0; 1511284677Sdim assert(PreMBB->isSuccessor(SrcMBB)); 1512284677Sdim while (SrcMBB && SrcMBB != DstMBB) { 1513284677Sdim assert(SrcMBB->succ_size() == 1); 1514284677Sdim if (SrcMBB->pred_size() > 1) { 1515284677Sdim SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1516284677Sdim ++Cloned; 1517284677Sdim } 1518284677Sdim 1519284677Sdim PreMBB = SrcMBB; 1520284677Sdim SrcMBB = *SrcMBB->succ_begin(); 1521284677Sdim } 1522284677Sdim 1523284677Sdim return Cloned; 1524284677Sdim} 1525284677Sdim 1526284677SdimMachineBasicBlock * 1527284677SdimAMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1528284677Sdim MachineBasicBlock *PredMBB) { 1529284677Sdim assert(PredMBB->isSuccessor(MBB) && 1530284677Sdim "succBlk is not a prececessor of curBlk"); 1531284677Sdim 1532284677Sdim MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1533284677Sdim replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1534284677Sdim //srcBlk, oldBlk, newBlk 1535284677Sdim 1536296417Sdim PredMBB->replaceSuccessor(MBB, CloneMBB); 1537284677Sdim 1538284677Sdim // add all successor to cloneBlk 1539284677Sdim cloneSuccessorList(CloneMBB, MBB); 1540284677Sdim 1541284677Sdim numClonedInstr += MBB->size(); 1542284677Sdim 1543341825Sdim LLVM_DEBUG(dbgs() << "Cloned block: " 1544341825Sdim << "BB" << MBB->getNumber() << "size " << MBB->size() 1545341825Sdim << "\n";); 1546284677Sdim 1547284677Sdim SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1548284677Sdim 1549284677Sdim return CloneMBB; 1550284677Sdim} 1551284677Sdim 1552284677Sdimvoid AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1553284677Sdim MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1554284677Sdim MachineBasicBlock::iterator SpliceEnd; 1555284677Sdim //look for the input branchinstr, not the AMDGPU branchinstr 1556284677Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1557284677Sdim if (!BranchMI) { 1558341825Sdim LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); 1559284677Sdim SpliceEnd = SrcMBB->end(); 1560284677Sdim } else { 1561341825Sdim LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1562284677Sdim SpliceEnd = BranchMI; 1563284677Sdim } 1564341825Sdim LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " 1565341825Sdim << DstMBB->size() << "srcSize = " << SrcMBB->size() 1566341825Sdim << "\n";); 1567284677Sdim 1568284677Sdim //splice insert before insertPos 1569284677Sdim DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1570284677Sdim 1571341825Sdim LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " 1572341825Sdim << DstMBB->size() << "srcSize = " << SrcMBB->size() 1573341825Sdim << '\n';); 1574284677Sdim} 1575284677Sdim 1576284677SdimMachineBasicBlock * 1577284677SdimAMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1578284677Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1579284677Sdim MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1580284677Sdim 1581284677Sdim if (!LoopHeader || !LoopLatch) 1582284677Sdim return nullptr; 1583284677Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1584284677Sdim // Is LoopRep an infinite loop ? 1585284677Sdim if (!BranchMI || !isUncondBranch(BranchMI)) 1586284677Sdim return nullptr; 1587284677Sdim 1588284677Sdim MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1589284677Sdim FuncRep->push_back(DummyExitBlk); //insert to function 1590284677Sdim SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1591341825Sdim LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1592327952Sdim LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); 1593287521Sdim Ctx.emitError("Extra register needed to handle CFG"); 1594287521Sdim return nullptr; 1595284677Sdim} 1596284677Sdim 1597284677Sdimvoid AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1598284677Sdim MachineInstr *BranchMI; 1599284677Sdim 1600284677Sdim // I saw two unconditional branch in one basic block in example 1601284677Sdim // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1602284677Sdim while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1603284677Sdim && isUncondBranch(BranchMI)) { 1604341825Sdim LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1605284677Sdim BranchMI->eraseFromParent(); 1606284677Sdim } 1607284677Sdim} 1608284677Sdim 1609284677Sdimvoid AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1610284677Sdim MachineBasicBlock *MBB) { 1611284677Sdim if (MBB->succ_size() != 2) 1612284677Sdim return; 1613284677Sdim MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1614284677Sdim MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1615284677Sdim if (MBB1 != MBB2) 1616284677Sdim return; 1617284677Sdim 1618284677Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1619284677Sdim assert(BranchMI && isCondBranch(BranchMI)); 1620341825Sdim LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1621284677Sdim BranchMI->eraseFromParent(); 1622284677Sdim SHOWNEWBLK(MBB1, "Removing redundant successor"); 1623296417Sdim MBB->removeSuccessor(MBB1, true); 1624284677Sdim} 1625284677Sdim 1626284677Sdimvoid AMDGPUCFGStructurizer::addDummyExitBlock( 1627284677Sdim SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1628284677Sdim MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1629284677Sdim FuncRep->push_back(DummyExitBlk); //insert to function 1630341825Sdim insertInstrEnd(DummyExitBlk, R600::RETURN); 1631284677Sdim 1632284677Sdim for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 1633284677Sdim E = RetMBB.end(); It != E; ++It) { 1634284677Sdim MachineBasicBlock *MBB = *It; 1635284677Sdim MachineInstr *MI = getReturnInstr(MBB); 1636284677Sdim if (MI) 1637284677Sdim MI->eraseFromParent(); 1638284677Sdim MBB->addSuccessor(DummyExitBlk); 1639341825Sdim LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1640341825Sdim << " successors\n";); 1641284677Sdim } 1642284677Sdim SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1643284677Sdim} 1644284677Sdim 1645284677Sdimvoid AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1646284677Sdim while (MBB->succ_size()) 1647284677Sdim MBB->removeSuccessor(*MBB->succ_begin()); 1648284677Sdim} 1649284677Sdim 1650284677Sdimvoid AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1651284677Sdim int SccNum) { 1652284677Sdim BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1653284677Sdim if (!srcBlkInfo) 1654284677Sdim srcBlkInfo = new BlockInformation(); 1655284677Sdim srcBlkInfo->SccNum = SccNum; 1656284677Sdim} 1657284677Sdim 1658284677Sdimvoid AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1659341825Sdim LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); 1660284677Sdim 1661284677Sdim BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1662284677Sdim 1663284677Sdim if (!SrcBlkInfo) 1664284677Sdim SrcBlkInfo = new BlockInformation(); 1665284677Sdim 1666284677Sdim SrcBlkInfo->IsRetired = true; 1667284677Sdim assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1668284677Sdim && "can't retire block yet"); 1669284677Sdim} 1670284677Sdim 1671284677SdimINITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1672284677Sdim "AMDGPU CFG Structurizer", false, false) 1673284677SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1674284677SdimINITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1675284677SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1676284677SdimINITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1677284677Sdim "AMDGPU CFG Structurizer", false, false) 1678284677Sdim 1679284677SdimFunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1680284677Sdim return new AMDGPUCFGStructurizer(); 1681284677Sdim} 1682