1249259Sdim//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim/// \file 9249259Sdim//==-----------------------------------------------------------------------===// 10249259Sdim 11249259Sdim#define DEBUG_TYPE "structcfg" 12249259Sdim 13263509Sdim#include "AMDGPU.h" 14249259Sdim#include "AMDGPUInstrInfo.h" 15263509Sdim#include "R600InstrInfo.h" 16263509Sdim#include "llvm/Support/Debug.h" 17263509Sdim#include "llvm/Support/raw_ostream.h" 18249259Sdim#include "llvm/ADT/SCCIterator.h" 19249259Sdim#include "llvm/ADT/SmallVector.h" 20249259Sdim#include "llvm/ADT/Statistic.h" 21263509Sdim#include "llvm/ADT/DepthFirstIterator.h" 22249259Sdim#include "llvm/Analysis/DominatorInternals.h" 23249259Sdim#include "llvm/Analysis/Dominators.h" 24249259Sdim#include "llvm/CodeGen/MachineDominators.h" 25249259Sdim#include "llvm/CodeGen/MachineFunction.h" 26249259Sdim#include "llvm/CodeGen/MachineFunctionAnalysis.h" 27249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 28249259Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 29249259Sdim#include "llvm/CodeGen/MachineJumpTableInfo.h" 30249259Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 31249259Sdim#include "llvm/CodeGen/MachinePostDominators.h" 32249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 33249259Sdim#include "llvm/Target/TargetInstrInfo.h" 34263509Sdim#include "llvm/Target/TargetMachine.h" 35249259Sdim 36249259Sdimusing namespace llvm; 37249259Sdim 38263509Sdim#define DEFAULT_VEC_SLOTS 8 39263509Sdim 40249259Sdim// TODO: move-begin. 41249259Sdim 42249259Sdim//===----------------------------------------------------------------------===// 43249259Sdim// 44249259Sdim// Statistics for CFGStructurizer. 45249259Sdim// 46249259Sdim//===----------------------------------------------------------------------===// 47249259Sdim 48249259SdimSTATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 49249259Sdim "matched"); 50249259SdimSTATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 51249259Sdim "matched"); 52249259SdimSTATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " 53249259Sdim "pattern matched"); 54249259SdimSTATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 55249259SdimSTATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 56249259Sdim 57249259Sdim//===----------------------------------------------------------------------===// 58249259Sdim// 59249259Sdim// Miscellaneous utility for CFGStructurizer. 60249259Sdim// 61249259Sdim//===----------------------------------------------------------------------===// 62263509Sdimnamespace { 63249259Sdim#define SHOWNEWINSTR(i) \ 64263509Sdim DEBUG(dbgs() << "New instr: " << *i << "\n"); 65249259Sdim 66249259Sdim#define SHOWNEWBLK(b, msg) \ 67263509SdimDEBUG( \ 68263509Sdim dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 69263509Sdim dbgs() << "\n"; \ 70263509Sdim); 71249259Sdim 72249259Sdim#define SHOWBLK_DETAIL(b, msg) \ 73263509SdimDEBUG( \ 74249259Sdim if (b) { \ 75263509Sdim dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 76263509Sdim b->print(dbgs()); \ 77263509Sdim dbgs() << "\n"; \ 78249259Sdim } \ 79263509Sdim); 80249259Sdim 81249259Sdim#define INVALIDSCCNUM -1 82249259Sdim 83249259Sdimtemplate<class NodeT> 84263509Sdimvoid ReverseVector(SmallVectorImpl<NodeT *> &Src) { 85249259Sdim size_t sz = Src.size(); 86249259Sdim for (size_t i = 0; i < sz/2; ++i) { 87249259Sdim NodeT *t = Src[i]; 88249259Sdim Src[i] = Src[sz - i - 1]; 89249259Sdim Src[sz - i - 1] = t; 90249259Sdim } 91249259Sdim} 92249259Sdim 93263509Sdim} // end anonymous namespace 94249259Sdim 95249259Sdim//===----------------------------------------------------------------------===// 96249259Sdim// 97249259Sdim// supporting data structure for CFGStructurizer 98249259Sdim// 99249259Sdim//===----------------------------------------------------------------------===// 100249259Sdim 101249259Sdim 102263509Sdimnamespace { 103263509Sdim 104249259Sdimclass BlockInformation { 105249259Sdimpublic: 106263509Sdim bool IsRetired; 107263509Sdim int SccNum; 108263509Sdim BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {} 109249259Sdim}; 110249259Sdim 111263509Sdim} // end anonymous namespace 112249259Sdim 113249259Sdim//===----------------------------------------------------------------------===// 114249259Sdim// 115249259Sdim// CFGStructurizer 116249259Sdim// 117249259Sdim//===----------------------------------------------------------------------===// 118249259Sdim 119263509Sdimnamespace { 120263509Sdimclass AMDGPUCFGStructurizer : public MachineFunctionPass { 121249259Sdimpublic: 122263509Sdim typedef SmallVector<MachineBasicBlock *, 32> MBBVector; 123263509Sdim typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap; 124263509Sdim typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap; 125263509Sdim 126263509Sdim enum PathToKind { 127249259Sdim Not_SinglePath = 0, 128249259Sdim SinglePath_InPath = 1, 129249259Sdim SinglePath_NotInPath = 2 130263509Sdim }; 131249259Sdim 132263509Sdim static char ID; 133249259Sdim 134263509Sdim AMDGPUCFGStructurizer(TargetMachine &tm) : 135263509Sdim MachineFunctionPass(ID), TM(tm), 136263509Sdim TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())), 137263509Sdim TRI(&TII->getRegisterInfo()) { } 138249259Sdim 139263509Sdim const char *getPassName() const { 140263509Sdim return "AMD IL Control Flow Graph structurizer Pass"; 141263509Sdim } 142249259Sdim 143263509Sdim void getAnalysisUsage(AnalysisUsage &AU) const { 144263509Sdim AU.addPreserved<MachineFunctionAnalysis>(); 145263509Sdim AU.addRequired<MachineFunctionAnalysis>(); 146263509Sdim AU.addRequired<MachineDominatorTree>(); 147263509Sdim AU.addRequired<MachinePostDominatorTree>(); 148263509Sdim AU.addRequired<MachineLoopInfo>(); 149263509Sdim } 150249259Sdim 151249259Sdim /// Perform the CFG structurization 152263509Sdim bool run(); 153249259Sdim 154249259Sdim /// Perform the CFG preparation 155263509Sdim /// This step will remove every unconditionnal/dead jump instructions and make 156263509Sdim /// sure all loops have an exit block 157263509Sdim bool prepare(); 158249259Sdim 159263509Sdim bool runOnMachineFunction(MachineFunction &MF) { 160263509Sdim DEBUG(MF.dump();); 161263509Sdim OrderedBlks.clear(); 162263509Sdim FuncRep = &MF; 163263509Sdim MLI = &getAnalysis<MachineLoopInfo>(); 164263509Sdim DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 165263509Sdim MDT = &getAnalysis<MachineDominatorTree>(); 166263509Sdim DEBUG(MDT->print(dbgs(), (const llvm::Module*)0);); 167263509Sdim PDT = &getAnalysis<MachinePostDominatorTree>(); 168263509Sdim DEBUG(PDT->print(dbgs());); 169263509Sdim prepare(); 170263509Sdim run(); 171263509Sdim DEBUG(MF.dump();); 172263509Sdim return true; 173263509Sdim } 174249259Sdim 175263509Sdimprotected: 176263509Sdim TargetMachine &TM; 177263509Sdim MachineDominatorTree *MDT; 178263509Sdim MachinePostDominatorTree *PDT; 179263509Sdim MachineLoopInfo *MLI; 180263509Sdim const R600InstrInfo *TII; 181263509Sdim const AMDGPURegisterInfo *TRI; 182249259Sdim 183263509Sdim // PRINT FUNCTIONS 184263509Sdim /// Print the ordered Blocks. 185263509Sdim void printOrderedBlocks() const { 186263509Sdim size_t i = 0; 187263509Sdim for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 188263509Sdim iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 189263509Sdim dbgs() << "BB" << (*iterBlk)->getNumber(); 190263509Sdim dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 191263509Sdim if (i != 0 && i % 10 == 0) { 192263509Sdim dbgs() << "\n"; 193263509Sdim } else { 194263509Sdim dbgs() << " "; 195263509Sdim } 196263509Sdim } 197263509Sdim } 198263509Sdim static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 199263509Sdim for (MachineLoop::iterator iter = LoopInfo.begin(), 200263509Sdim iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 201263509Sdim (*iter)->print(dbgs(), 0); 202263509Sdim } 203263509Sdim } 204249259Sdim 205263509Sdim // UTILITY FUNCTIONS 206263509Sdim int getSCCNum(MachineBasicBlock *MBB) const; 207263509Sdim MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 208263509Sdim bool hasBackEdge(MachineBasicBlock *MBB) const; 209263509Sdim static unsigned getLoopDepth(MachineLoop *LoopRep); 210263509Sdim bool isRetiredBlock(MachineBasicBlock *MBB) const; 211263509Sdim bool isActiveLoophead(MachineBasicBlock *MBB) const; 212263509Sdim PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 213263509Sdim bool AllowSideEntry = true) const; 214263509Sdim int countActiveBlock(MBBVector::const_iterator It, 215263509Sdim MBBVector::const_iterator E) const; 216263509Sdim bool needMigrateBlock(MachineBasicBlock *MBB) const; 217249259Sdim 218263509Sdim // Utility Functions 219263509Sdim void reversePredicateSetter(MachineBasicBlock::iterator I); 220263509Sdim /// Compute the reversed DFS post order of Blocks 221263509Sdim void orderBlocks(MachineFunction *MF); 222249259Sdim 223263509Sdim // Function originaly from CFGStructTraits 224263509Sdim void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 225263509Sdim DebugLoc DL = DebugLoc()); 226263509Sdim MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 227263509Sdim DebugLoc DL = DebugLoc()); 228263509Sdim MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 229263509Sdim void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 230263509Sdim DebugLoc DL); 231263509Sdim void insertCondBranchBefore(MachineBasicBlock *MBB, 232263509Sdim MachineBasicBlock::iterator I, int NewOpcode, int RegNum, 233263509Sdim DebugLoc DL); 234263509Sdim void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum); 235263509Sdim static int getBranchNzeroOpcode(int OldOpcode); 236263509Sdim static int getBranchZeroOpcode(int OldOpcode); 237263509Sdim static int getContinueNzeroOpcode(int OldOpcode); 238263509Sdim static int getContinueZeroOpcode(int OldOpcode); 239263509Sdim static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 240263509Sdim static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 241263509Sdim static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 242263509Sdim MachineInstr *MI); 243263509Sdim static bool isCondBranch(MachineInstr *MI); 244263509Sdim static bool isUncondBranch(MachineInstr *MI); 245263509Sdim static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 246263509Sdim static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 247263509Sdim /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 248263509Sdim /// 249263509Sdim /// BB with backward-edge could have move instructions after the branch 250263509Sdim /// instruction. Such move instruction "belong to" the loop backward-edge. 251263509Sdim MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 252263509Sdim static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 253263509Sdim static MachineInstr *getContinueInstr(MachineBasicBlock *MBB); 254263509Sdim static bool isReturnBlock(MachineBasicBlock *MBB); 255263509Sdim static void cloneSuccessorList(MachineBasicBlock *DstMBB, 256263509Sdim MachineBasicBlock *SrcMBB) ; 257263509Sdim static MachineBasicBlock *clone(MachineBasicBlock *MBB); 258263509Sdim /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 259263509Sdim /// because the AMDGPU instruction is not recognized as terminator fix this 260263509Sdim /// and retire this routine 261263509Sdim void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 262263509Sdim MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 263263509Sdim static void wrapup(MachineBasicBlock *MBB); 264249259Sdim 265249259Sdim 266263509Sdim int patternMatch(MachineBasicBlock *MBB); 267263509Sdim int patternMatchGroup(MachineBasicBlock *MBB); 268263509Sdim int serialPatternMatch(MachineBasicBlock *MBB); 269263509Sdim int ifPatternMatch(MachineBasicBlock *MBB); 270263509Sdim int loopendPatternMatch(); 271263509Sdim int mergeLoop(MachineLoop *LoopRep); 272263509Sdim int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader); 273249259Sdim 274263509Sdim void handleLoopcontBlock(MachineBasicBlock *ContingMBB, 275263509Sdim MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, 276263509Sdim MachineLoop *ContLoop); 277263509Sdim /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 278263509Sdim /// the same loop with LoopLandInfo without explicitly keeping track of 279263509Sdim /// loopContBlks and loopBreakBlks, this is a method to get the information. 280263509Sdim bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 281263509Sdim MachineBasicBlock *Src2MBB); 282263509Sdim int handleJumpintoIf(MachineBasicBlock *HeadMBB, 283263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 284263509Sdim int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 285263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 286263509Sdim int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 287263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 288263509Sdim MachineBasicBlock **LandMBBPtr); 289263509Sdim void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 290263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 291263509Sdim MachineBasicBlock *LandMBB, bool Detail = false); 292263509Sdim int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 293263509Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 294263509Sdim void mergeSerialBlock(MachineBasicBlock *DstMBB, 295263509Sdim MachineBasicBlock *SrcMBB); 296249259Sdim 297263509Sdim void mergeIfthenelseBlock(MachineInstr *BranchMI, 298263509Sdim MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 299263509Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 300263509Sdim void mergeLooplandBlock(MachineBasicBlock *DstMBB, 301263509Sdim MachineBasicBlock *LandMBB); 302263509Sdim void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 303263509Sdim MachineBasicBlock *LandMBB); 304263509Sdim void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 305263509Sdim MachineBasicBlock *ContMBB); 306263509Sdim /// normalizeInfiniteLoopExit change 307263509Sdim /// B1: 308263509Sdim /// uncond_br LoopHeader 309263509Sdim /// 310263509Sdim /// to 311263509Sdim /// B1: 312263509Sdim /// cond_br 1 LoopHeader dummyExit 313263509Sdim /// and return the newly added dummy exit block 314263509Sdim MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 315263509Sdim void removeUnconditionalBranch(MachineBasicBlock *MBB); 316263509Sdim /// Remove duplicate branches instructions in a block. 317263509Sdim /// For instance 318263509Sdim /// B0: 319263509Sdim /// cond_br X B1 B2 320263509Sdim /// cond_br X B1 B2 321263509Sdim /// is transformed to 322263509Sdim /// B0: 323263509Sdim /// cond_br X B1 B2 324263509Sdim void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 325263509Sdim void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 326263509Sdim void removeSuccessor(MachineBasicBlock *MBB); 327263509Sdim MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 328263509Sdim MachineBasicBlock *PredMBB); 329263509Sdim void migrateInstruction(MachineBasicBlock *SrcMBB, 330263509Sdim MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 331263509Sdim void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 332263509Sdim void retireBlock(MachineBasicBlock *MBB); 333263509Sdim void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL); 334249259Sdim 335263509Sdim MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&); 336263509Sdim /// This is work around solution for findNearestCommonDominator not avaiable 337263509Sdim /// to post dom a proper fix should go to Dominators.h. 338263509Sdim MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1, 339263509Sdim MachineBasicBlock *MBB2); 340249259Sdim 341249259Sdimprivate: 342263509Sdim MBBInfoMap BlockInfoMap; 343263509Sdim LoopLandInfoMap LLInfoMap; 344263509Sdim std::map<MachineLoop *, bool> Visited; 345263509Sdim MachineFunction *FuncRep; 346263509Sdim SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 347263509Sdim}; 348249259Sdim 349263509Sdimint AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 350263509Sdim MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 351263509Sdim if (It == BlockInfoMap.end()) 352263509Sdim return INVALIDSCCNUM; 353263509Sdim return (*It).second->SccNum; 354263509Sdim} 355249259Sdim 356263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 357263509Sdim const { 358263509Sdim LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 359263509Sdim if (It == LLInfoMap.end()) 360263509Sdim return NULL; 361263509Sdim return (*It).second; 362263509Sdim} 363249259Sdim 364263509Sdimbool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 365263509Sdim MachineLoop *LoopRep = MLI->getLoopFor(MBB); 366263509Sdim if (!LoopRep) 367263509Sdim return false; 368263509Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 369263509Sdim return MBB->isSuccessor(LoopHeader); 370249259Sdim} 371249259Sdim 372263509Sdimunsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) { 373263509Sdim return LoopRep ? LoopRep->getLoopDepth() : 0; 374263509Sdim} 375263509Sdim 376263509Sdimbool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 377263509Sdim MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 378263509Sdim if (It == BlockInfoMap.end()) 379263509Sdim return false; 380263509Sdim return (*It).second->IsRetired; 381263509Sdim} 382263509Sdim 383263509Sdimbool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 384263509Sdim MachineLoop *LoopRep = MLI->getLoopFor(MBB); 385263509Sdim while (LoopRep && LoopRep->getHeader() == MBB) { 386263509Sdim MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 387263509Sdim if(!LoopLand) 388263509Sdim return true; 389263509Sdim if (!isRetiredBlock(LoopLand)) 390263509Sdim return true; 391263509Sdim LoopRep = LoopRep->getParentLoop(); 392249259Sdim } 393263509Sdim return false; 394249259Sdim} 395263509SdimAMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 396263509Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 397263509Sdim bool AllowSideEntry) const { 398263509Sdim assert(DstMBB); 399263509Sdim if (SrcMBB == DstMBB) 400263509Sdim return SinglePath_InPath; 401263509Sdim while (SrcMBB && SrcMBB->succ_size() == 1) { 402263509Sdim SrcMBB = *SrcMBB->succ_begin(); 403263509Sdim if (SrcMBB == DstMBB) 404263509Sdim return SinglePath_InPath; 405263509Sdim if (!AllowSideEntry && SrcMBB->pred_size() > 1) 406263509Sdim return Not_SinglePath; 407263509Sdim } 408263509Sdim if (SrcMBB && SrcMBB->succ_size()==0) 409263509Sdim return SinglePath_NotInPath; 410263509Sdim return Not_SinglePath; 411263509Sdim} 412249259Sdim 413263509Sdimint AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 414263509Sdim MBBVector::const_iterator E) const { 415263509Sdim int Count = 0; 416263509Sdim while (It != E) { 417263509Sdim if (!isRetiredBlock(*It)) 418263509Sdim ++Count; 419263509Sdim ++It; 420263509Sdim } 421263509Sdim return Count; 422263509Sdim} 423249259Sdim 424263509Sdimbool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 425263509Sdim unsigned BlockSizeThreshold = 30; 426263509Sdim unsigned CloneInstrThreshold = 100; 427263509Sdim bool MultiplePreds = MBB && (MBB->pred_size() > 1); 428249259Sdim 429263509Sdim if(!MultiplePreds) 430263509Sdim return false; 431263509Sdim unsigned BlkSize = MBB->size(); 432263509Sdim return ((BlkSize > BlockSizeThreshold) && 433263509Sdim (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 434263509Sdim} 435249259Sdim 436263509Sdimvoid AMDGPUCFGStructurizer::reversePredicateSetter( 437263509Sdim MachineBasicBlock::iterator I) { 438263509Sdim while (I--) { 439263509Sdim if (I->getOpcode() == AMDGPU::PRED_X) { 440263509Sdim switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { 441263509Sdim case OPCODE_IS_ZERO_INT: 442263509Sdim static_cast<MachineInstr *>(I)->getOperand(2) 443263509Sdim .setImm(OPCODE_IS_NOT_ZERO_INT); 444263509Sdim return; 445263509Sdim case OPCODE_IS_NOT_ZERO_INT: 446263509Sdim static_cast<MachineInstr *>(I)->getOperand(2) 447263509Sdim .setImm(OPCODE_IS_ZERO_INT); 448263509Sdim return; 449263509Sdim case OPCODE_IS_ZERO: 450263509Sdim static_cast<MachineInstr *>(I)->getOperand(2) 451263509Sdim .setImm(OPCODE_IS_NOT_ZERO); 452263509Sdim return; 453263509Sdim case OPCODE_IS_NOT_ZERO: 454263509Sdim static_cast<MachineInstr *>(I)->getOperand(2) 455263509Sdim .setImm(OPCODE_IS_ZERO); 456263509Sdim return; 457263509Sdim default: 458263509Sdim llvm_unreachable("PRED_X Opcode invalid!"); 459263509Sdim } 460263509Sdim } 461249259Sdim } 462263509Sdim} 463249259Sdim 464263509Sdimvoid AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 465263509Sdim int NewOpcode, DebugLoc DL) { 466263509Sdim MachineInstr *MI = MBB->getParent() 467263509Sdim ->CreateMachineInstr(TII->get(NewOpcode), DL); 468263509Sdim MBB->push_back(MI); 469263509Sdim //assume the instruction doesn't take any reg operand ... 470263509Sdim SHOWNEWINSTR(MI); 471263509Sdim} 472263509Sdim 473263509SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 474263509Sdim int NewOpcode, DebugLoc DL) { 475263509Sdim MachineInstr *MI = 476263509Sdim MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 477263509Sdim if (MBB->begin() != MBB->end()) 478263509Sdim MBB->insert(MBB->begin(), MI); 479263509Sdim else 480263509Sdim MBB->push_back(MI); 481263509Sdim SHOWNEWINSTR(MI); 482263509Sdim return MI; 483263509Sdim} 484263509Sdim 485263509SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 486263509Sdim MachineBasicBlock::iterator I, int NewOpcode) { 487263509Sdim MachineInstr *OldMI = &(*I); 488263509Sdim MachineBasicBlock *MBB = OldMI->getParent(); 489263509Sdim MachineInstr *NewMBB = 490263509Sdim MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 491263509Sdim MBB->insert(I, NewMBB); 492263509Sdim //assume the instruction doesn't take any reg operand ... 493263509Sdim SHOWNEWINSTR(NewMBB); 494263509Sdim return NewMBB; 495263509Sdim} 496263509Sdim 497263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore( 498263509Sdim MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) { 499263509Sdim MachineInstr *OldMI = &(*I); 500263509Sdim MachineBasicBlock *MBB = OldMI->getParent(); 501263509Sdim MachineFunction *MF = MBB->getParent(); 502263509Sdim MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 503263509Sdim MBB->insert(I, NewMI); 504263509Sdim MachineInstrBuilder MIB(*MF, NewMI); 505263509Sdim MIB.addReg(OldMI->getOperand(1).getReg(), false); 506263509Sdim SHOWNEWINSTR(NewMI); 507263509Sdim //erase later oldInstr->eraseFromParent(); 508263509Sdim} 509263509Sdim 510263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk, 511263509Sdim MachineBasicBlock::iterator I, int NewOpcode, int RegNum, 512263509Sdim DebugLoc DL) { 513263509Sdim MachineFunction *MF = blk->getParent(); 514263509Sdim MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 515263509Sdim //insert before 516263509Sdim blk->insert(I, NewInstr); 517263509Sdim MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 518263509Sdim SHOWNEWINSTR(NewInstr); 519263509Sdim} 520263509Sdim 521263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB, 522263509Sdim int NewOpcode, int RegNum) { 523263509Sdim MachineFunction *MF = MBB->getParent(); 524263509Sdim MachineInstr *NewInstr = 525263509Sdim MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 526263509Sdim MBB->push_back(NewInstr); 527263509Sdim MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 528263509Sdim SHOWNEWINSTR(NewInstr); 529263509Sdim} 530263509Sdim 531263509Sdimint AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 532263509Sdim switch(OldOpcode) { 533263509Sdim case AMDGPU::JUMP_COND: 534263509Sdim case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 535263509Sdim case AMDGPU::BRANCH_COND_i32: 536263509Sdim case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; 537263509Sdim default: llvm_unreachable("internal error"); 538249259Sdim } 539263509Sdim return -1; 540263509Sdim} 541249259Sdim 542263509Sdimint AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 543263509Sdim switch(OldOpcode) { 544263509Sdim case AMDGPU::JUMP_COND: 545263509Sdim case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 546263509Sdim case AMDGPU::BRANCH_COND_i32: 547263509Sdim case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; 548263509Sdim default: llvm_unreachable("internal error"); 549249259Sdim } 550263509Sdim return -1; 551263509Sdim} 552249259Sdim 553263509Sdimint AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 554263509Sdim switch(OldOpcode) { 555263509Sdim case AMDGPU::JUMP_COND: 556263509Sdim case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 557263509Sdim default: llvm_unreachable("internal error"); 558263509Sdim }; 559263509Sdim return -1; 560263509Sdim} 561249259Sdim 562263509Sdimint AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 563263509Sdim switch(OldOpcode) { 564263509Sdim case AMDGPU::JUMP_COND: 565263509Sdim case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; 566263509Sdim default: llvm_unreachable("internal error"); 567249259Sdim } 568263509Sdim return -1; 569263509Sdim} 570249259Sdim 571263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 572263509Sdim return MI->getOperand(0).getMBB(); 573263509Sdim} 574249259Sdim 575263509Sdimvoid AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 576263509Sdim MachineBasicBlock *MBB) { 577263509Sdim MI->getOperand(0).setMBB(MBB); 578263509Sdim} 579263509Sdim 580263509SdimMachineBasicBlock * 581263509SdimAMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 582263509Sdim MachineInstr *MI) { 583263509Sdim assert(MBB->succ_size() == 2); 584263509Sdim MachineBasicBlock *TrueBranch = getTrueBranch(MI); 585263509Sdim MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 586263509Sdim MachineBasicBlock::succ_iterator Next = It; 587263509Sdim ++Next; 588263509Sdim return (*It == TrueBranch) ? *Next : *It; 589263509Sdim} 590263509Sdim 591263509Sdimbool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 592263509Sdim switch (MI->getOpcode()) { 593263509Sdim case AMDGPU::JUMP_COND: 594263509Sdim case AMDGPU::BRANCH_COND_i32: 595263509Sdim case AMDGPU::BRANCH_COND_f32: return true; 596263509Sdim default: 597263509Sdim return false; 598263509Sdim } 599263509Sdim return false; 600263509Sdim} 601263509Sdim 602263509Sdimbool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 603263509Sdim switch (MI->getOpcode()) { 604263509Sdim case AMDGPU::JUMP: 605263509Sdim case AMDGPU::BRANCH: 606263509Sdim return true; 607263509Sdim default: 608263509Sdim return false; 609263509Sdim } 610263509Sdim return false; 611263509Sdim} 612263509Sdim 613263509SdimDebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 614263509Sdim //get DebugLoc from the first MachineBasicBlock instruction with debug info 615263509Sdim DebugLoc DL; 616263509Sdim for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 617263509Sdim ++It) { 618263509Sdim MachineInstr *instr = &(*It); 619263509Sdim if (instr->getDebugLoc().isUnknown() == false) 620263509Sdim DL = instr->getDebugLoc(); 621263509Sdim } 622263509Sdim return DL; 623263509Sdim} 624263509Sdim 625263509SdimMachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 626263509Sdim MachineBasicBlock *MBB) { 627263509Sdim MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 628263509Sdim MachineInstr *MI = &*It; 629263509Sdim if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 630263509Sdim return MI; 631263509Sdim return NULL; 632263509Sdim} 633263509Sdim 634263509SdimMachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 635263509Sdim MachineBasicBlock *MBB) { 636263509Sdim for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 637263509Sdim It != E; ++It) { 638263509Sdim // FIXME: Simplify 639263509Sdim MachineInstr *MI = &*It; 640263509Sdim if (MI) { 641263509Sdim if (isCondBranch(MI) || isUncondBranch(MI)) 642263509Sdim return MI; 643263509Sdim else if (!TII->isMov(MI->getOpcode())) 644263509Sdim break; 645249259Sdim } 646263509Sdim } 647263509Sdim return NULL; 648263509Sdim} 649249259Sdim 650263509SdimMachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 651263509Sdim MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 652263509Sdim if (It != MBB->rend()) { 653263509Sdim MachineInstr *instr = &(*It); 654263509Sdim if (instr->getOpcode() == AMDGPU::RETURN) 655263509Sdim return instr; 656249259Sdim } 657263509Sdim return NULL; 658263509Sdim} 659249259Sdim 660263509SdimMachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { 661263509Sdim MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 662263509Sdim if (It != MBB->rend()) { 663263509Sdim MachineInstr *MI = &(*It); 664263509Sdim if (MI->getOpcode() == AMDGPU::CONTINUE) 665263509Sdim return MI; 666263509Sdim } 667263509Sdim return NULL; 668263509Sdim} 669249259Sdim 670263509Sdimbool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 671263509Sdim MachineInstr *MI = getReturnInstr(MBB); 672263509Sdim bool IsReturn = (MBB->succ_size() == 0); 673263509Sdim if (MI) 674263509Sdim assert(IsReturn); 675263509Sdim else if (IsReturn) 676263509Sdim DEBUG( 677263509Sdim dbgs() << "BB" << MBB->getNumber() 678263509Sdim <<" is return block without RETURN instr\n";); 679263509Sdim return IsReturn; 680263509Sdim} 681249259Sdim 682263509Sdimvoid AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 683263509Sdim MachineBasicBlock *SrcMBB) { 684263509Sdim for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 685263509Sdim iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 686263509Sdim DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 687263509Sdim} 688263509Sdim 689263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 690263509Sdim MachineFunction *Func = MBB->getParent(); 691263509Sdim MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 692263509Sdim Func->push_back(NewMBB); //insert to function 693263509Sdim for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end(); 694263509Sdim It != E; ++It) { 695263509Sdim MachineInstr *MI = Func->CloneMachineInstr(It); 696263509Sdim NewMBB->push_back(MI); 697249259Sdim } 698263509Sdim return NewMBB; 699263509Sdim} 700249259Sdim 701263509Sdimvoid AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 702263509Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 703263509Sdim MachineBasicBlock *NewBlk) { 704263509Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 705263509Sdim if (BranchMI && isCondBranch(BranchMI) && 706263509Sdim getTrueBranch(BranchMI) == OldMBB) 707263509Sdim setTrueBranch(BranchMI, NewBlk); 708263509Sdim} 709263509Sdim 710263509Sdimvoid AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 711263509Sdim assert((!MBB->getParent()->getJumpTableInfo() 712263509Sdim || MBB->getParent()->getJumpTableInfo()->isEmpty()) 713263509Sdim && "found a jump table"); 714263509Sdim 715263509Sdim //collect continue right before endloop 716263509Sdim SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 717263509Sdim MachineBasicBlock::iterator Pre = MBB->begin(); 718263509Sdim MachineBasicBlock::iterator E = MBB->end(); 719263509Sdim MachineBasicBlock::iterator It = Pre; 720263509Sdim while (It != E) { 721263509Sdim if (Pre->getOpcode() == AMDGPU::CONTINUE 722263509Sdim && It->getOpcode() == AMDGPU::ENDLOOP) 723263509Sdim ContInstr.push_back(Pre); 724263509Sdim Pre = It; 725263509Sdim ++It; 726263509Sdim } 727263509Sdim 728263509Sdim //delete continue right before endloop 729263509Sdim for (unsigned i = 0; i < ContInstr.size(); ++i) 730263509Sdim ContInstr[i]->eraseFromParent(); 731263509Sdim 732263509Sdim // TODO to fix up jump table so later phase won't be confused. if 733263509Sdim // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 734263509Sdim // there isn't such an interface yet. alternatively, replace all the other 735263509Sdim // blocks in the jump table with the entryBlk //} 736263509Sdim 737263509Sdim} 738263509Sdim 739263509Sdim 740263509Sdimbool AMDGPUCFGStructurizer::prepare() { 741263509Sdim bool Changed = false; 742263509Sdim 743263509Sdim //FIXME: if not reducible flow graph, make it so ??? 744263509Sdim 745263509Sdim DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 746263509Sdim 747263509Sdim orderBlocks(FuncRep); 748263509Sdim 749263509Sdim SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 750263509Sdim 751263509Sdim // Add an ExitBlk to loop that don't have one 752263509Sdim for (MachineLoopInfo::iterator It = MLI->begin(), 753263509Sdim E = MLI->end(); It != E; ++It) { 754263509Sdim MachineLoop *LoopRep = (*It); 755263509Sdim MBBVector ExitingMBBs; 756263509Sdim LoopRep->getExitingBlocks(ExitingMBBs); 757263509Sdim 758263509Sdim if (ExitingMBBs.size() == 0) { 759263509Sdim MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 760263509Sdim if (DummyExitBlk) 761263509Sdim RetBlks.push_back(DummyExitBlk); 762263509Sdim } 763249259Sdim } 764249259Sdim 765263509Sdim // Remove unconditional branch instr. 766263509Sdim // Add dummy exit block iff there are multiple returns. 767263509Sdim for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 768263509Sdim It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 769263509Sdim MachineBasicBlock *MBB = *It; 770263509Sdim removeUnconditionalBranch(MBB); 771263509Sdim removeRedundantConditionalBranch(MBB); 772263509Sdim if (isReturnBlock(MBB)) { 773263509Sdim RetBlks.push_back(MBB); 774263509Sdim } 775263509Sdim assert(MBB->succ_size() <= 2); 776249259Sdim } 777249259Sdim 778263509Sdim if (RetBlks.size() >= 2) { 779263509Sdim addDummyExitBlock(RetBlks); 780263509Sdim Changed = true; 781249259Sdim } 782249259Sdim 783263509Sdim return Changed; 784263509Sdim} 785263509Sdim 786263509Sdimbool AMDGPUCFGStructurizer::run() { 787263509Sdim 788263509Sdim //Assume reducible CFG... 789263509Sdim DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG();); 790263509Sdim 791249259Sdim#ifdef STRESSTEST 792249259Sdim //Use the worse block ordering to test the algorithm. 793249259Sdim ReverseVector(orderedBlks); 794249259Sdim#endif 795249259Sdim 796263509Sdim DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 797263509Sdim int NumIter = 0; 798263509Sdim bool Finish = false; 799263509Sdim MachineBasicBlock *MBB; 800263509Sdim bool MakeProgress = false; 801263509Sdim int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 802263509Sdim OrderedBlks.end()); 803249259Sdim 804249259Sdim do { 805263509Sdim ++NumIter; 806263509Sdim DEBUG( 807263509Sdim dbgs() << "numIter = " << NumIter 808263509Sdim << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; 809263509Sdim ); 810249259Sdim 811263509Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 812263509Sdim OrderedBlks.begin(); 813263509Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 814263509Sdim OrderedBlks.end(); 815249259Sdim 816263509Sdim SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 817263509Sdim It; 818263509Sdim MachineBasicBlock *SccBeginMBB = NULL; 819263509Sdim int SccNumBlk = 0; // The number of active blocks, init to a 820249259Sdim // maximum possible number. 821263509Sdim int SccNumIter; // Number of iteration in this SCC. 822249259Sdim 823263509Sdim while (It != E) { 824263509Sdim MBB = *It; 825249259Sdim 826263509Sdim if (!SccBeginMBB) { 827263509Sdim SccBeginIter = It; 828263509Sdim SccBeginMBB = MBB; 829263509Sdim SccNumIter = 0; 830263509Sdim SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 831263509Sdim DEBUG( 832263509Sdim dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 833263509Sdim dbgs() << "\n"; 834263509Sdim ); 835249259Sdim } 836249259Sdim 837263509Sdim if (!isRetiredBlock(MBB)) 838263509Sdim patternMatch(MBB); 839249259Sdim 840263509Sdim ++It; 841249259Sdim 842263509Sdim bool ContNextScc = true; 843263509Sdim if (It == E 844263509Sdim || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 845249259Sdim // Just finish one scc. 846263509Sdim ++SccNumIter; 847263509Sdim int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 848263509Sdim if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 849263509Sdim DEBUG( 850263509Sdim dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 851263509Sdim << ", sccNumIter = " << SccNumIter; 852263509Sdim dbgs() << "doesn't make any progress\n"; 853263509Sdim ); 854263509Sdim ContNextScc = true; 855263509Sdim } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 856263509Sdim SccNumBlk = sccRemainedNumBlk; 857263509Sdim It = SccBeginIter; 858263509Sdim ContNextScc = false; 859263509Sdim DEBUG( 860263509Sdim dbgs() << "repeat processing SCC" << getSCCNum(MBB) 861263509Sdim << "sccNumIter = " << SccNumIter << "\n"; 862263509Sdim FuncRep->viewCFG(); 863263509Sdim ); 864249259Sdim } else { 865249259Sdim // Finish the current scc. 866263509Sdim ContNextScc = true; 867249259Sdim } 868249259Sdim } else { 869249259Sdim // Continue on next component in the current scc. 870263509Sdim ContNextScc = false; 871249259Sdim } 872249259Sdim 873263509Sdim if (ContNextScc) 874263509Sdim SccBeginMBB = NULL; 875249259Sdim } //while, "one iteration" over the function. 876249259Sdim 877263509Sdim MachineBasicBlock *EntryMBB = 878263509Sdim GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 879263509Sdim if (EntryMBB->succ_size() == 0) { 880263509Sdim Finish = true; 881263509Sdim DEBUG( 882263509Sdim dbgs() << "Reduce to one block\n"; 883263509Sdim ); 884249259Sdim } else { 885263509Sdim int NewnumRemainedBlk 886263509Sdim = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 887249259Sdim // consider cloned blocks ?? 888263509Sdim if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 889263509Sdim MakeProgress = true; 890263509Sdim NumRemainedBlk = NewnumRemainedBlk; 891249259Sdim } else { 892263509Sdim MakeProgress = false; 893263509Sdim DEBUG( 894263509Sdim dbgs() << "No progress\n"; 895263509Sdim ); 896249259Sdim } 897249259Sdim } 898263509Sdim } while (!Finish && MakeProgress); 899249259Sdim 900249259Sdim // Misc wrap up to maintain the consistency of the Function representation. 901263509Sdim wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 902249259Sdim 903249259Sdim // Detach retired Block, release memory. 904263509Sdim for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 905263509Sdim It != E; ++It) { 906263509Sdim if ((*It).second && (*It).second->IsRetired) { 907263509Sdim assert(((*It).first)->getNumber() != -1); 908263509Sdim DEBUG( 909263509Sdim dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; 910263509Sdim ); 911263509Sdim (*It).first->eraseFromParent(); //Remove from the parent Function. 912249259Sdim } 913263509Sdim delete (*It).second; 914249259Sdim } 915263509Sdim BlockInfoMap.clear(); 916263509Sdim LLInfoMap.clear(); 917249259Sdim 918263509Sdim DEBUG( 919263509Sdim FuncRep->viewCFG(); 920263509Sdim ); 921249259Sdim 922263509Sdim if (!Finish) 923263509Sdim llvm_unreachable("IRREDUCIBL_CF"); 924249259Sdim 925249259Sdim return true; 926263509Sdim} 927249259Sdim 928249259Sdim 929263509Sdim 930263509Sdimvoid AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 931263509Sdim int SccNum = 0; 932263509Sdim MachineBasicBlock *MBB; 933263509Sdim for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF); 934263509Sdim It != E; ++It, ++SccNum) { 935263509Sdim std::vector<MachineBasicBlock *> &SccNext = *It; 936263509Sdim for (std::vector<MachineBasicBlock *>::const_iterator 937263509Sdim blockIter = SccNext.begin(), blockEnd = SccNext.end(); 938249259Sdim blockIter != blockEnd; ++blockIter) { 939263509Sdim MBB = *blockIter; 940263509Sdim OrderedBlks.push_back(MBB); 941263509Sdim recordSccnum(MBB, SccNum); 942249259Sdim } 943249259Sdim } 944249259Sdim 945249259Sdim //walk through all the block in func to check for unreachable 946263509Sdim typedef GraphTraits<MachineFunction *> GTM; 947263509Sdim MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF); 948263509Sdim for (; It != E; ++It) { 949263509Sdim MachineBasicBlock *MBB = &(*It); 950263509Sdim SccNum = getSCCNum(MBB); 951263509Sdim if (SccNum == INVALIDSCCNUM) 952263509Sdim dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 953249259Sdim } 954263509Sdim} 955249259Sdim 956263509Sdimint AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 957263509Sdim int NumMatch = 0; 958263509Sdim int CurMatch; 959249259Sdim 960263509Sdim DEBUG( 961263509Sdim dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; 962263509Sdim ); 963249259Sdim 964263509Sdim while ((CurMatch = patternMatchGroup(MBB)) > 0) 965263509Sdim NumMatch += CurMatch; 966249259Sdim 967263509Sdim DEBUG( 968263509Sdim dbgs() << "End patternMatch BB" << MBB->getNumber() 969263509Sdim << ", numMatch = " << NumMatch << "\n"; 970263509Sdim ); 971249259Sdim 972263509Sdim return NumMatch; 973263509Sdim} 974249259Sdim 975263509Sdimint AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 976263509Sdim int NumMatch = 0; 977263509Sdim NumMatch += loopendPatternMatch(); 978263509Sdim NumMatch += serialPatternMatch(MBB); 979263509Sdim NumMatch += ifPatternMatch(MBB); 980263509Sdim return NumMatch; 981263509Sdim} 982249259Sdim 983263509Sdim 984263509Sdimint AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 985263509Sdim if (MBB->succ_size() != 1) 986249259Sdim return 0; 987249259Sdim 988263509Sdim MachineBasicBlock *childBlk = *MBB->succ_begin(); 989263509Sdim if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 990249259Sdim return 0; 991249259Sdim 992263509Sdim mergeSerialBlock(MBB, childBlk); 993249259Sdim ++numSerialPatternMatch; 994249259Sdim return 1; 995263509Sdim} 996249259Sdim 997263509Sdimint AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 998249259Sdim //two edges 999263509Sdim if (MBB->succ_size() != 2) 1000249259Sdim return 0; 1001263509Sdim if (hasBackEdge(MBB)) 1002249259Sdim return 0; 1003263509Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1004263509Sdim if (!BranchMI) 1005249259Sdim return 0; 1006249259Sdim 1007263509Sdim assert(isCondBranch(BranchMI)); 1008263509Sdim int NumMatch = 0; 1009249259Sdim 1010263509Sdim MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 1011263509Sdim NumMatch += serialPatternMatch(TrueMBB); 1012263509Sdim NumMatch += ifPatternMatch(TrueMBB); 1013263509Sdim MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 1014263509Sdim NumMatch += serialPatternMatch(FalseMBB); 1015263509Sdim NumMatch += ifPatternMatch(FalseMBB); 1016263509Sdim MachineBasicBlock *LandBlk; 1017263509Sdim int Cloned = 0; 1018249259Sdim 1019263509Sdim assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 1020249259Sdim // TODO: Simplify 1021263509Sdim if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 1022263509Sdim && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 1023263509Sdim // Diamond pattern 1024263509Sdim LandBlk = *TrueMBB->succ_begin(); 1025263509Sdim } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 1026263509Sdim // Triangle pattern, false is empty 1027263509Sdim LandBlk = FalseMBB; 1028263509Sdim FalseMBB = NULL; 1029263509Sdim } else if (FalseMBB->succ_size() == 1 1030263509Sdim && *FalseMBB->succ_begin() == TrueMBB) { 1031263509Sdim // Triangle pattern, true is empty 1032263509Sdim // We reverse the predicate to make a triangle, empty false pattern; 1033263509Sdim std::swap(TrueMBB, FalseMBB); 1034263509Sdim reversePredicateSetter(MBB->end()); 1035263509Sdim LandBlk = FalseMBB; 1036263509Sdim FalseMBB = NULL; 1037263509Sdim } else if (FalseMBB->succ_size() == 1 1038263509Sdim && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 1039263509Sdim LandBlk = *FalseMBB->succ_begin(); 1040263509Sdim } else if (TrueMBB->succ_size() == 1 1041263509Sdim && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 1042263509Sdim LandBlk = *TrueMBB->succ_begin(); 1043249259Sdim } else { 1044263509Sdim return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 1045249259Sdim } 1046249259Sdim 1047249259Sdim // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 1048249259Sdim // new BB created for landBlk==NULL may introduce new challenge to the 1049249259Sdim // reduction process. 1050263509Sdim if (LandBlk && 1051263509Sdim ((TrueMBB && TrueMBB->pred_size() > 1) 1052263509Sdim || (FalseMBB && FalseMBB->pred_size() > 1))) { 1053263509Sdim Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 1054249259Sdim } 1055249259Sdim 1056263509Sdim if (TrueMBB && TrueMBB->pred_size() > 1) { 1057263509Sdim TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 1058263509Sdim ++Cloned; 1059249259Sdim } 1060249259Sdim 1061263509Sdim if (FalseMBB && FalseMBB->pred_size() > 1) { 1062263509Sdim FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 1063263509Sdim ++Cloned; 1064249259Sdim } 1065249259Sdim 1066263509Sdim mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 1067249259Sdim 1068249259Sdim ++numIfPatternMatch; 1069249259Sdim 1070263509Sdim numClonedBlock += Cloned; 1071249259Sdim 1072263509Sdim return 1 + Cloned + NumMatch; 1073263509Sdim} 1074249259Sdim 1075263509Sdimint AMDGPUCFGStructurizer::loopendPatternMatch() { 1076263509Sdim std::vector<MachineLoop *> NestedLoops; 1077263509Sdim for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); 1078263509Sdim It != E; ++It) { 1079263509Sdim df_iterator<MachineLoop *> LpIt = df_begin(*It), 1080263509Sdim LpE = df_end(*It); 1081263509Sdim for (; LpIt != LpE; ++LpIt) 1082263509Sdim NestedLoops.push_back(*LpIt); 1083249259Sdim } 1084263509Sdim if (NestedLoops.size() == 0) 1085249259Sdim return 0; 1086249259Sdim 1087249259Sdim // Process nested loop outside->inside, so "continue" to a outside loop won't 1088249259Sdim // be mistaken as "break" of the current loop. 1089263509Sdim int Num = 0; 1090263509Sdim for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(), 1091263509Sdim E = NestedLoops.rend(); It != E; ++It) { 1092263509Sdim MachineLoop *ExaminedLoop = *It; 1093263509Sdim if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 1094249259Sdim continue; 1095263509Sdim DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 1096263509Sdim int NumBreak = mergeLoop(ExaminedLoop); 1097263509Sdim if (NumBreak == -1) 1098249259Sdim break; 1099263509Sdim Num += NumBreak; 1100249259Sdim } 1101263509Sdim return Num; 1102263509Sdim} 1103249259Sdim 1104263509Sdimint AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1105263509Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1106263509Sdim MBBVector ExitingMBBs; 1107263509Sdim LoopRep->getExitingBlocks(ExitingMBBs); 1108263509Sdim assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1109263509Sdim DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); 1110263509Sdim // We assume a single ExitBlk 1111263509Sdim MBBVector ExitBlks; 1112263509Sdim LoopRep->getExitBlocks(ExitBlks); 1113263509Sdim SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1114263509Sdim for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1115263509Sdim ExitBlkSet.insert(ExitBlks[i]); 1116263509Sdim assert(ExitBlkSet.size() == 1); 1117263509Sdim MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1118263509Sdim assert(ExitBlk && "Loop has several exit block"); 1119263509Sdim MBBVector LatchBlks; 1120263509Sdim typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits; 1121263509Sdim InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader), 1122263509Sdim PE = InvMBBTraits::child_end(LoopHeader); 1123263509Sdim for (; PI != PE; PI++) { 1124263509Sdim if (LoopRep->contains(*PI)) 1125263509Sdim LatchBlks.push_back(*PI); 1126249259Sdim } 1127249259Sdim 1128263509Sdim for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1129263509Sdim mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1130263509Sdim for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1131263509Sdim settleLoopcontBlock(LatchBlks[i], LoopHeader); 1132263509Sdim int Match = 0; 1133263509Sdim do { 1134263509Sdim Match = 0; 1135263509Sdim Match += serialPatternMatch(LoopHeader); 1136263509Sdim Match += ifPatternMatch(LoopHeader); 1137263509Sdim } while (Match > 0); 1138263509Sdim mergeLooplandBlock(LoopHeader, ExitBlk); 1139263509Sdim MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1140263509Sdim if (ParentLoop) 1141263509Sdim MLI->changeLoopFor(LoopHeader, ParentLoop); 1142263509Sdim else 1143263509Sdim MLI->removeBlock(LoopHeader); 1144263509Sdim Visited[LoopRep] = true; 1145263509Sdim return 1; 1146263509Sdim} 1147249259Sdim 1148263509Sdimint AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, 1149263509Sdim MachineBasicBlock *LoopHeader) { 1150263509Sdim int NumCont = 0; 1151263509Sdim SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB; 1152263509Sdim typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM; 1153263509Sdim GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader), 1154263509Sdim E = GTIM::child_end(LoopHeader); 1155263509Sdim for (; It != E; ++It) { 1156263509Sdim MachineBasicBlock *MBB = *It; 1157263509Sdim if (LoopRep->contains(MBB)) { 1158263509Sdim handleLoopcontBlock(MBB, MLI->getLoopFor(MBB), 1159263509Sdim LoopHeader, LoopRep); 1160263509Sdim ContMBB.push_back(MBB); 1161263509Sdim ++NumCont; 1162249259Sdim } 1163249259Sdim } 1164249259Sdim 1165263509Sdim for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(), 1166263509Sdim E = ContMBB.end(); It != E; ++It) { 1167263509Sdim (*It)->removeSuccessor(LoopHeader); 1168249259Sdim } 1169249259Sdim 1170263509Sdim numLoopcontPatternMatch += NumCont; 1171249259Sdim 1172263509Sdim return NumCont; 1173263509Sdim} 1174249259Sdim 1175249259Sdim 1176263509Sdimbool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1177263509Sdim MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1178263509Sdim if (Src1MBB->succ_size() == 0) { 1179263509Sdim MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1180263509Sdim if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1181263509Sdim MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1182263509Sdim if (TheEntry) { 1183263509Sdim DEBUG( 1184263509Sdim dbgs() << "isLoopContBreakBlock yes src1 = BB" 1185263509Sdim << Src1MBB->getNumber() 1186263509Sdim << " src2 = BB" << Src2MBB->getNumber() << "\n"; 1187263509Sdim ); 1188249259Sdim return true; 1189249259Sdim } 1190249259Sdim } 1191249259Sdim } 1192249259Sdim return false; 1193263509Sdim} 1194249259Sdim 1195263509Sdimint AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1196263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1197263509Sdim int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1198263509Sdim if (Num == 0) { 1199263509Sdim DEBUG( 1200263509Sdim dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1201263509Sdim ); 1202263509Sdim Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1203249259Sdim } 1204263509Sdim return Num; 1205249259Sdim} 1206249259Sdim 1207263509Sdimint AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1208263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1209263509Sdim int Num = 0; 1210263509Sdim MachineBasicBlock *DownBlk; 1211249259Sdim 1212249259Sdim //trueBlk could be the common post dominator 1213263509Sdim DownBlk = TrueMBB; 1214249259Sdim 1215263509Sdim DEBUG( 1216263509Sdim dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1217263509Sdim << " true = BB" << TrueMBB->getNumber() 1218263509Sdim << ", numSucc=" << TrueMBB->succ_size() 1219263509Sdim << " false = BB" << FalseMBB->getNumber() << "\n"; 1220263509Sdim ); 1221249259Sdim 1222263509Sdim while (DownBlk) { 1223263509Sdim DEBUG( 1224263509Sdim dbgs() << "check down = BB" << DownBlk->getNumber(); 1225263509Sdim ); 1226249259Sdim 1227263509Sdim if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1228263509Sdim DEBUG( 1229263509Sdim dbgs() << " working\n"; 1230263509Sdim ); 1231249259Sdim 1232263509Sdim Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1233263509Sdim Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1234249259Sdim 1235263509Sdim numClonedBlock += Num; 1236263509Sdim Num += serialPatternMatch(*HeadMBB->succ_begin()); 1237263509Sdim Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin())); 1238263509Sdim Num += ifPatternMatch(HeadMBB); 1239263509Sdim assert(Num > 0); 1240249259Sdim 1241249259Sdim break; 1242249259Sdim } 1243263509Sdim DEBUG( 1244263509Sdim dbgs() << " not working\n"; 1245263509Sdim ); 1246263509Sdim DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL; 1247249259Sdim } // walk down the postDomTree 1248249259Sdim 1249263509Sdim return Num; 1250263509Sdim} 1251249259Sdim 1252263509Sdimvoid AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1253263509Sdim MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1254263509Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1255263509Sdim dbgs() << "head = BB" << HeadMBB->getNumber() 1256263509Sdim << " size = " << HeadMBB->size(); 1257263509Sdim if (Detail) { 1258263509Sdim dbgs() << "\n"; 1259263509Sdim HeadMBB->print(dbgs()); 1260263509Sdim dbgs() << "\n"; 1261249259Sdim } 1262249259Sdim 1263263509Sdim if (TrueMBB) { 1264263509Sdim dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1265263509Sdim << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1266263509Sdim if (Detail) { 1267263509Sdim dbgs() << "\n"; 1268263509Sdim TrueMBB->print(dbgs()); 1269263509Sdim dbgs() << "\n"; 1270249259Sdim } 1271249259Sdim } 1272263509Sdim if (FalseMBB) { 1273263509Sdim dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1274263509Sdim << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1275263509Sdim if (Detail) { 1276263509Sdim dbgs() << "\n"; 1277263509Sdim FalseMBB->print(dbgs()); 1278263509Sdim dbgs() << "\n"; 1279249259Sdim } 1280249259Sdim } 1281263509Sdim if (LandMBB) { 1282263509Sdim dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1283263509Sdim << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1284263509Sdim if (Detail) { 1285263509Sdim dbgs() << "\n"; 1286263509Sdim LandMBB->print(dbgs()); 1287263509Sdim dbgs() << "\n"; 1288249259Sdim } 1289249259Sdim } 1290249259Sdim 1291263509Sdim dbgs() << "\n"; 1292263509Sdim} 1293249259Sdim 1294263509Sdimint AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1295263509Sdim MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1296263509Sdim MachineBasicBlock **LandMBBPtr) { 1297263509Sdim bool MigrateTrue = false; 1298263509Sdim bool MigrateFalse = false; 1299249259Sdim 1300263509Sdim MachineBasicBlock *LandBlk = *LandMBBPtr; 1301249259Sdim 1302263509Sdim assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1303263509Sdim && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1304249259Sdim 1305263509Sdim if (TrueMBB == FalseMBB) 1306249259Sdim return 0; 1307249259Sdim 1308263509Sdim MigrateTrue = needMigrateBlock(TrueMBB); 1309263509Sdim MigrateFalse = needMigrateBlock(FalseMBB); 1310249259Sdim 1311263509Sdim if (!MigrateTrue && !MigrateFalse) 1312249259Sdim return 0; 1313249259Sdim 1314249259Sdim // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1315249259Sdim // have more than one predecessors. without doing this, its predecessor 1316249259Sdim // rather than headBlk will have undefined value in initReg. 1317263509Sdim if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1318263509Sdim MigrateTrue = true; 1319263509Sdim if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1320263509Sdim MigrateFalse = true; 1321249259Sdim 1322263509Sdim DEBUG( 1323263509Sdim dbgs() << "before improveSimpleJumpintoIf: "; 1324263509Sdim showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1325263509Sdim ); 1326249259Sdim 1327249259Sdim // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1328249259Sdim // 1329249259Sdim // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1330249259Sdim // {initReg = 0; org falseBlk branch } 1331249259Sdim // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1332249259Sdim // => org landBlk 1333249259Sdim // if landBlk->pred_size() > 2, put the about if-else inside 1334249259Sdim // if (initReg !=2) {...} 1335249259Sdim // 1336249259Sdim // add initReg = initVal to headBlk 1337249259Sdim 1338249259Sdim const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1339263509Sdim if (!MigrateTrue || !MigrateFalse) { 1340263509Sdim // XXX: We have an opportunity here to optimize the "branch into if" case 1341263509Sdim // here. Branch into if looks like this: 1342263509Sdim // entry 1343263509Sdim // / | 1344263509Sdim // diamond_head branch_from 1345263509Sdim // / \ | 1346263509Sdim // diamond_false diamond_true 1347263509Sdim // \ / 1348263509Sdim // done 1349263509Sdim // 1350263509Sdim // The diamond_head block begins the "if" and the diamond_true block 1351263509Sdim // is the block being "branched into". 1352263509Sdim // 1353263509Sdim // If MigrateTrue is true, then TrueBB is the block being "branched into" 1354263509Sdim // and if MigrateFalse is true, then FalseBB is the block being 1355263509Sdim // "branched into" 1356263509Sdim // 1357263509Sdim // Here is the pseudo code for how I think the optimization should work: 1358263509Sdim // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1359263509Sdim // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1360263509Sdim // 3. Move the branch instruction from diamond_head into its own basic 1361263509Sdim // block (new_block). 1362263509Sdim // 4. Add an unconditional branch from diamond_head to new_block 1363263509Sdim // 5. Replace the branch instruction in branch_from with an unconditional 1364263509Sdim // branch to new_block. If branch_from has multiple predecessors, then 1365263509Sdim // we need to replace the True/False block in the branch 1366263509Sdim // instruction instead of replacing it. 1367263509Sdim // 6. Change the condition of the branch instruction in new_block from 1368263509Sdim // COND to (COND || GPR0) 1369263509Sdim // 1370263509Sdim // In order insert these MOV instruction, we will need to use the 1371263509Sdim // RegisterScavenger. Usually liveness stops being tracked during 1372263509Sdim // the late machine optimization passes, however if we implement 1373263509Sdim // bool TargetRegisterInfo::requiresRegisterScavenging( 1374263509Sdim // const MachineFunction &MF) 1375263509Sdim // and have it return true, liveness will be tracked correctly 1376263509Sdim // by generic optimization passes. We will also need to make sure that 1377263509Sdim // all of our target-specific passes that run after regalloc and before 1378263509Sdim // the CFGStructurizer track liveness and we will need to modify this pass 1379263509Sdim // to correctly track liveness. 1380263509Sdim // 1381263509Sdim // After the above changes, the new CFG should look like this: 1382263509Sdim // entry 1383263509Sdim // / | 1384263509Sdim // diamond_head branch_from 1385263509Sdim // \ / 1386263509Sdim // new_block 1387263509Sdim // / | 1388263509Sdim // diamond_false diamond_true 1389263509Sdim // \ / 1390263509Sdim // done 1391263509Sdim // 1392263509Sdim // Without this optimization, we are forced to duplicate the diamond_true 1393263509Sdim // block and we will end up with a CFG like this: 1394263509Sdim // 1395263509Sdim // entry 1396263509Sdim // / | 1397263509Sdim // diamond_head branch_from 1398263509Sdim // / \ | 1399263509Sdim // diamond_false diamond_true diamond_true (duplicate) 1400263509Sdim // \ / | 1401263509Sdim // done --------------------| 1402263509Sdim // 1403263509Sdim // Duplicating diamond_true can be very costly especially if it has a 1404263509Sdim // lot of instructions. 1405263509Sdim return 0; 1406249259Sdim } 1407249259Sdim 1408263509Sdim int NumNewBlk = 0; 1409249259Sdim 1410263509Sdim bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1411249259Sdim 1412249259Sdim //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1413263509Sdim MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); 1414249259Sdim 1415263509Sdim if (LandBlkHasOtherPred) { 1416263509Sdim llvm_unreachable("Extra register needed to handle CFG"); 1417263509Sdim unsigned CmpResReg = 1418263509Sdim HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1419263509Sdim llvm_unreachable("Extra compare instruction needed to handle CFG"); 1420263509Sdim insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, 1421263509Sdim CmpResReg, DebugLoc()); 1422249259Sdim } 1423249259Sdim 1424263509Sdim // XXX: We are running this after RA, so creating virtual registers will 1425263509Sdim // cause an assertion failure in the PostRA scheduling pass. 1426263509Sdim unsigned InitReg = 1427263509Sdim HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1428263509Sdim insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, 1429263509Sdim DebugLoc()); 1430249259Sdim 1431263509Sdim if (MigrateTrue) { 1432263509Sdim migrateInstruction(TrueMBB, LandBlk, I); 1433249259Sdim // need to uncondionally insert the assignment to ensure a path from its 1434249259Sdim // predecessor rather than headBlk has valid value in initReg if 1435249259Sdim // (initVal != 1). 1436263509Sdim llvm_unreachable("Extra register needed to handle CFG"); 1437249259Sdim } 1438263509Sdim insertInstrBefore(I, AMDGPU::ELSE); 1439249259Sdim 1440263509Sdim if (MigrateFalse) { 1441263509Sdim migrateInstruction(FalseMBB, LandBlk, I); 1442249259Sdim // need to uncondionally insert the assignment to ensure a path from its 1443249259Sdim // predecessor rather than headBlk has valid value in initReg if 1444249259Sdim // (initVal != 0) 1445263509Sdim llvm_unreachable("Extra register needed to handle CFG"); 1446249259Sdim } 1447249259Sdim 1448263509Sdim if (LandBlkHasOtherPred) { 1449249259Sdim // add endif 1450263509Sdim insertInstrBefore(I, AMDGPU::ENDIF); 1451249259Sdim 1452249259Sdim // put initReg = 2 to other predecessors of landBlk 1453263509Sdim for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1454263509Sdim PE = LandBlk->pred_end(); PI != PE; ++PI) { 1455263509Sdim MachineBasicBlock *MBB = *PI; 1456263509Sdim if (MBB != TrueMBB && MBB != FalseMBB) 1457263509Sdim llvm_unreachable("Extra register needed to handle CFG"); 1458263509Sdim } 1459249259Sdim } 1460263509Sdim DEBUG( 1461263509Sdim dbgs() << "result from improveSimpleJumpintoIf: "; 1462263509Sdim showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1463263509Sdim ); 1464249259Sdim 1465249259Sdim // update landBlk 1466263509Sdim *LandMBBPtr = LandBlk; 1467249259Sdim 1468263509Sdim return NumNewBlk; 1469263509Sdim} 1470249259Sdim 1471263509Sdimvoid AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB, 1472263509Sdim MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, 1473263509Sdim MachineLoop *ContLoop) { 1474263509Sdim DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber() 1475263509Sdim << " header = BB" << ContMBB->getNumber() << "\n"; 1476263509Sdim dbgs() << "Trying to continue loop-depth = " 1477263509Sdim << getLoopDepth(ContLoop) 1478263509Sdim << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";); 1479263509Sdim settleLoopcontBlock(ContingMBB, ContMBB); 1480263509Sdim} 1481249259Sdim 1482263509Sdimvoid AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1483263509Sdim MachineBasicBlock *SrcMBB) { 1484263509Sdim DEBUG( 1485263509Sdim dbgs() << "serialPattern BB" << DstMBB->getNumber() 1486263509Sdim << " <= BB" << SrcMBB->getNumber() << "\n"; 1487263509Sdim ); 1488263509Sdim DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1489249259Sdim 1490263509Sdim DstMBB->removeSuccessor(SrcMBB); 1491263509Sdim cloneSuccessorList(DstMBB, SrcMBB); 1492249259Sdim 1493263509Sdim removeSuccessor(SrcMBB); 1494263509Sdim MLI->removeBlock(SrcMBB); 1495263509Sdim retireBlock(SrcMBB); 1496263509Sdim} 1497249259Sdim 1498263509Sdimvoid AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1499263509Sdim MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1500263509Sdim MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1501263509Sdim assert (TrueMBB); 1502263509Sdim DEBUG( 1503263509Sdim dbgs() << "ifPattern BB" << MBB->getNumber(); 1504263509Sdim dbgs() << "{ "; 1505263509Sdim if (TrueMBB) { 1506263509Sdim dbgs() << "BB" << TrueMBB->getNumber(); 1507249259Sdim } 1508263509Sdim dbgs() << " } else "; 1509263509Sdim dbgs() << "{ "; 1510263509Sdim if (FalseMBB) { 1511263509Sdim dbgs() << "BB" << FalseMBB->getNumber(); 1512249259Sdim } 1513263509Sdim dbgs() << " }\n "; 1514263509Sdim dbgs() << "landBlock: "; 1515263509Sdim if (!LandMBB) { 1516263509Sdim dbgs() << "NULL"; 1517249259Sdim } else { 1518263509Sdim dbgs() << "BB" << LandMBB->getNumber(); 1519249259Sdim } 1520263509Sdim dbgs() << "\n"; 1521263509Sdim ); 1522249259Sdim 1523263509Sdim int OldOpcode = BranchMI->getOpcode(); 1524263509Sdim DebugLoc BranchDL = BranchMI->getDebugLoc(); 1525249259Sdim 1526249259Sdim// transform to 1527249259Sdim// if cond 1528249259Sdim// trueBlk 1529249259Sdim// else 1530249259Sdim// falseBlk 1531249259Sdim// endif 1532249259Sdim// landBlk 1533249259Sdim 1534263509Sdim MachineBasicBlock::iterator I = BranchMI; 1535263509Sdim insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1536263509Sdim BranchDL); 1537249259Sdim 1538263509Sdim if (TrueMBB) { 1539263509Sdim MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1540263509Sdim MBB->removeSuccessor(TrueMBB); 1541263509Sdim if (LandMBB && TrueMBB->succ_size()!=0) 1542263509Sdim TrueMBB->removeSuccessor(LandMBB); 1543263509Sdim retireBlock(TrueMBB); 1544263509Sdim MLI->removeBlock(TrueMBB); 1545249259Sdim } 1546249259Sdim 1547263509Sdim if (FalseMBB) { 1548263509Sdim insertInstrBefore(I, AMDGPU::ELSE); 1549263509Sdim MBB->splice(I, FalseMBB, FalseMBB->begin(), 1550263509Sdim FalseMBB->end()); 1551263509Sdim MBB->removeSuccessor(FalseMBB); 1552263509Sdim if (LandMBB && FalseMBB->succ_size() != 0) 1553263509Sdim FalseMBB->removeSuccessor(LandMBB); 1554263509Sdim retireBlock(FalseMBB); 1555263509Sdim MLI->removeBlock(FalseMBB); 1556249259Sdim } 1557263509Sdim insertInstrBefore(I, AMDGPU::ENDIF); 1558249259Sdim 1559263509Sdim BranchMI->eraseFromParent(); 1560249259Sdim 1561263509Sdim if (LandMBB && TrueMBB && FalseMBB) 1562263509Sdim MBB->addSuccessor(LandMBB); 1563249259Sdim 1564263509Sdim} 1565249259Sdim 1566263509Sdimvoid AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1567263509Sdim MachineBasicBlock *LandMBB) { 1568263509Sdim DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1569263509Sdim << " land = BB" << LandMBB->getNumber() << "\n";); 1570249259Sdim 1571263509Sdim insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); 1572263509Sdim insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); 1573263509Sdim DstBlk->addSuccessor(LandMBB); 1574263509Sdim DstBlk->removeSuccessor(DstBlk); 1575249259Sdim} 1576249259Sdim 1577249259Sdim 1578263509Sdimvoid AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1579263509Sdim MachineBasicBlock *LandMBB) { 1580263509Sdim DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() 1581263509Sdim << " land = BB" << LandMBB->getNumber() << "\n";); 1582263509Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1583263509Sdim assert(BranchMI && isCondBranch(BranchMI)); 1584263509Sdim DebugLoc DL = BranchMI->getDebugLoc(); 1585263509Sdim MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1586263509Sdim MachineBasicBlock::iterator I = BranchMI; 1587263509Sdim if (TrueBranch != LandMBB) 1588263509Sdim reversePredicateSetter(I); 1589263509Sdim insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); 1590263509Sdim insertInstrBefore(I, AMDGPU::BREAK); 1591263509Sdim insertInstrBefore(I, AMDGPU::ENDIF); 1592249259Sdim //now branchInst can be erase safely 1593263509Sdim BranchMI->eraseFromParent(); 1594249259Sdim //now take care of successors, retire blocks 1595263509Sdim ExitingMBB->removeSuccessor(LandMBB); 1596263509Sdim} 1597249259Sdim 1598263509Sdimvoid AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1599263509Sdim MachineBasicBlock *ContMBB) { 1600263509Sdim DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1601263509Sdim << ContingMBB->getNumber() 1602263509Sdim << ", cont = BB" << ContMBB->getNumber() << "\n";); 1603249259Sdim 1604263509Sdim MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1605263509Sdim if (MI) { 1606263509Sdim assert(isCondBranch(MI)); 1607263509Sdim MachineBasicBlock::iterator I = MI; 1608263509Sdim MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1609263509Sdim int OldOpcode = MI->getOpcode(); 1610263509Sdim DebugLoc DL = MI->getDebugLoc(); 1611249259Sdim 1612263509Sdim bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1613249259Sdim 1614263509Sdim if (UseContinueLogical == false) { 1615263509Sdim int BranchOpcode = 1616263509Sdim TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1617263509Sdim getBranchZeroOpcode(OldOpcode); 1618263509Sdim insertCondBranchBefore(I, BranchOpcode, DL); 1619263509Sdim // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1620263509Sdim insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); 1621263509Sdim insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); 1622249259Sdim } else { 1623263509Sdim int BranchOpcode = 1624263509Sdim TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1625263509Sdim getContinueZeroOpcode(OldOpcode); 1626263509Sdim insertCondBranchBefore(I, BranchOpcode, DL); 1627249259Sdim } 1628249259Sdim 1629263509Sdim MI->eraseFromParent(); 1630249259Sdim } else { 1631249259Sdim // if we've arrived here then we've already erased the branch instruction 1632263509Sdim // travel back up the basic block to see the last reference of our debug 1633263509Sdim // location we've just inserted that reference here so it should be 1634263509Sdim // representative insertEnd to ensure phi-moves, if exist, go before the 1635263509Sdim // continue-instr. 1636263509Sdim insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, 1637263509Sdim getLastDebugLocInBB(ContingMBB)); 1638249259Sdim } 1639263509Sdim} 1640249259Sdim 1641263509Sdimint AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1642263509Sdim MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1643263509Sdim int Cloned = 0; 1644263509Sdim assert(PreMBB->isSuccessor(SrcMBB)); 1645263509Sdim while (SrcMBB && SrcMBB != DstMBB) { 1646263509Sdim assert(SrcMBB->succ_size() == 1); 1647263509Sdim if (SrcMBB->pred_size() > 1) { 1648263509Sdim SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1649263509Sdim ++Cloned; 1650249259Sdim } 1651249259Sdim 1652263509Sdim PreMBB = SrcMBB; 1653263509Sdim SrcMBB = *SrcMBB->succ_begin(); 1654249259Sdim } 1655249259Sdim 1656263509Sdim return Cloned; 1657263509Sdim} 1658249259Sdim 1659263509SdimMachineBasicBlock * 1660263509SdimAMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1661263509Sdim MachineBasicBlock *PredMBB) { 1662263509Sdim assert(PredMBB->isSuccessor(MBB) && 1663249259Sdim "succBlk is not a prececessor of curBlk"); 1664249259Sdim 1665263509Sdim MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1666263509Sdim replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1667249259Sdim //srcBlk, oldBlk, newBlk 1668249259Sdim 1669263509Sdim PredMBB->removeSuccessor(MBB); 1670263509Sdim PredMBB->addSuccessor(CloneMBB); 1671249259Sdim 1672249259Sdim // add all successor to cloneBlk 1673263509Sdim cloneSuccessorList(CloneMBB, MBB); 1674249259Sdim 1675263509Sdim numClonedInstr += MBB->size(); 1676249259Sdim 1677263509Sdim DEBUG( 1678263509Sdim dbgs() << "Cloned block: " << "BB" 1679263509Sdim << MBB->getNumber() << "size " << MBB->size() << "\n"; 1680263509Sdim ); 1681249259Sdim 1682263509Sdim SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1683249259Sdim 1684263509Sdim return CloneMBB; 1685263509Sdim} 1686249259Sdim 1687263509Sdimvoid AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1688263509Sdim MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1689263509Sdim MachineBasicBlock::iterator SpliceEnd; 1690249259Sdim //look for the input branchinstr, not the AMDGPU branchinstr 1691263509Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1692263509Sdim if (!BranchMI) { 1693263509Sdim DEBUG( 1694263509Sdim dbgs() << "migrateInstruction don't see branch instr\n" ; 1695263509Sdim ); 1696263509Sdim SpliceEnd = SrcMBB->end(); 1697249259Sdim } else { 1698263509Sdim DEBUG( 1699263509Sdim dbgs() << "migrateInstruction see branch instr\n" ; 1700263509Sdim BranchMI->dump(); 1701263509Sdim ); 1702263509Sdim SpliceEnd = BranchMI; 1703249259Sdim } 1704263509Sdim DEBUG( 1705263509Sdim dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() 1706263509Sdim << "srcSize = " << SrcMBB->size() << "\n"; 1707263509Sdim ); 1708249259Sdim 1709249259Sdim //splice insert before insertPos 1710263509Sdim DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1711249259Sdim 1712263509Sdim DEBUG( 1713263509Sdim dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() 1714263509Sdim << "srcSize = " << SrcMBB->size() << "\n"; 1715263509Sdim ); 1716263509Sdim} 1717249259Sdim 1718263509SdimMachineBasicBlock * 1719263509SdimAMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1720263509Sdim MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1721263509Sdim MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1722249259Sdim const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1723249259Sdim 1724263509Sdim if (!LoopHeader || !LoopLatch) 1725263509Sdim return NULL; 1726263509Sdim MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1727263509Sdim // Is LoopRep an infinite loop ? 1728263509Sdim if (!BranchMI || !isUncondBranch(BranchMI)) 1729263509Sdim return NULL; 1730249259Sdim 1731263509Sdim MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1732263509Sdim FuncRep->push_back(DummyExitBlk); //insert to function 1733263509Sdim SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1734263509Sdim DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1735263509Sdim MachineBasicBlock::iterator I = BranchMI; 1736263509Sdim unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC); 1737263509Sdim llvm_unreachable("Extra register needed to handle CFG"); 1738263509Sdim MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32); 1739263509Sdim MachineInstrBuilder MIB(*FuncRep, NewMI); 1740263509Sdim MIB.addMBB(LoopHeader); 1741263509Sdim MIB.addReg(ImmReg, false); 1742263509Sdim SHOWNEWINSTR(NewMI); 1743263509Sdim BranchMI->eraseFromParent(); 1744263509Sdim LoopLatch->addSuccessor(DummyExitBlk); 1745249259Sdim 1746263509Sdim return DummyExitBlk; 1747263509Sdim} 1748249259Sdim 1749263509Sdimvoid AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1750263509Sdim MachineInstr *BranchMI; 1751249259Sdim 1752249259Sdim // I saw two unconditional branch in one basic block in example 1753249259Sdim // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1754263509Sdim while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1755263509Sdim && isUncondBranch(BranchMI)) { 1756263509Sdim DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump();); 1757263509Sdim BranchMI->eraseFromParent(); 1758249259Sdim } 1759263509Sdim} 1760249259Sdim 1761263509Sdimvoid AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1762263509Sdim MachineBasicBlock *MBB) { 1763263509Sdim if (MBB->succ_size() != 2) 1764263509Sdim return; 1765263509Sdim MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1766263509Sdim MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin()); 1767263509Sdim if (MBB1 != MBB2) 1768263509Sdim return; 1769249259Sdim 1770263509Sdim MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1771263509Sdim assert(BranchMI && isCondBranch(BranchMI)); 1772263509Sdim DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump();); 1773263509Sdim BranchMI->eraseFromParent(); 1774263509Sdim SHOWNEWBLK(MBB1, "Removing redundant successor"); 1775263509Sdim MBB->removeSuccessor(MBB1); 1776263509Sdim} 1777249259Sdim 1778263509Sdimvoid AMDGPUCFGStructurizer::addDummyExitBlock( 1779263509Sdim SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1780263509Sdim MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1781263509Sdim FuncRep->push_back(DummyExitBlk); //insert to function 1782263509Sdim insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); 1783249259Sdim 1784263509Sdim for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 1785263509Sdim E = RetMBB.end(); It != E; ++It) { 1786263509Sdim MachineBasicBlock *MBB = *It; 1787263509Sdim MachineInstr *MI = getReturnInstr(MBB); 1788263509Sdim if (MI) 1789263509Sdim MI->eraseFromParent(); 1790263509Sdim MBB->addSuccessor(DummyExitBlk); 1791263509Sdim DEBUG( 1792263509Sdim dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1793249259Sdim << " successors\n"; 1794263509Sdim ); 1795249259Sdim } 1796263509Sdim SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1797249259Sdim} 1798249259Sdim 1799263509Sdimvoid AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1800263509Sdim while (MBB->succ_size()) 1801263509Sdim MBB->removeSuccessor(*MBB->succ_begin()); 1802249259Sdim} 1803249259Sdim 1804263509Sdimvoid AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1805263509Sdim int SccNum) { 1806263509Sdim BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1807263509Sdim if (!srcBlkInfo) 1808263509Sdim srcBlkInfo = new BlockInformation(); 1809263509Sdim srcBlkInfo->SccNum = SccNum; 1810249259Sdim} 1811249259Sdim 1812263509Sdimvoid AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1813263509Sdim DEBUG( 1814263509Sdim dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; 1815263509Sdim ); 1816249259Sdim 1817263509Sdim BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1818249259Sdim 1819263509Sdim if (!SrcBlkInfo) 1820263509Sdim SrcBlkInfo = new BlockInformation(); 1821249259Sdim 1822263509Sdim SrcBlkInfo->IsRetired = true; 1823263509Sdim assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1824249259Sdim && "can't retire block yet"); 1825249259Sdim} 1826249259Sdim 1827263509Sdimvoid AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep, 1828263509Sdim MachineBasicBlock *MBB) { 1829263509Sdim MachineBasicBlock *&TheEntry = LLInfoMap[loopRep]; 1830263509Sdim if (!MBB) { 1831263509Sdim MBB = FuncRep->CreateMachineBasicBlock(); 1832263509Sdim FuncRep->push_back(MBB); //insert to function 1833263509Sdim SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: "); 1834249259Sdim } 1835263509Sdim TheEntry = MBB; 1836263509Sdim DEBUG( 1837263509Sdim dbgs() << "setLoopLandBlock loop-header = BB" 1838249259Sdim << loopRep->getHeader()->getNumber() 1839263509Sdim << " landing-block = BB" << MBB->getNumber() << "\n"; 1840263509Sdim ); 1841263509Sdim} 1842249259Sdim 1843263509SdimMachineBasicBlock * 1844263509SdimAMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, 1845263509Sdim MachineBasicBlock *MBB2) { 1846249259Sdim 1847263509Sdim if (PDT->dominates(MBB1, MBB2)) 1848263509Sdim return MBB1; 1849263509Sdim if (PDT->dominates(MBB2, MBB1)) 1850263509Sdim return MBB2; 1851249259Sdim 1852263509Sdim MachineDomTreeNode *Node1 = PDT->getNode(MBB1); 1853263509Sdim MachineDomTreeNode *Node2 = PDT->getNode(MBB2); 1854249259Sdim 1855249259Sdim // Handle newly cloned node. 1856263509Sdim if (!Node1 && MBB1->succ_size() == 1) 1857263509Sdim return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2); 1858263509Sdim if (!Node2 && MBB2->succ_size() == 1) 1859263509Sdim return findNearestCommonPostDom(MBB1, *MBB2->succ_begin()); 1860249259Sdim 1861263509Sdim if (!Node1 || !Node2) 1862249259Sdim return NULL; 1863249259Sdim 1864263509Sdim Node1 = Node1->getIDom(); 1865263509Sdim while (Node1) { 1866263509Sdim if (PDT->dominates(Node1, Node2)) 1867263509Sdim return Node1->getBlock(); 1868263509Sdim Node1 = Node1->getIDom(); 1869249259Sdim } 1870249259Sdim 1871249259Sdim return NULL; 1872249259Sdim} 1873249259Sdim 1874263509SdimMachineBasicBlock * 1875263509SdimAMDGPUCFGStructurizer::findNearestCommonPostDom( 1876263509Sdim std::set<MachineBasicBlock *> &MBBs) { 1877263509Sdim MachineBasicBlock *CommonDom; 1878263509Sdim std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin(); 1879263509Sdim std::set<MachineBasicBlock *>::const_iterator E = MBBs.end(); 1880263509Sdim for (CommonDom = *It; It != E && CommonDom; ++It) { 1881263509Sdim MachineBasicBlock *MBB = *It; 1882263509Sdim if (MBB != CommonDom) 1883263509Sdim CommonDom = findNearestCommonPostDom(MBB, CommonDom); 1884249259Sdim } 1885249259Sdim 1886263509Sdim DEBUG( 1887263509Sdim dbgs() << "Common post dominator for exit blocks is "; 1888263509Sdim if (CommonDom) 1889263509Sdim dbgs() << "BB" << CommonDom->getNumber() << "\n"; 1890263509Sdim else 1891263509Sdim dbgs() << "NULL\n"; 1892263509Sdim ); 1893249259Sdim 1894263509Sdim return CommonDom; 1895249259Sdim} 1896249259Sdim 1897263509Sdimchar AMDGPUCFGStructurizer::ID = 0; 1898249259Sdim 1899263509Sdim} // end anonymous namespace 1900249259Sdim 1901249259Sdim 1902263509SdimFunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) { 1903263509Sdim return new AMDGPUCFGStructurizer(tm); 1904249259Sdim} 1905