1327952Sdim//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2284677Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6284677Sdim//
7284677Sdim//==-----------------------------------------------------------------------===//
8284677Sdim
9284677Sdim#include "AMDGPU.h"
10284677Sdim#include "AMDGPUSubtarget.h"
11360784Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12284677Sdim#include "R600InstrInfo.h"
13321369Sdim#include "R600RegisterInfo.h"
14284677Sdim#include "llvm/ADT/DepthFirstIterator.h"
15284677Sdim#include "llvm/ADT/SCCIterator.h"
16321369Sdim#include "llvm/ADT/SmallPtrSet.h"
17284677Sdim#include "llvm/ADT/SmallVector.h"
18284677Sdim#include "llvm/ADT/Statistic.h"
19321369Sdim#include "llvm/ADT/StringRef.h"
20321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
21284677Sdim#include "llvm/CodeGen/MachineDominators.h"
22284677Sdim#include "llvm/CodeGen/MachineFunction.h"
23284677Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
24321369Sdim#include "llvm/CodeGen/MachineInstr.h"
25284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
26284677Sdim#include "llvm/CodeGen/MachineJumpTableInfo.h"
27284677Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
28321369Sdim#include "llvm/CodeGen/MachineOperand.h"
29284677Sdim#include "llvm/CodeGen/MachinePostDominators.h"
30284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
31321369Sdim#include "llvm/IR/DebugLoc.h"
32321369Sdim#include "llvm/IR/LLVMContext.h"
33360784Sdim#include "llvm/InitializePasses.h"
34321369Sdim#include "llvm/Pass.h"
35284677Sdim#include "llvm/Support/Debug.h"
36321369Sdim#include "llvm/Support/ErrorHandling.h"
37341825Sdim#include "llvm/Support/MachineValueType.h"
38284677Sdim#include "llvm/Support/raw_ostream.h"
39321369Sdim#include <cassert>
40321369Sdim#include <cstddef>
41284677Sdim#include <deque>
42321369Sdim#include <iterator>
43321369Sdim#include <map>
44321369Sdim#include <utility>
45321369Sdim#include <vector>
46284677Sdim
47284677Sdimusing namespace llvm;
48284677Sdim
49284677Sdim#define DEBUG_TYPE "structcfg"
50284677Sdim
51284677Sdim#define DEFAULT_VEC_SLOTS 8
52284677Sdim
53284677Sdim// TODO: move-begin.
54284677Sdim
55284677Sdim//===----------------------------------------------------------------------===//
56284677Sdim//
57284677Sdim// Statistics for CFGStructurizer.
58284677Sdim//
59284677Sdim//===----------------------------------------------------------------------===//
60284677Sdim
61284677SdimSTATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
62284677Sdim    "matched");
63284677SdimSTATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
64284677Sdim    "matched");
65284677SdimSTATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
66284677SdimSTATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
67284677Sdim
68284677Sdimnamespace llvm {
69321369Sdim
70327952Sdimvoid initializeAMDGPUCFGStructurizerPass(PassRegistry &);
71284677Sdim
72321369Sdim} // end namespace llvm
73321369Sdim
74321369Sdimnamespace {
75321369Sdim
76284677Sdim//===----------------------------------------------------------------------===//
77284677Sdim//
78284677Sdim// Miscellaneous utility for CFGStructurizer.
79284677Sdim//
80284677Sdim//===----------------------------------------------------------------------===//
81321369Sdim
82341825Sdim#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
83284677Sdim
84341825Sdim#define SHOWNEWBLK(b, msg)                                                     \
85341825Sdim  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size();  \
86341825Sdim             dbgs() << "\n";);
87284677Sdim
88341825Sdim#define SHOWBLK_DETAIL(b, msg)                                                 \
89341825Sdim  LLVM_DEBUG(if (b) {                                                          \
90341825Sdim    dbgs() << msg << "BB" << b->getNumber() << "size " << b->size();           \
91341825Sdim    b->print(dbgs());                                                          \
92341825Sdim    dbgs() << "\n";                                                            \
93341825Sdim  });
94284677Sdim
95284677Sdim#define INVALIDSCCNUM -1
96284677Sdim
97284677Sdim//===----------------------------------------------------------------------===//
98284677Sdim//
99284677Sdim// supporting data structure for CFGStructurizer
100284677Sdim//
101284677Sdim//===----------------------------------------------------------------------===//
102284677Sdim
103284677Sdimclass BlockInformation {
104284677Sdimpublic:
105321369Sdim  bool IsRetired = false;
106321369Sdim  int SccNum = INVALIDSCCNUM;
107321369Sdim
108321369Sdim  BlockInformation() = default;
109284677Sdim};
110284677Sdim
111284677Sdim//===----------------------------------------------------------------------===//
112284677Sdim//
113284677Sdim// CFGStructurizer
114284677Sdim//
115284677Sdim//===----------------------------------------------------------------------===//
116284677Sdim
117284677Sdimclass AMDGPUCFGStructurizer : public MachineFunctionPass {
118284677Sdimpublic:
119327952Sdim  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
120327952Sdim  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
121327952Sdim  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
122284677Sdim
123284677Sdim  enum PathToKind {
124284677Sdim    Not_SinglePath = 0,
125284677Sdim    SinglePath_InPath = 1,
126284677Sdim    SinglePath_NotInPath = 2
127284677Sdim  };
128284677Sdim
129284677Sdim  static char ID;
130284677Sdim
131321369Sdim  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
132284677Sdim    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
133284677Sdim  }
134284677Sdim
135314564Sdim  StringRef getPassName() const override {
136284677Sdim    return "AMDGPU Control Flow Graph structurizer Pass";
137284677Sdim  }
138284677Sdim
139284677Sdim  void getAnalysisUsage(AnalysisUsage &AU) const override {
140284677Sdim    AU.addRequired<MachineDominatorTree>();
141284677Sdim    AU.addRequired<MachinePostDominatorTree>();
142284677Sdim    AU.addRequired<MachineLoopInfo>();
143314564Sdim    MachineFunctionPass::getAnalysisUsage(AU);
144284677Sdim  }
145284677Sdim
146284677Sdim  /// Perform the CFG structurization
147284677Sdim  bool run();
148284677Sdim
149284677Sdim  /// Perform the CFG preparation
150284677Sdim  /// This step will remove every unconditionnal/dead jump instructions and make
151284677Sdim  /// sure all loops have an exit block
152284677Sdim  bool prepare();
153284677Sdim
154284677Sdim  bool runOnMachineFunction(MachineFunction &MF) override {
155309124Sdim    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
156284677Sdim    TRI = &TII->getRegisterInfo();
157341825Sdim    LLVM_DEBUG(MF.dump(););
158284677Sdim    OrderedBlks.clear();
159284677Sdim    Visited.clear();
160284677Sdim    FuncRep = &MF;
161284677Sdim    MLI = &getAnalysis<MachineLoopInfo>();
162341825Sdim    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
163284677Sdim    MDT = &getAnalysis<MachineDominatorTree>();
164341825Sdim    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
165284677Sdim    PDT = &getAnalysis<MachinePostDominatorTree>();
166341825Sdim    LLVM_DEBUG(PDT->print(dbgs()););
167284677Sdim    prepare();
168284677Sdim    run();
169341825Sdim    LLVM_DEBUG(MF.dump(););
170284677Sdim    return true;
171284677Sdim  }
172284677Sdim
173284677Sdimprotected:
174284677Sdim  MachineDominatorTree *MDT;
175284677Sdim  MachinePostDominatorTree *PDT;
176284677Sdim  MachineLoopInfo *MLI;
177321369Sdim  const R600InstrInfo *TII = nullptr;
178321369Sdim  const R600RegisterInfo *TRI = nullptr;
179284677Sdim
180284677Sdim  // PRINT FUNCTIONS
181284677Sdim  /// Print the ordered Blocks.
182284677Sdim  void printOrderedBlocks() const {
183284677Sdim    size_t i = 0;
184284677Sdim    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
185284677Sdim        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
186284677Sdim      dbgs() << "BB" << (*iterBlk)->getNumber();
187284677Sdim      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
188284677Sdim      if (i != 0 && i % 10 == 0) {
189284677Sdim        dbgs() << "\n";
190284677Sdim      } else {
191284677Sdim        dbgs() << " ";
192284677Sdim      }
193284677Sdim    }
194284677Sdim  }
195321369Sdim
196284677Sdim  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
197284677Sdim    for (MachineLoop::iterator iter = LoopInfo.begin(),
198284677Sdim         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
199284677Sdim      (*iter)->print(dbgs(), 0);
200284677Sdim    }
201284677Sdim  }
202284677Sdim
203284677Sdim  // UTILITY FUNCTIONS
204284677Sdim  int getSCCNum(MachineBasicBlock *MBB) const;
205284677Sdim  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
206284677Sdim  bool hasBackEdge(MachineBasicBlock *MBB) const;
207284677Sdim  bool isRetiredBlock(MachineBasicBlock *MBB) const;
208284677Sdim  bool isActiveLoophead(MachineBasicBlock *MBB) const;
209284677Sdim  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
210284677Sdim      bool AllowSideEntry = true) const;
211284677Sdim  int countActiveBlock(MBBVector::const_iterator It,
212284677Sdim      MBBVector::const_iterator E) const;
213284677Sdim  bool needMigrateBlock(MachineBasicBlock *MBB) const;
214284677Sdim
215284677Sdim  // Utility Functions
216314564Sdim  void reversePredicateSetter(MachineBasicBlock::iterator I,
217314564Sdim                              MachineBasicBlock &MBB);
218284677Sdim  /// Compute the reversed DFS post order of Blocks
219284677Sdim  void orderBlocks(MachineFunction *MF);
220284677Sdim
221284677Sdim  // Function originally from CFGStructTraits
222284677Sdim  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
223309124Sdim                      const DebugLoc &DL = DebugLoc());
224284677Sdim  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
225309124Sdim                                  const DebugLoc &DL = DebugLoc());
226284677Sdim  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
227284677Sdim  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
228309124Sdim                              const DebugLoc &DL);
229284677Sdim  void insertCondBranchBefore(MachineBasicBlock *MBB,
230309124Sdim                              MachineBasicBlock::iterator I, int NewOpcode,
231309124Sdim                              int RegNum, const DebugLoc &DL);
232327952Sdim
233284677Sdim  static int getBranchNzeroOpcode(int OldOpcode);
234284677Sdim  static int getBranchZeroOpcode(int OldOpcode);
235284677Sdim  static int getContinueNzeroOpcode(int OldOpcode);
236284677Sdim  static int getContinueZeroOpcode(int OldOpcode);
237284677Sdim  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
238284677Sdim  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
239284677Sdim  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
240284677Sdim      MachineInstr *MI);
241284677Sdim  static bool isCondBranch(MachineInstr *MI);
242284677Sdim  static bool isUncondBranch(MachineInstr *MI);
243284677Sdim  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
244284677Sdim  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
245327952Sdim
246284677Sdim  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
247284677Sdim  ///
248284677Sdim  /// BB with backward-edge could have move instructions after the branch
249284677Sdim  /// instruction.  Such move instruction "belong to" the loop backward-edge.
250284677Sdim  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
251327952Sdim
252284677Sdim  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
253284677Sdim  static bool isReturnBlock(MachineBasicBlock *MBB);
254284677Sdim  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
255327952Sdim      MachineBasicBlock *SrcMBB);
256284677Sdim  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
257327952Sdim
258284677Sdim  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
259284677Sdim  /// because the AMDGPU instruction is not recognized as terminator fix this
260284677Sdim  /// and retire this routine
261284677Sdim  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
262284677Sdim      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
263327952Sdim
264284677Sdim  static void wrapup(MachineBasicBlock *MBB);
265284677Sdim
266284677Sdim  int patternMatch(MachineBasicBlock *MBB);
267284677Sdim  int patternMatchGroup(MachineBasicBlock *MBB);
268284677Sdim  int serialPatternMatch(MachineBasicBlock *MBB);
269284677Sdim  int ifPatternMatch(MachineBasicBlock *MBB);
270284677Sdim  int loopendPatternMatch();
271284677Sdim  int mergeLoop(MachineLoop *LoopRep);
272284677Sdim
273284677Sdim  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
274284677Sdim  /// the same loop with LoopLandInfo without explicitly keeping track of
275284677Sdim  /// loopContBlks and loopBreakBlks, this is a method to get the information.
276284677Sdim  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
277284677Sdim      MachineBasicBlock *Src2MBB);
278284677Sdim  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
279284677Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
280284677Sdim  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
281284677Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
282284677Sdim  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
283284677Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
284284677Sdim      MachineBasicBlock **LandMBBPtr);
285284677Sdim  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
286284677Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
287284677Sdim      MachineBasicBlock *LandMBB, bool Detail = false);
288284677Sdim  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
289284677Sdim      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
290284677Sdim  void mergeSerialBlock(MachineBasicBlock *DstMBB,
291284677Sdim      MachineBasicBlock *SrcMBB);
292284677Sdim
293284677Sdim  void mergeIfthenelseBlock(MachineInstr *BranchMI,
294284677Sdim      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
295284677Sdim      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
296284677Sdim  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
297284677Sdim      MachineBasicBlock *LandMBB);
298284677Sdim  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
299284677Sdim      MachineBasicBlock *LandMBB);
300284677Sdim  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
301284677Sdim      MachineBasicBlock *ContMBB);
302327952Sdim
303284677Sdim  /// normalizeInfiniteLoopExit change
304284677Sdim  ///   B1:
305284677Sdim  ///        uncond_br LoopHeader
306284677Sdim  ///
307284677Sdim  /// to
308284677Sdim  ///   B1:
309284677Sdim  ///        cond_br 1 LoopHeader dummyExit
310284677Sdim  /// and return the newly added dummy exit block
311284677Sdim  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
312284677Sdim  void removeUnconditionalBranch(MachineBasicBlock *MBB);
313327952Sdim
314284677Sdim  /// Remove duplicate branches instructions in a block.
315284677Sdim  /// For instance
316284677Sdim  /// B0:
317284677Sdim  ///    cond_br X B1 B2
318284677Sdim  ///    cond_br X B1 B2
319284677Sdim  /// is transformed to
320284677Sdim  /// B0:
321284677Sdim  ///    cond_br X B1 B2
322284677Sdim  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
323327952Sdim
324284677Sdim  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
325284677Sdim  void removeSuccessor(MachineBasicBlock *MBB);
326284677Sdim  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
327284677Sdim      MachineBasicBlock *PredMBB);
328284677Sdim  void migrateInstruction(MachineBasicBlock *SrcMBB,
329284677Sdim      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
330284677Sdim  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
331284677Sdim  void retireBlock(MachineBasicBlock *MBB);
332284677Sdim
333284677Sdimprivate:
334284677Sdim  MBBInfoMap BlockInfoMap;
335284677Sdim  LoopLandInfoMap LLInfoMap;
336284677Sdim  std::map<MachineLoop *, bool> Visited;
337284677Sdim  MachineFunction *FuncRep;
338284677Sdim  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
339284677Sdim};
340284677Sdim
341327952Sdim} // end anonymous namespace
342327952Sdim
343321369Sdimchar AMDGPUCFGStructurizer::ID = 0;
344321369Sdim
345284677Sdimint AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
346284677Sdim  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
347284677Sdim  if (It == BlockInfoMap.end())
348284677Sdim    return INVALIDSCCNUM;
349284677Sdim  return (*It).second->SccNum;
350284677Sdim}
351284677Sdim
352284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
353284677Sdim    const {
354284677Sdim  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
355284677Sdim  if (It == LLInfoMap.end())
356284677Sdim    return nullptr;
357284677Sdim  return (*It).second;
358284677Sdim}
359284677Sdim
360284677Sdimbool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
361284677Sdim  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
362284677Sdim  if (!LoopRep)
363284677Sdim    return false;
364284677Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
365284677Sdim  return MBB->isSuccessor(LoopHeader);
366284677Sdim}
367284677Sdim
368284677Sdimbool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
369284677Sdim  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
370284677Sdim  if (It == BlockInfoMap.end())
371284677Sdim    return false;
372284677Sdim  return (*It).second->IsRetired;
373284677Sdim}
374284677Sdim
375284677Sdimbool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
376284677Sdim  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
377284677Sdim  while (LoopRep && LoopRep->getHeader() == MBB) {
378284677Sdim    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
379284677Sdim    if(!LoopLand)
380284677Sdim      return true;
381284677Sdim    if (!isRetiredBlock(LoopLand))
382284677Sdim      return true;
383284677Sdim    LoopRep = LoopRep->getParentLoop();
384284677Sdim  }
385284677Sdim  return false;
386284677Sdim}
387321369Sdim
388284677SdimAMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
389284677Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
390284677Sdim    bool AllowSideEntry) const {
391284677Sdim  assert(DstMBB);
392284677Sdim  if (SrcMBB == DstMBB)
393284677Sdim    return SinglePath_InPath;
394284677Sdim  while (SrcMBB && SrcMBB->succ_size() == 1) {
395284677Sdim    SrcMBB = *SrcMBB->succ_begin();
396284677Sdim    if (SrcMBB == DstMBB)
397284677Sdim      return SinglePath_InPath;
398284677Sdim    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
399284677Sdim      return Not_SinglePath;
400284677Sdim  }
401284677Sdim  if (SrcMBB && SrcMBB->succ_size()==0)
402284677Sdim    return SinglePath_NotInPath;
403284677Sdim  return Not_SinglePath;
404284677Sdim}
405284677Sdim
406284677Sdimint AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
407284677Sdim    MBBVector::const_iterator E) const {
408284677Sdim  int Count = 0;
409284677Sdim  while (It != E) {
410284677Sdim    if (!isRetiredBlock(*It))
411284677Sdim      ++Count;
412284677Sdim    ++It;
413284677Sdim  }
414284677Sdim  return Count;
415284677Sdim}
416284677Sdim
417284677Sdimbool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
418284677Sdim  unsigned BlockSizeThreshold = 30;
419284677Sdim  unsigned CloneInstrThreshold = 100;
420284677Sdim  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
421284677Sdim
422284677Sdim  if(!MultiplePreds)
423284677Sdim    return false;
424284677Sdim  unsigned BlkSize = MBB->size();
425284677Sdim  return ((BlkSize > BlockSizeThreshold) &&
426284677Sdim      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
427284677Sdim}
428284677Sdim
429284677Sdimvoid AMDGPUCFGStructurizer::reversePredicateSetter(
430314564Sdim    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
431314564Sdim  assert(I.isValid() && "Expected valid iterator");
432309124Sdim  for (;; --I) {
433314564Sdim    if (I == MBB.end())
434314564Sdim      continue;
435341825Sdim    if (I->getOpcode() == R600::PRED_X) {
436314564Sdim      switch (I->getOperand(2).getImm()) {
437341825Sdim      case R600::PRED_SETE_INT:
438341825Sdim        I->getOperand(2).setImm(R600::PRED_SETNE_INT);
439284677Sdim        return;
440341825Sdim      case R600::PRED_SETNE_INT:
441341825Sdim        I->getOperand(2).setImm(R600::PRED_SETE_INT);
442284677Sdim        return;
443341825Sdim      case R600::PRED_SETE:
444341825Sdim        I->getOperand(2).setImm(R600::PRED_SETNE);
445284677Sdim        return;
446341825Sdim      case R600::PRED_SETNE:
447341825Sdim        I->getOperand(2).setImm(R600::PRED_SETE);
448284677Sdim        return;
449284677Sdim      default:
450284677Sdim        llvm_unreachable("PRED_X Opcode invalid!");
451284677Sdim      }
452284677Sdim    }
453284677Sdim  }
454284677Sdim}
455284677Sdim
456284677Sdimvoid AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
457309124Sdim                                           int NewOpcode, const DebugLoc &DL) {
458309124Sdim  MachineInstr *MI =
459309124Sdim      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
460284677Sdim  MBB->push_back(MI);
461284677Sdim  //assume the instruction doesn't take any reg operand ...
462284677Sdim  SHOWNEWINSTR(MI);
463284677Sdim}
464284677Sdim
465284677SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
466309124Sdim                                                       int NewOpcode,
467309124Sdim                                                       const DebugLoc &DL) {
468284677Sdim  MachineInstr *MI =
469284677Sdim      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
470284677Sdim  if (MBB->begin() != MBB->end())
471284677Sdim    MBB->insert(MBB->begin(), MI);
472284677Sdim  else
473284677Sdim    MBB->push_back(MI);
474284677Sdim  SHOWNEWINSTR(MI);
475284677Sdim  return MI;
476284677Sdim}
477284677Sdim
478284677SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
479284677Sdim    MachineBasicBlock::iterator I, int NewOpcode) {
480284677Sdim  MachineInstr *OldMI = &(*I);
481284677Sdim  MachineBasicBlock *MBB = OldMI->getParent();
482284677Sdim  MachineInstr *NewMBB =
483284677Sdim      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
484284677Sdim  MBB->insert(I, NewMBB);
485284677Sdim  //assume the instruction doesn't take any reg operand ...
486284677Sdim  SHOWNEWINSTR(NewMBB);
487284677Sdim  return NewMBB;
488284677Sdim}
489284677Sdim
490284677Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore(
491309124Sdim    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
492284677Sdim  MachineInstr *OldMI = &(*I);
493284677Sdim  MachineBasicBlock *MBB = OldMI->getParent();
494284677Sdim  MachineFunction *MF = MBB->getParent();
495284677Sdim  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
496284677Sdim  MBB->insert(I, NewMI);
497284677Sdim  MachineInstrBuilder MIB(*MF, NewMI);
498284677Sdim  MIB.addReg(OldMI->getOperand(1).getReg(), false);
499284677Sdim  SHOWNEWINSTR(NewMI);
500284677Sdim  //erase later oldInstr->eraseFromParent();
501284677Sdim}
502284677Sdim
503309124Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore(
504309124Sdim    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
505309124Sdim    int RegNum, const DebugLoc &DL) {
506284677Sdim  MachineFunction *MF = blk->getParent();
507284677Sdim  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
508284677Sdim  //insert before
509284677Sdim  blk->insert(I, NewInstr);
510284677Sdim  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
511284677Sdim  SHOWNEWINSTR(NewInstr);
512284677Sdim}
513284677Sdim
514284677Sdimint AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
515284677Sdim  switch(OldOpcode) {
516341825Sdim  case R600::JUMP_COND:
517341825Sdim  case R600::JUMP: return R600::IF_PREDICATE_SET;
518341825Sdim  case R600::BRANCH_COND_i32:
519341825Sdim  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
520284677Sdim  default: llvm_unreachable("internal error");
521284677Sdim  }
522284677Sdim  return -1;
523284677Sdim}
524284677Sdim
525284677Sdimint AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
526284677Sdim  switch(OldOpcode) {
527341825Sdim  case R600::JUMP_COND:
528341825Sdim  case R600::JUMP: return R600::IF_PREDICATE_SET;
529341825Sdim  case R600::BRANCH_COND_i32:
530341825Sdim  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
531284677Sdim  default: llvm_unreachable("internal error");
532284677Sdim  }
533284677Sdim  return -1;
534284677Sdim}
535284677Sdim
536284677Sdimint AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
537284677Sdim  switch(OldOpcode) {
538341825Sdim  case R600::JUMP_COND:
539341825Sdim  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
540284677Sdim  default: llvm_unreachable("internal error");
541327952Sdim  }
542284677Sdim  return -1;
543284677Sdim}
544284677Sdim
545284677Sdimint AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
546284677Sdim  switch(OldOpcode) {
547341825Sdim  case R600::JUMP_COND:
548341825Sdim  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
549284677Sdim  default: llvm_unreachable("internal error");
550284677Sdim  }
551284677Sdim  return -1;
552284677Sdim}
553284677Sdim
554284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
555284677Sdim  return MI->getOperand(0).getMBB();
556284677Sdim}
557284677Sdim
558284677Sdimvoid AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
559284677Sdim    MachineBasicBlock *MBB) {
560284677Sdim  MI->getOperand(0).setMBB(MBB);
561284677Sdim}
562284677Sdim
563284677SdimMachineBasicBlock *
564284677SdimAMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
565284677Sdim    MachineInstr *MI) {
566284677Sdim  assert(MBB->succ_size() == 2);
567284677Sdim  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
568284677Sdim  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
569284677Sdim  MachineBasicBlock::succ_iterator Next = It;
570284677Sdim  ++Next;
571284677Sdim  return (*It == TrueBranch) ? *Next : *It;
572284677Sdim}
573284677Sdim
574284677Sdimbool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
575284677Sdim  switch (MI->getOpcode()) {
576341825Sdim    case R600::JUMP_COND:
577341825Sdim    case R600::BRANCH_COND_i32:
578341825Sdim    case R600::BRANCH_COND_f32: return true;
579284677Sdim  default:
580284677Sdim    return false;
581284677Sdim  }
582284677Sdim  return false;
583284677Sdim}
584284677Sdim
585284677Sdimbool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
586284677Sdim  switch (MI->getOpcode()) {
587341825Sdim  case R600::JUMP:
588341825Sdim  case R600::BRANCH:
589284677Sdim    return true;
590284677Sdim  default:
591284677Sdim    return false;
592284677Sdim  }
593284677Sdim  return false;
594284677Sdim}
595284677Sdim
596284677SdimDebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
597284677Sdim  //get DebugLoc from the first MachineBasicBlock instruction with debug info
598284677Sdim  DebugLoc DL;
599284677Sdim  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
600284677Sdim      ++It) {
601284677Sdim    MachineInstr *instr = &(*It);
602284677Sdim    if (instr->getDebugLoc())
603284677Sdim      DL = instr->getDebugLoc();
604284677Sdim  }
605284677Sdim  return DL;
606284677Sdim}
607284677Sdim
608284677SdimMachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
609284677Sdim    MachineBasicBlock *MBB) {
610284677Sdim  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
611284677Sdim  MachineInstr *MI = &*It;
612284677Sdim  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
613284677Sdim    return MI;
614284677Sdim  return nullptr;
615284677Sdim}
616284677Sdim
617284677SdimMachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
618284677Sdim    MachineBasicBlock *MBB) {
619284677Sdim  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
620284677Sdim      It != E; ++It) {
621284677Sdim    // FIXME: Simplify
622284677Sdim    MachineInstr *MI = &*It;
623284677Sdim    if (MI) {
624284677Sdim      if (isCondBranch(MI) || isUncondBranch(MI))
625284677Sdim        return MI;
626284677Sdim      else if (!TII->isMov(MI->getOpcode()))
627284677Sdim        break;
628284677Sdim    }
629284677Sdim  }
630284677Sdim  return nullptr;
631284677Sdim}
632284677Sdim
633284677SdimMachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
634284677Sdim  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
635284677Sdim  if (It != MBB->rend()) {
636284677Sdim    MachineInstr *instr = &(*It);
637341825Sdim    if (instr->getOpcode() == R600::RETURN)
638284677Sdim      return instr;
639284677Sdim  }
640284677Sdim  return nullptr;
641284677Sdim}
642284677Sdim
643284677Sdimbool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
644284677Sdim  MachineInstr *MI = getReturnInstr(MBB);
645284677Sdim  bool IsReturn = (MBB->succ_size() == 0);
646284677Sdim  if (MI)
647284677Sdim    assert(IsReturn);
648284677Sdim  else if (IsReturn)
649341825Sdim    LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
650341825Sdim                      << " is return block without RETURN instr\n";);
651284677Sdim  return  IsReturn;
652284677Sdim}
653284677Sdim
654284677Sdimvoid AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
655284677Sdim    MachineBasicBlock *SrcMBB) {
656284677Sdim  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
657284677Sdim       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
658284677Sdim    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
659284677Sdim}
660284677Sdim
661284677SdimMachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
662284677Sdim  MachineFunction *Func = MBB->getParent();
663284677Sdim  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
664284677Sdim  Func->push_back(NewMBB);  //insert to function
665309124Sdim  for (const MachineInstr &It : *MBB)
666309124Sdim    NewMBB->push_back(Func->CloneMachineInstr(&It));
667284677Sdim  return NewMBB;
668284677Sdim}
669284677Sdim
670284677Sdimvoid AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
671284677Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
672284677Sdim    MachineBasicBlock *NewBlk) {
673284677Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
674284677Sdim  if (BranchMI && isCondBranch(BranchMI) &&
675284677Sdim      getTrueBranch(BranchMI) == OldMBB)
676284677Sdim    setTrueBranch(BranchMI, NewBlk);
677284677Sdim}
678284677Sdim
679284677Sdimvoid AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
680284677Sdim  assert((!MBB->getParent()->getJumpTableInfo()
681284677Sdim          || MBB->getParent()->getJumpTableInfo()->isEmpty())
682284677Sdim         && "found a jump table");
683284677Sdim
684284677Sdim   //collect continue right before endloop
685284677Sdim   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
686284677Sdim   MachineBasicBlock::iterator Pre = MBB->begin();
687284677Sdim   MachineBasicBlock::iterator E = MBB->end();
688284677Sdim   MachineBasicBlock::iterator It = Pre;
689284677Sdim   while (It != E) {
690341825Sdim     if (Pre->getOpcode() == R600::CONTINUE
691341825Sdim         && It->getOpcode() == R600::ENDLOOP)
692309124Sdim       ContInstr.push_back(&*Pre);
693284677Sdim     Pre = It;
694284677Sdim     ++It;
695284677Sdim   }
696284677Sdim
697284677Sdim   //delete continue right before endloop
698284677Sdim   for (unsigned i = 0; i < ContInstr.size(); ++i)
699284677Sdim      ContInstr[i]->eraseFromParent();
700284677Sdim
701284677Sdim   // TODO to fix up jump table so later phase won't be confused.  if
702284677Sdim   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
703284677Sdim   // there isn't such an interface yet.  alternatively, replace all the other
704284677Sdim   // blocks in the jump table with the entryBlk //}
705284677Sdim}
706284677Sdim
707284677Sdimbool AMDGPUCFGStructurizer::prepare() {
708284677Sdim  bool Changed = false;
709284677Sdim
710284677Sdim  //FIXME: if not reducible flow graph, make it so ???
711284677Sdim
712341825Sdim  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
713284677Sdim
714284677Sdim  orderBlocks(FuncRep);
715284677Sdim
716284677Sdim  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
717284677Sdim
718284677Sdim  // Add an ExitBlk to loop that don't have one
719284677Sdim  for (MachineLoopInfo::iterator It = MLI->begin(),
720284677Sdim       E = MLI->end(); It != E; ++It) {
721284677Sdim    MachineLoop *LoopRep = (*It);
722284677Sdim    MBBVector ExitingMBBs;
723284677Sdim    LoopRep->getExitingBlocks(ExitingMBBs);
724284677Sdim
725284677Sdim    if (ExitingMBBs.size() == 0) {
726284677Sdim      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
727284677Sdim      if (DummyExitBlk)
728284677Sdim        RetBlks.push_back(DummyExitBlk);
729284677Sdim    }
730284677Sdim  }
731284677Sdim
732284677Sdim  // Remove unconditional branch instr.
733284677Sdim  // Add dummy exit block iff there are multiple returns.
734284677Sdim  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
735284677Sdim       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
736284677Sdim    MachineBasicBlock *MBB = *It;
737284677Sdim    removeUnconditionalBranch(MBB);
738284677Sdim    removeRedundantConditionalBranch(MBB);
739284677Sdim    if (isReturnBlock(MBB)) {
740284677Sdim      RetBlks.push_back(MBB);
741284677Sdim    }
742284677Sdim    assert(MBB->succ_size() <= 2);
743284677Sdim  }
744284677Sdim
745284677Sdim  if (RetBlks.size() >= 2) {
746284677Sdim    addDummyExitBlock(RetBlks);
747284677Sdim    Changed = true;
748284677Sdim  }
749284677Sdim
750284677Sdim  return Changed;
751284677Sdim}
752284677Sdim
753284677Sdimbool AMDGPUCFGStructurizer::run() {
754284677Sdim  //Assume reducible CFG...
755341825Sdim  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
756284677Sdim
757284677Sdim#ifdef STRESSTEST
758284677Sdim  //Use the worse block ordering to test the algorithm.
759284677Sdim  ReverseVector(orderedBlks);
760284677Sdim#endif
761284677Sdim
762341825Sdim  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
763284677Sdim  int NumIter = 0;
764284677Sdim  bool Finish = false;
765284677Sdim  MachineBasicBlock *MBB;
766284677Sdim  bool MakeProgress = false;
767284677Sdim  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
768284677Sdim                                        OrderedBlks.end());
769284677Sdim
770284677Sdim  do {
771284677Sdim    ++NumIter;
772341825Sdim    LLVM_DEBUG(dbgs() << "numIter = " << NumIter
773341825Sdim                      << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
774284677Sdim
775284677Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
776284677Sdim        OrderedBlks.begin();
777284677Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
778284677Sdim        OrderedBlks.end();
779284677Sdim
780284677Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
781284677Sdim        It;
782284677Sdim    MachineBasicBlock *SccBeginMBB = nullptr;
783284677Sdim    int SccNumBlk = 0;  // The number of active blocks, init to a
784284677Sdim                        // maximum possible number.
785284677Sdim    int SccNumIter;     // Number of iteration in this SCC.
786284677Sdim
787284677Sdim    while (It != E) {
788284677Sdim      MBB = *It;
789284677Sdim
790284677Sdim      if (!SccBeginMBB) {
791284677Sdim        SccBeginIter = It;
792284677Sdim        SccBeginMBB = MBB;
793284677Sdim        SccNumIter = 0;
794284677Sdim        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
795341825Sdim        LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
796341825Sdim                   dbgs() << "\n";);
797284677Sdim      }
798284677Sdim
799284677Sdim      if (!isRetiredBlock(MBB))
800284677Sdim        patternMatch(MBB);
801284677Sdim
802284677Sdim      ++It;
803284677Sdim
804284677Sdim      bool ContNextScc = true;
805284677Sdim      if (It == E
806284677Sdim          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
807284677Sdim        // Just finish one scc.
808284677Sdim        ++SccNumIter;
809284677Sdim        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
810284677Sdim        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
811341825Sdim          LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
812341825Sdim                            << ", sccNumIter = " << SccNumIter;
813341825Sdim                     dbgs() << "doesn't make any progress\n";);
814284677Sdim          ContNextScc = true;
815284677Sdim        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
816284677Sdim          SccNumBlk = sccRemainedNumBlk;
817284677Sdim          It = SccBeginIter;
818284677Sdim          ContNextScc = false;
819341825Sdim          LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
820341825Sdim                            << "sccNumIter = " << SccNumIter << '\n';);
821284677Sdim        } else {
822284677Sdim          // Finish the current scc.
823284677Sdim          ContNextScc = true;
824284677Sdim        }
825284677Sdim      } else {
826284677Sdim        // Continue on next component in the current scc.
827284677Sdim        ContNextScc = false;
828284677Sdim      }
829284677Sdim
830284677Sdim      if (ContNextScc)
831284677Sdim        SccBeginMBB = nullptr;
832284677Sdim    } //while, "one iteration" over the function.
833284677Sdim
834284677Sdim    MachineBasicBlock *EntryMBB =
835314564Sdim        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
836284677Sdim    if (EntryMBB->succ_size() == 0) {
837284677Sdim      Finish = true;
838341825Sdim      LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
839284677Sdim    } else {
840284677Sdim      int NewnumRemainedBlk
841284677Sdim        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
842284677Sdim      // consider cloned blocks ??
843284677Sdim      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
844284677Sdim        MakeProgress = true;
845284677Sdim        NumRemainedBlk = NewnumRemainedBlk;
846284677Sdim      } else {
847284677Sdim        MakeProgress = false;
848341825Sdim        LLVM_DEBUG(dbgs() << "No progress\n";);
849284677Sdim      }
850284677Sdim    }
851284677Sdim  } while (!Finish && MakeProgress);
852284677Sdim
853284677Sdim  // Misc wrap up to maintain the consistency of the Function representation.
854314564Sdim  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
855284677Sdim
856284677Sdim  // Detach retired Block, release memory.
857284677Sdim  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
858284677Sdim      It != E; ++It) {
859284677Sdim    if ((*It).second && (*It).second->IsRetired) {
860284677Sdim      assert(((*It).first)->getNumber() != -1);
861341825Sdim      LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
862284677Sdim      (*It).first->eraseFromParent();  //Remove from the parent Function.
863284677Sdim    }
864284677Sdim    delete (*It).second;
865284677Sdim  }
866284677Sdim  BlockInfoMap.clear();
867284677Sdim  LLInfoMap.clear();
868284677Sdim
869284677Sdim  if (!Finish) {
870341825Sdim    LLVM_DEBUG(FuncRep->viewCFG());
871309124Sdim    report_fatal_error("IRREDUCIBLE_CFG");
872284677Sdim  }
873284677Sdim
874284677Sdim  return true;
875284677Sdim}
876284677Sdim
877284677Sdimvoid AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
878284677Sdim  int SccNum = 0;
879284677Sdim  MachineBasicBlock *MBB;
880284677Sdim  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
881284677Sdim       ++It, ++SccNum) {
882284677Sdim    const std::vector<MachineBasicBlock *> &SccNext = *It;
883284677Sdim    for (std::vector<MachineBasicBlock *>::const_iterator
884284677Sdim         blockIter = SccNext.begin(), blockEnd = SccNext.end();
885284677Sdim         blockIter != blockEnd; ++blockIter) {
886284677Sdim      MBB = *blockIter;
887284677Sdim      OrderedBlks.push_back(MBB);
888284677Sdim      recordSccnum(MBB, SccNum);
889284677Sdim    }
890284677Sdim  }
891284677Sdim
892321369Sdim  // walk through all the block in func to check for unreachable
893321369Sdim  for (auto *MBB : nodes(MF)) {
894284677Sdim    SccNum = getSCCNum(MBB);
895284677Sdim    if (SccNum == INVALIDSCCNUM)
896284677Sdim      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
897284677Sdim  }
898284677Sdim}
899284677Sdim
900284677Sdimint AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
901284677Sdim  int NumMatch = 0;
902284677Sdim  int CurMatch;
903284677Sdim
904341825Sdim  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
905284677Sdim
906284677Sdim  while ((CurMatch = patternMatchGroup(MBB)) > 0)
907284677Sdim    NumMatch += CurMatch;
908284677Sdim
909341825Sdim  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
910341825Sdim                    << ", numMatch = " << NumMatch << "\n";);
911284677Sdim
912284677Sdim  return NumMatch;
913284677Sdim}
914284677Sdim
915284677Sdimint AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
916284677Sdim  int NumMatch = 0;
917284677Sdim  NumMatch += loopendPatternMatch();
918284677Sdim  NumMatch += serialPatternMatch(MBB);
919284677Sdim  NumMatch += ifPatternMatch(MBB);
920284677Sdim  return NumMatch;
921284677Sdim}
922284677Sdim
923284677Sdimint AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
924284677Sdim  if (MBB->succ_size() != 1)
925284677Sdim    return 0;
926284677Sdim
927284677Sdim  MachineBasicBlock *childBlk = *MBB->succ_begin();
928284677Sdim  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
929284677Sdim    return 0;
930284677Sdim
931284677Sdim  mergeSerialBlock(MBB, childBlk);
932284677Sdim  ++numSerialPatternMatch;
933284677Sdim  return 1;
934284677Sdim}
935284677Sdim
936284677Sdimint AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
937284677Sdim  //two edges
938284677Sdim  if (MBB->succ_size() != 2)
939284677Sdim    return 0;
940284677Sdim  if (hasBackEdge(MBB))
941284677Sdim    return 0;
942284677Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
943284677Sdim  if (!BranchMI)
944284677Sdim    return 0;
945284677Sdim
946284677Sdim  assert(isCondBranch(BranchMI));
947284677Sdim  int NumMatch = 0;
948284677Sdim
949284677Sdim  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
950284677Sdim  NumMatch += serialPatternMatch(TrueMBB);
951284677Sdim  NumMatch += ifPatternMatch(TrueMBB);
952284677Sdim  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
953284677Sdim  NumMatch += serialPatternMatch(FalseMBB);
954284677Sdim  NumMatch += ifPatternMatch(FalseMBB);
955284677Sdim  MachineBasicBlock *LandBlk;
956284677Sdim  int Cloned = 0;
957284677Sdim
958284677Sdim  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
959284677Sdim  // TODO: Simplify
960284677Sdim  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
961284677Sdim    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
962284677Sdim    // Diamond pattern
963284677Sdim    LandBlk = *TrueMBB->succ_begin();
964284677Sdim  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
965284677Sdim    // Triangle pattern, false is empty
966284677Sdim    LandBlk = FalseMBB;
967284677Sdim    FalseMBB = nullptr;
968284677Sdim  } else if (FalseMBB->succ_size() == 1
969284677Sdim             && *FalseMBB->succ_begin() == TrueMBB) {
970284677Sdim    // Triangle pattern, true is empty
971284677Sdim    // We reverse the predicate to make a triangle, empty false pattern;
972284677Sdim    std::swap(TrueMBB, FalseMBB);
973314564Sdim    reversePredicateSetter(MBB->end(), *MBB);
974284677Sdim    LandBlk = FalseMBB;
975284677Sdim    FalseMBB = nullptr;
976284677Sdim  } else if (FalseMBB->succ_size() == 1
977284677Sdim             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
978284677Sdim    LandBlk = *FalseMBB->succ_begin();
979284677Sdim  } else if (TrueMBB->succ_size() == 1
980284677Sdim    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
981284677Sdim    LandBlk = *TrueMBB->succ_begin();
982284677Sdim  } else {
983284677Sdim    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
984284677Sdim  }
985284677Sdim
986284677Sdim  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
987284677Sdim  // new BB created for landBlk==NULL may introduce new challenge to the
988284677Sdim  // reduction process.
989284677Sdim  if (LandBlk &&
990284677Sdim      ((TrueMBB && TrueMBB->pred_size() > 1)
991284677Sdim      || (FalseMBB && FalseMBB->pred_size() > 1))) {
992284677Sdim     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
993284677Sdim  }
994284677Sdim
995284677Sdim  if (TrueMBB && TrueMBB->pred_size() > 1) {
996284677Sdim    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
997284677Sdim    ++Cloned;
998284677Sdim  }
999284677Sdim
1000284677Sdim  if (FalseMBB && FalseMBB->pred_size() > 1) {
1001284677Sdim    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1002284677Sdim    ++Cloned;
1003284677Sdim  }
1004284677Sdim
1005284677Sdim  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1006284677Sdim
1007284677Sdim  ++numIfPatternMatch;
1008284677Sdim
1009284677Sdim  numClonedBlock += Cloned;
1010284677Sdim
1011284677Sdim  return 1 + Cloned + NumMatch;
1012284677Sdim}
1013284677Sdim
1014284677Sdimint AMDGPUCFGStructurizer::loopendPatternMatch() {
1015284677Sdim  std::deque<MachineLoop *> NestedLoops;
1016284677Sdim  for (auto &It: *MLI)
1017284677Sdim    for (MachineLoop *ML : depth_first(It))
1018284677Sdim      NestedLoops.push_front(ML);
1019284677Sdim
1020321369Sdim  if (NestedLoops.empty())
1021284677Sdim    return 0;
1022284677Sdim
1023284677Sdim  // Process nested loop outside->inside (we did push_front),
1024284677Sdim  // so "continue" to a outside loop won't be mistaken as "break"
1025284677Sdim  // of the current loop.
1026284677Sdim  int Num = 0;
1027284677Sdim  for (MachineLoop *ExaminedLoop : NestedLoops) {
1028284677Sdim    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1029284677Sdim      continue;
1030341825Sdim    LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1031284677Sdim    int NumBreak = mergeLoop(ExaminedLoop);
1032284677Sdim    if (NumBreak == -1)
1033284677Sdim      break;
1034284677Sdim    Num += NumBreak;
1035284677Sdim  }
1036284677Sdim  return Num;
1037284677Sdim}
1038284677Sdim
1039284677Sdimint AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1040284677Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1041284677Sdim  MBBVector ExitingMBBs;
1042284677Sdim  LoopRep->getExitingBlocks(ExitingMBBs);
1043284677Sdim  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1044341825Sdim  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1045341825Sdim                    << " exiting blocks\n";);
1046284677Sdim  // We assume a single ExitBlk
1047284677Sdim  MBBVector ExitBlks;
1048284677Sdim  LoopRep->getExitBlocks(ExitBlks);
1049284677Sdim  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1050284677Sdim  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1051284677Sdim    ExitBlkSet.insert(ExitBlks[i]);
1052284677Sdim  assert(ExitBlkSet.size() == 1);
1053284677Sdim  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1054284677Sdim  assert(ExitBlk && "Loop has several exit block");
1055284677Sdim  MBBVector LatchBlks;
1056321369Sdim  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1057321369Sdim    if (LoopRep->contains(LB))
1058321369Sdim      LatchBlks.push_back(LB);
1059284677Sdim
1060284677Sdim  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1061284677Sdim    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1062284677Sdim  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1063284677Sdim    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1064284677Sdim  int Match = 0;
1065284677Sdim  do {
1066284677Sdim    Match = 0;
1067284677Sdim    Match += serialPatternMatch(LoopHeader);
1068284677Sdim    Match += ifPatternMatch(LoopHeader);
1069284677Sdim  } while (Match > 0);
1070284677Sdim  mergeLooplandBlock(LoopHeader, ExitBlk);
1071284677Sdim  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1072284677Sdim  if (ParentLoop)
1073284677Sdim    MLI->changeLoopFor(LoopHeader, ParentLoop);
1074284677Sdim  else
1075284677Sdim    MLI->removeBlock(LoopHeader);
1076284677Sdim  Visited[LoopRep] = true;
1077284677Sdim  return 1;
1078284677Sdim}
1079284677Sdim
1080284677Sdimbool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1081284677Sdim    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1082284677Sdim  if (Src1MBB->succ_size() == 0) {
1083284677Sdim    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1084284677Sdim    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1085284677Sdim      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1086284677Sdim      if (TheEntry) {
1087341825Sdim        LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1088341825Sdim                          << Src1MBB->getNumber() << " src2 = BB"
1089341825Sdim                          << Src2MBB->getNumber() << "\n";);
1090284677Sdim        return true;
1091284677Sdim      }
1092284677Sdim    }
1093284677Sdim  }
1094284677Sdim  return false;
1095284677Sdim}
1096284677Sdim
1097284677Sdimint AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1098284677Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1099284677Sdim  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1100284677Sdim  if (Num == 0) {
1101341825Sdim    LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1102341825Sdim                      << "\n";);
1103284677Sdim    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1104284677Sdim  }
1105284677Sdim  return Num;
1106284677Sdim}
1107284677Sdim
1108284677Sdimint AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1109284677Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1110284677Sdim  int Num = 0;
1111284677Sdim  MachineBasicBlock *DownBlk;
1112284677Sdim
1113284677Sdim  //trueBlk could be the common post dominator
1114284677Sdim  DownBlk = TrueMBB;
1115284677Sdim
1116341825Sdim  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1117341825Sdim                    << " true = BB" << TrueMBB->getNumber()
1118341825Sdim                    << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1119341825Sdim                    << FalseMBB->getNumber() << "\n";);
1120284677Sdim
1121284677Sdim  while (DownBlk) {
1122341825Sdim    LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1123284677Sdim
1124284677Sdim    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1125341825Sdim      LLVM_DEBUG(dbgs() << " working\n";);
1126284677Sdim
1127284677Sdim      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1128284677Sdim      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1129284677Sdim
1130284677Sdim      numClonedBlock += Num;
1131284677Sdim      Num += serialPatternMatch(*HeadMBB->succ_begin());
1132284677Sdim      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1133284677Sdim      Num += ifPatternMatch(HeadMBB);
1134284677Sdim      assert(Num > 0);
1135284677Sdim
1136284677Sdim      break;
1137284677Sdim    }
1138341825Sdim    LLVM_DEBUG(dbgs() << " not working\n";);
1139284677Sdim    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1140284677Sdim  } // walk down the postDomTree
1141284677Sdim
1142284677Sdim  return Num;
1143284677Sdim}
1144284677Sdim
1145327952Sdim#ifndef NDEBUG
1146284677Sdimvoid AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1147284677Sdim    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1148284677Sdim    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1149284677Sdim  dbgs() << "head = BB" << HeadMBB->getNumber()
1150284677Sdim         << " size = " << HeadMBB->size();
1151284677Sdim  if (Detail) {
1152284677Sdim    dbgs() << "\n";
1153284677Sdim    HeadMBB->print(dbgs());
1154284677Sdim    dbgs() << "\n";
1155284677Sdim  }
1156284677Sdim
1157284677Sdim  if (TrueMBB) {
1158284677Sdim    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1159284677Sdim           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1160284677Sdim    if (Detail) {
1161284677Sdim      dbgs() << "\n";
1162284677Sdim      TrueMBB->print(dbgs());
1163284677Sdim      dbgs() << "\n";
1164284677Sdim    }
1165284677Sdim  }
1166284677Sdim  if (FalseMBB) {
1167284677Sdim    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1168284677Sdim           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1169284677Sdim    if (Detail) {
1170284677Sdim      dbgs() << "\n";
1171284677Sdim      FalseMBB->print(dbgs());
1172284677Sdim      dbgs() << "\n";
1173284677Sdim    }
1174284677Sdim  }
1175284677Sdim  if (LandMBB) {
1176284677Sdim    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1177284677Sdim           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1178284677Sdim    if (Detail) {
1179284677Sdim      dbgs() << "\n";
1180284677Sdim      LandMBB->print(dbgs());
1181284677Sdim      dbgs() << "\n";
1182284677Sdim    }
1183284677Sdim  }
1184284677Sdim
1185321369Sdim  dbgs() << "\n";
1186284677Sdim}
1187327952Sdim#endif
1188284677Sdim
1189284677Sdimint AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1190284677Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1191284677Sdim    MachineBasicBlock **LandMBBPtr) {
1192284677Sdim  bool MigrateTrue = false;
1193284677Sdim  bool MigrateFalse = false;
1194284677Sdim
1195284677Sdim  MachineBasicBlock *LandBlk = *LandMBBPtr;
1196284677Sdim
1197284677Sdim  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1198284677Sdim         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1199284677Sdim
1200284677Sdim  if (TrueMBB == FalseMBB)
1201284677Sdim    return 0;
1202284677Sdim
1203284677Sdim  MigrateTrue = needMigrateBlock(TrueMBB);
1204284677Sdim  MigrateFalse = needMigrateBlock(FalseMBB);
1205284677Sdim
1206284677Sdim  if (!MigrateTrue && !MigrateFalse)
1207284677Sdim    return 0;
1208284677Sdim
1209284677Sdim  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1210284677Sdim  // have more than one predecessors.  without doing this, its predecessor
1211284677Sdim  // rather than headBlk will have undefined value in initReg.
1212284677Sdim  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1213284677Sdim    MigrateTrue = true;
1214284677Sdim  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1215284677Sdim    MigrateFalse = true;
1216284677Sdim
1217341825Sdim  LLVM_DEBUG(
1218341825Sdim      dbgs() << "before improveSimpleJumpintoIf: ";
1219341825Sdim      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1220284677Sdim
1221284677Sdim  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1222284677Sdim  //
1223284677Sdim  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1224284677Sdim  //      {initReg = 0; org falseBlk branch }
1225284677Sdim  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1226284677Sdim  //      => org landBlk
1227284677Sdim  //      if landBlk->pred_size() > 2, put the about if-else inside
1228284677Sdim  //      if (initReg !=2) {...}
1229284677Sdim  //
1230284677Sdim  // add initReg = initVal to headBlk
1231284677Sdim
1232284677Sdim  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1233284677Sdim  if (!MigrateTrue || !MigrateFalse) {
1234284677Sdim    // XXX: We have an opportunity here to optimize the "branch into if" case
1235284677Sdim    // here.  Branch into if looks like this:
1236284677Sdim    //                        entry
1237284677Sdim    //                       /     |
1238284677Sdim    //           diamond_head       branch_from
1239284677Sdim    //             /      \           |
1240284677Sdim    // diamond_false        diamond_true
1241284677Sdim    //             \      /
1242284677Sdim    //               done
1243284677Sdim    //
1244284677Sdim    // The diamond_head block begins the "if" and the diamond_true block
1245284677Sdim    // is the block being "branched into".
1246284677Sdim    //
1247284677Sdim    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1248284677Sdim    // and if MigrateFalse is true, then FalseBB is the block being
1249284677Sdim    // "branched into"
1250296417Sdim    //
1251284677Sdim    // Here is the pseudo code for how I think the optimization should work:
1252284677Sdim    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1253284677Sdim    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1254284677Sdim    // 3. Move the branch instruction from diamond_head into its own basic
1255284677Sdim    //    block (new_block).
1256284677Sdim    // 4. Add an unconditional branch from diamond_head to new_block
1257284677Sdim    // 5. Replace the branch instruction in branch_from with an unconditional
1258284677Sdim    //    branch to new_block.  If branch_from has multiple predecessors, then
1259284677Sdim    //    we need to replace the True/False block in the branch
1260284677Sdim    //    instruction instead of replacing it.
1261284677Sdim    // 6. Change the condition of the branch instruction in new_block from
1262284677Sdim    //    COND to (COND || GPR0)
1263284677Sdim    //
1264284677Sdim    // In order insert these MOV instruction, we will need to use the
1265284677Sdim    // RegisterScavenger.  Usually liveness stops being tracked during
1266284677Sdim    // the late machine optimization passes, however if we implement
1267284677Sdim    // bool TargetRegisterInfo::requiresRegisterScavenging(
1268284677Sdim    //                                                const MachineFunction &MF)
1269296417Sdim    // and have it return true, liveness will be tracked correctly
1270284677Sdim    // by generic optimization passes.  We will also need to make sure that
1271284677Sdim    // all of our target-specific passes that run after regalloc and before
1272284677Sdim    // the CFGStructurizer track liveness and we will need to modify this pass
1273284677Sdim    // to correctly track liveness.
1274284677Sdim    //
1275284677Sdim    // After the above changes, the new CFG should look like this:
1276284677Sdim    //                        entry
1277284677Sdim    //                       /     |
1278284677Sdim    //           diamond_head       branch_from
1279284677Sdim    //                       \     /
1280284677Sdim    //                      new_block
1281284677Sdim    //                      /      |
1282284677Sdim    //         diamond_false        diamond_true
1283284677Sdim    //                      \      /
1284284677Sdim    //                        done
1285284677Sdim    //
1286284677Sdim    // Without this optimization, we are forced to duplicate the diamond_true
1287284677Sdim    // block and we will end up with a CFG like this:
1288284677Sdim    //
1289284677Sdim    //                        entry
1290284677Sdim    //                       /     |
1291284677Sdim    //           diamond_head       branch_from
1292284677Sdim    //             /      \                   |
1293284677Sdim    // diamond_false        diamond_true      diamond_true (duplicate)
1294284677Sdim    //             \      /                   |
1295284677Sdim    //               done --------------------|
1296284677Sdim    //
1297284677Sdim    // Duplicating diamond_true can be very costly especially if it has a
1298284677Sdim    // lot of instructions.
1299284677Sdim    return 0;
1300284677Sdim  }
1301284677Sdim
1302284677Sdim  int NumNewBlk = 0;
1303284677Sdim
1304284677Sdim  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1305284677Sdim
1306341825Sdim  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1307341825Sdim  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1308284677Sdim
1309284677Sdim  if (LandBlkHasOtherPred) {
1310309124Sdim    report_fatal_error("Extra register needed to handle CFG");
1311360784Sdim    Register CmpResReg =
1312360784Sdim        HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1313309124Sdim    report_fatal_error("Extra compare instruction needed to handle CFG");
1314341825Sdim    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1315284677Sdim        CmpResReg, DebugLoc());
1316284677Sdim  }
1317284677Sdim
1318284677Sdim  // XXX: We are running this after RA, so creating virtual registers will
1319284677Sdim  // cause an assertion failure in the PostRA scheduling pass.
1320360784Sdim  Register InitReg =
1321360784Sdim      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1322341825Sdim  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1323284677Sdim      DebugLoc());
1324284677Sdim
1325284677Sdim  if (MigrateTrue) {
1326284677Sdim    migrateInstruction(TrueMBB, LandBlk, I);
1327284677Sdim    // need to uncondionally insert the assignment to ensure a path from its
1328284677Sdim    // predecessor rather than headBlk has valid value in initReg if
1329284677Sdim    // (initVal != 1).
1330309124Sdim    report_fatal_error("Extra register needed to handle CFG");
1331284677Sdim  }
1332341825Sdim  insertInstrBefore(I, R600::ELSE);
1333284677Sdim
1334284677Sdim  if (MigrateFalse) {
1335284677Sdim    migrateInstruction(FalseMBB, LandBlk, I);
1336284677Sdim    // need to uncondionally insert the assignment to ensure a path from its
1337284677Sdim    // predecessor rather than headBlk has valid value in initReg if
1338284677Sdim    // (initVal != 0)
1339309124Sdim    report_fatal_error("Extra register needed to handle CFG");
1340284677Sdim  }
1341284677Sdim
1342284677Sdim  if (LandBlkHasOtherPred) {
1343284677Sdim    // add endif
1344341825Sdim    insertInstrBefore(I, R600::ENDIF);
1345284677Sdim
1346284677Sdim    // put initReg = 2 to other predecessors of landBlk
1347284677Sdim    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1348284677Sdim         PE = LandBlk->pred_end(); PI != PE; ++PI) {
1349284677Sdim      MachineBasicBlock *MBB = *PI;
1350284677Sdim      if (MBB != TrueMBB && MBB != FalseMBB)
1351309124Sdim        report_fatal_error("Extra register needed to handle CFG");
1352284677Sdim    }
1353284677Sdim  }
1354341825Sdim  LLVM_DEBUG(
1355341825Sdim      dbgs() << "result from improveSimpleJumpintoIf: ";
1356341825Sdim      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1357284677Sdim
1358284677Sdim  // update landBlk
1359284677Sdim  *LandMBBPtr = LandBlk;
1360284677Sdim
1361284677Sdim  return NumNewBlk;
1362284677Sdim}
1363284677Sdim
1364284677Sdimvoid AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1365284677Sdim    MachineBasicBlock *SrcMBB) {
1366341825Sdim  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1367341825Sdim                    << SrcMBB->getNumber() << "\n";);
1368284677Sdim  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1369284677Sdim
1370296417Sdim  DstMBB->removeSuccessor(SrcMBB, true);
1371284677Sdim  cloneSuccessorList(DstMBB, SrcMBB);
1372284677Sdim
1373284677Sdim  removeSuccessor(SrcMBB);
1374284677Sdim  MLI->removeBlock(SrcMBB);
1375284677Sdim  retireBlock(SrcMBB);
1376284677Sdim}
1377284677Sdim
1378284677Sdimvoid AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1379284677Sdim    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1380284677Sdim    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1381284677Sdim  assert (TrueMBB);
1382341825Sdim  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{  ";
1383341825Sdim             if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1384341825Sdim             << "  } else ";
1385341825Sdim             dbgs() << "{  "; if (FalseMBB) {
1386341825Sdim               dbgs() << "BB" << FalseMBB->getNumber();
1387341825Sdim             } dbgs() << "  }\n ";
1388341825Sdim             dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1389341825Sdim               dbgs() << "BB" << LandMBB->getNumber();
1390341825Sdim             } dbgs() << "\n";);
1391284677Sdim
1392284677Sdim  int OldOpcode = BranchMI->getOpcode();
1393284677Sdim  DebugLoc BranchDL = BranchMI->getDebugLoc();
1394284677Sdim
1395284677Sdim//    transform to
1396284677Sdim//    if cond
1397284677Sdim//       trueBlk
1398284677Sdim//    else
1399284677Sdim//       falseBlk
1400284677Sdim//    endif
1401284677Sdim//    landBlk
1402284677Sdim
1403284677Sdim  MachineBasicBlock::iterator I = BranchMI;
1404284677Sdim  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1405284677Sdim      BranchDL);
1406284677Sdim
1407284677Sdim  if (TrueMBB) {
1408284677Sdim    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1409296417Sdim    MBB->removeSuccessor(TrueMBB, true);
1410284677Sdim    if (LandMBB && TrueMBB->succ_size()!=0)
1411296417Sdim      TrueMBB->removeSuccessor(LandMBB, true);
1412284677Sdim    retireBlock(TrueMBB);
1413284677Sdim    MLI->removeBlock(TrueMBB);
1414284677Sdim  }
1415284677Sdim
1416284677Sdim  if (FalseMBB) {
1417341825Sdim    insertInstrBefore(I, R600::ELSE);
1418284677Sdim    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1419284677Sdim                   FalseMBB->end());
1420296417Sdim    MBB->removeSuccessor(FalseMBB, true);
1421284677Sdim    if (LandMBB && FalseMBB->succ_size() != 0)
1422296417Sdim      FalseMBB->removeSuccessor(LandMBB, true);
1423284677Sdim    retireBlock(FalseMBB);
1424284677Sdim    MLI->removeBlock(FalseMBB);
1425284677Sdim  }
1426341825Sdim  insertInstrBefore(I, R600::ENDIF);
1427284677Sdim
1428284677Sdim  BranchMI->eraseFromParent();
1429284677Sdim
1430284677Sdim  if (LandMBB && TrueMBB && FalseMBB)
1431284677Sdim    MBB->addSuccessor(LandMBB);
1432284677Sdim}
1433284677Sdim
1434284677Sdimvoid AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1435284677Sdim    MachineBasicBlock *LandMBB) {
1436341825Sdim  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1437341825Sdim                    << " land = BB" << LandMBB->getNumber() << "\n";);
1438284677Sdim
1439341825Sdim  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1440341825Sdim  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1441296417Sdim  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1442284677Sdim}
1443284677Sdim
1444284677Sdimvoid AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1445284677Sdim    MachineBasicBlock *LandMBB) {
1446341825Sdim  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1447341825Sdim                    << ExitingMBB->getNumber() << " land = BB"
1448341825Sdim                    << LandMBB->getNumber() << "\n";);
1449284677Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1450284677Sdim  assert(BranchMI && isCondBranch(BranchMI));
1451284677Sdim  DebugLoc DL = BranchMI->getDebugLoc();
1452284677Sdim  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1453284677Sdim  MachineBasicBlock::iterator I = BranchMI;
1454284677Sdim  if (TrueBranch != LandMBB)
1455314564Sdim    reversePredicateSetter(I, *I->getParent());
1456341825Sdim  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1457341825Sdim  insertInstrBefore(I, R600::BREAK);
1458341825Sdim  insertInstrBefore(I, R600::ENDIF);
1459284677Sdim  //now branchInst can be erase safely
1460284677Sdim  BranchMI->eraseFromParent();
1461284677Sdim  //now take care of successors, retire blocks
1462296417Sdim  ExitingMBB->removeSuccessor(LandMBB, true);
1463284677Sdim}
1464284677Sdim
1465284677Sdimvoid AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1466284677Sdim    MachineBasicBlock *ContMBB) {
1467341825Sdim  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1468341825Sdim                    << ContingMBB->getNumber() << ", cont = BB"
1469341825Sdim                    << ContMBB->getNumber() << "\n";);
1470284677Sdim
1471284677Sdim  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1472284677Sdim  if (MI) {
1473284677Sdim    assert(isCondBranch(MI));
1474284677Sdim    MachineBasicBlock::iterator I = MI;
1475284677Sdim    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1476284677Sdim    int OldOpcode = MI->getOpcode();
1477284677Sdim    DebugLoc DL = MI->getDebugLoc();
1478284677Sdim
1479284677Sdim    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1480284677Sdim
1481284677Sdim    if (!UseContinueLogical) {
1482284677Sdim      int BranchOpcode =
1483284677Sdim          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1484284677Sdim          getBranchZeroOpcode(OldOpcode);
1485284677Sdim      insertCondBranchBefore(I, BranchOpcode, DL);
1486284677Sdim      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1487341825Sdim      insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1488341825Sdim      insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1489284677Sdim    } else {
1490284677Sdim      int BranchOpcode =
1491284677Sdim          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1492284677Sdim          getContinueZeroOpcode(OldOpcode);
1493284677Sdim      insertCondBranchBefore(I, BranchOpcode, DL);
1494284677Sdim    }
1495284677Sdim
1496284677Sdim    MI->eraseFromParent();
1497284677Sdim  } else {
1498284677Sdim    // if we've arrived here then we've already erased the branch instruction
1499284677Sdim    // travel back up the basic block to see the last reference of our debug
1500284677Sdim    // location we've just inserted that reference here so it should be
1501284677Sdim    // representative insertEnd to ensure phi-moves, if exist, go before the
1502284677Sdim    // continue-instr.
1503341825Sdim    insertInstrEnd(ContingMBB, R600::CONTINUE,
1504284677Sdim        getLastDebugLocInBB(ContingMBB));
1505284677Sdim  }
1506284677Sdim}
1507284677Sdim
1508284677Sdimint AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1509284677Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1510284677Sdim  int Cloned = 0;
1511284677Sdim  assert(PreMBB->isSuccessor(SrcMBB));
1512284677Sdim  while (SrcMBB && SrcMBB != DstMBB) {
1513284677Sdim    assert(SrcMBB->succ_size() == 1);
1514284677Sdim    if (SrcMBB->pred_size() > 1) {
1515284677Sdim      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1516284677Sdim      ++Cloned;
1517284677Sdim    }
1518284677Sdim
1519284677Sdim    PreMBB = SrcMBB;
1520284677Sdim    SrcMBB = *SrcMBB->succ_begin();
1521284677Sdim  }
1522284677Sdim
1523284677Sdim  return Cloned;
1524284677Sdim}
1525284677Sdim
1526284677SdimMachineBasicBlock *
1527284677SdimAMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1528284677Sdim    MachineBasicBlock *PredMBB) {
1529284677Sdim  assert(PredMBB->isSuccessor(MBB) &&
1530284677Sdim         "succBlk is not a prececessor of curBlk");
1531284677Sdim
1532284677Sdim  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
1533284677Sdim  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1534284677Sdim  //srcBlk, oldBlk, newBlk
1535284677Sdim
1536296417Sdim  PredMBB->replaceSuccessor(MBB, CloneMBB);
1537284677Sdim
1538284677Sdim  // add all successor to cloneBlk
1539284677Sdim  cloneSuccessorList(CloneMBB, MBB);
1540284677Sdim
1541284677Sdim  numClonedInstr += MBB->size();
1542284677Sdim
1543341825Sdim  LLVM_DEBUG(dbgs() << "Cloned block: "
1544341825Sdim                    << "BB" << MBB->getNumber() << "size " << MBB->size()
1545341825Sdim                    << "\n";);
1546284677Sdim
1547284677Sdim  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1548284677Sdim
1549284677Sdim  return CloneMBB;
1550284677Sdim}
1551284677Sdim
1552284677Sdimvoid AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1553284677Sdim    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1554284677Sdim  MachineBasicBlock::iterator SpliceEnd;
1555284677Sdim  //look for the input branchinstr, not the AMDGPU branchinstr
1556284677Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1557284677Sdim  if (!BranchMI) {
1558341825Sdim    LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1559284677Sdim    SpliceEnd = SrcMBB->end();
1560284677Sdim  } else {
1561341825Sdim    LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1562284677Sdim    SpliceEnd = BranchMI;
1563284677Sdim  }
1564341825Sdim  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1565341825Sdim                    << DstMBB->size() << "srcSize = " << SrcMBB->size()
1566341825Sdim                    << "\n";);
1567284677Sdim
1568284677Sdim  //splice insert before insertPos
1569284677Sdim  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1570284677Sdim
1571341825Sdim  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1572341825Sdim                    << DstMBB->size() << "srcSize = " << SrcMBB->size()
1573341825Sdim                    << '\n';);
1574284677Sdim}
1575284677Sdim
1576284677SdimMachineBasicBlock *
1577284677SdimAMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1578284677Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1579284677Sdim  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1580284677Sdim
1581284677Sdim  if (!LoopHeader || !LoopLatch)
1582284677Sdim    return nullptr;
1583284677Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1584284677Sdim  // Is LoopRep an infinite loop ?
1585284677Sdim  if (!BranchMI || !isUncondBranch(BranchMI))
1586284677Sdim    return nullptr;
1587284677Sdim
1588284677Sdim  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1589284677Sdim  FuncRep->push_back(DummyExitBlk);  //insert to function
1590284677Sdim  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1591341825Sdim  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1592327952Sdim  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1593287521Sdim  Ctx.emitError("Extra register needed to handle CFG");
1594287521Sdim  return nullptr;
1595284677Sdim}
1596284677Sdim
1597284677Sdimvoid AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1598284677Sdim  MachineInstr *BranchMI;
1599284677Sdim
1600284677Sdim  // I saw two unconditional branch in one basic block in example
1601284677Sdim  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1602284677Sdim  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1603284677Sdim          && isUncondBranch(BranchMI)) {
1604341825Sdim    LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1605284677Sdim    BranchMI->eraseFromParent();
1606284677Sdim  }
1607284677Sdim}
1608284677Sdim
1609284677Sdimvoid AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1610284677Sdim    MachineBasicBlock *MBB) {
1611284677Sdim  if (MBB->succ_size() != 2)
1612284677Sdim    return;
1613284677Sdim  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1614284677Sdim  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1615284677Sdim  if (MBB1 != MBB2)
1616284677Sdim    return;
1617284677Sdim
1618284677Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1619284677Sdim  assert(BranchMI && isCondBranch(BranchMI));
1620341825Sdim  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1621284677Sdim  BranchMI->eraseFromParent();
1622284677Sdim  SHOWNEWBLK(MBB1, "Removing redundant successor");
1623296417Sdim  MBB->removeSuccessor(MBB1, true);
1624284677Sdim}
1625284677Sdim
1626284677Sdimvoid AMDGPUCFGStructurizer::addDummyExitBlock(
1627284677Sdim    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1628284677Sdim  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1629284677Sdim  FuncRep->push_back(DummyExitBlk);  //insert to function
1630341825Sdim  insertInstrEnd(DummyExitBlk, R600::RETURN);
1631284677Sdim
1632284677Sdim  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1633284677Sdim       E = RetMBB.end(); It != E; ++It) {
1634284677Sdim    MachineBasicBlock *MBB = *It;
1635284677Sdim    MachineInstr *MI = getReturnInstr(MBB);
1636284677Sdim    if (MI)
1637284677Sdim      MI->eraseFromParent();
1638284677Sdim    MBB->addSuccessor(DummyExitBlk);
1639341825Sdim    LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1640341825Sdim                      << " successors\n";);
1641284677Sdim  }
1642284677Sdim  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1643284677Sdim}
1644284677Sdim
1645284677Sdimvoid AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1646284677Sdim  while (MBB->succ_size())
1647284677Sdim    MBB->removeSuccessor(*MBB->succ_begin());
1648284677Sdim}
1649284677Sdim
1650284677Sdimvoid AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1651284677Sdim    int SccNum) {
1652284677Sdim  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1653284677Sdim  if (!srcBlkInfo)
1654284677Sdim    srcBlkInfo = new BlockInformation();
1655284677Sdim  srcBlkInfo->SccNum = SccNum;
1656284677Sdim}
1657284677Sdim
1658284677Sdimvoid AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1659341825Sdim  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1660284677Sdim
1661284677Sdim  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1662284677Sdim
1663284677Sdim  if (!SrcBlkInfo)
1664284677Sdim    SrcBlkInfo = new BlockInformation();
1665284677Sdim
1666284677Sdim  SrcBlkInfo->IsRetired = true;
1667284677Sdim  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1668284677Sdim         && "can't retire block yet");
1669284677Sdim}
1670284677Sdim
1671284677SdimINITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1672284677Sdim                      "AMDGPU CFG Structurizer", false, false)
1673284677SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1674284677SdimINITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1675284677SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1676284677SdimINITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1677284677Sdim                      "AMDGPU CFG Structurizer", false, false)
1678284677Sdim
1679284677SdimFunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1680284677Sdim  return new AMDGPUCFGStructurizer();
1681284677Sdim}
1682