1249259Sdim//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim/// \file
9249259Sdim//==-----------------------------------------------------------------------===//
10249259Sdim
11249259Sdim#define DEBUG_TYPE "structcfg"
12249259Sdim
13263509Sdim#include "AMDGPU.h"
14249259Sdim#include "AMDGPUInstrInfo.h"
15263509Sdim#include "R600InstrInfo.h"
16263509Sdim#include "llvm/Support/Debug.h"
17263509Sdim#include "llvm/Support/raw_ostream.h"
18249259Sdim#include "llvm/ADT/SCCIterator.h"
19249259Sdim#include "llvm/ADT/SmallVector.h"
20249259Sdim#include "llvm/ADT/Statistic.h"
21263509Sdim#include "llvm/ADT/DepthFirstIterator.h"
22249259Sdim#include "llvm/Analysis/DominatorInternals.h"
23249259Sdim#include "llvm/Analysis/Dominators.h"
24249259Sdim#include "llvm/CodeGen/MachineDominators.h"
25249259Sdim#include "llvm/CodeGen/MachineFunction.h"
26249259Sdim#include "llvm/CodeGen/MachineFunctionAnalysis.h"
27249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
28249259Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
29249259Sdim#include "llvm/CodeGen/MachineJumpTableInfo.h"
30249259Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
31249259Sdim#include "llvm/CodeGen/MachinePostDominators.h"
32249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
33249259Sdim#include "llvm/Target/TargetInstrInfo.h"
34263509Sdim#include "llvm/Target/TargetMachine.h"
35249259Sdim
36249259Sdimusing namespace llvm;
37249259Sdim
38263509Sdim#define DEFAULT_VEC_SLOTS 8
39263509Sdim
40249259Sdim// TODO: move-begin.
41249259Sdim
42249259Sdim//===----------------------------------------------------------------------===//
43249259Sdim//
44249259Sdim// Statistics for CFGStructurizer.
45249259Sdim//
46249259Sdim//===----------------------------------------------------------------------===//
47249259Sdim
48249259SdimSTATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
49249259Sdim    "matched");
50249259SdimSTATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
51249259Sdim    "matched");
52249259SdimSTATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
53249259Sdim    "pattern matched");
54249259SdimSTATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
55249259SdimSTATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
56249259Sdim
57249259Sdim//===----------------------------------------------------------------------===//
58249259Sdim//
59249259Sdim// Miscellaneous utility for CFGStructurizer.
60249259Sdim//
61249259Sdim//===----------------------------------------------------------------------===//
62263509Sdimnamespace {
63249259Sdim#define SHOWNEWINSTR(i) \
64263509Sdim  DEBUG(dbgs() << "New instr: " << *i << "\n");
65249259Sdim
66249259Sdim#define SHOWNEWBLK(b, msg) \
67263509SdimDEBUG( \
68263509Sdim  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
69263509Sdim  dbgs() << "\n"; \
70263509Sdim);
71249259Sdim
72249259Sdim#define SHOWBLK_DETAIL(b, msg) \
73263509SdimDEBUG( \
74249259Sdim  if (b) { \
75263509Sdim  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
76263509Sdim  b->print(dbgs()); \
77263509Sdim  dbgs() << "\n"; \
78249259Sdim  } \
79263509Sdim);
80249259Sdim
81249259Sdim#define INVALIDSCCNUM -1
82249259Sdim
83249259Sdimtemplate<class NodeT>
84263509Sdimvoid ReverseVector(SmallVectorImpl<NodeT *> &Src) {
85249259Sdim  size_t sz = Src.size();
86249259Sdim  for (size_t i = 0; i < sz/2; ++i) {
87249259Sdim    NodeT *t = Src[i];
88249259Sdim    Src[i] = Src[sz - i - 1];
89249259Sdim    Src[sz - i - 1] = t;
90249259Sdim  }
91249259Sdim}
92249259Sdim
93263509Sdim} // end anonymous namespace
94249259Sdim
95249259Sdim//===----------------------------------------------------------------------===//
96249259Sdim//
97249259Sdim// supporting data structure for CFGStructurizer
98249259Sdim//
99249259Sdim//===----------------------------------------------------------------------===//
100249259Sdim
101249259Sdim
102263509Sdimnamespace {
103263509Sdim
104249259Sdimclass BlockInformation {
105249259Sdimpublic:
106263509Sdim  bool IsRetired;
107263509Sdim  int  SccNum;
108263509Sdim  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
109249259Sdim};
110249259Sdim
111263509Sdim} // end anonymous namespace
112249259Sdim
113249259Sdim//===----------------------------------------------------------------------===//
114249259Sdim//
115249259Sdim// CFGStructurizer
116249259Sdim//
117249259Sdim//===----------------------------------------------------------------------===//
118249259Sdim
119263509Sdimnamespace {
120263509Sdimclass AMDGPUCFGStructurizer : public MachineFunctionPass {
121249259Sdimpublic:
122263509Sdim  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
123263509Sdim  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
124263509Sdim  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
125263509Sdim
126263509Sdim  enum PathToKind {
127249259Sdim    Not_SinglePath = 0,
128249259Sdim    SinglePath_InPath = 1,
129249259Sdim    SinglePath_NotInPath = 2
130263509Sdim  };
131249259Sdim
132263509Sdim  static char ID;
133249259Sdim
134263509Sdim  AMDGPUCFGStructurizer(TargetMachine &tm) :
135263509Sdim      MachineFunctionPass(ID), TM(tm),
136263509Sdim      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
137263509Sdim      TRI(&TII->getRegisterInfo()) { }
138249259Sdim
139263509Sdim   const char *getPassName() const {
140263509Sdim    return "AMD IL Control Flow Graph structurizer Pass";
141263509Sdim  }
142249259Sdim
143263509Sdim  void getAnalysisUsage(AnalysisUsage &AU) const {
144263509Sdim    AU.addPreserved<MachineFunctionAnalysis>();
145263509Sdim    AU.addRequired<MachineFunctionAnalysis>();
146263509Sdim    AU.addRequired<MachineDominatorTree>();
147263509Sdim    AU.addRequired<MachinePostDominatorTree>();
148263509Sdim    AU.addRequired<MachineLoopInfo>();
149263509Sdim  }
150249259Sdim
151249259Sdim  /// Perform the CFG structurization
152263509Sdim  bool run();
153249259Sdim
154249259Sdim  /// Perform the CFG preparation
155263509Sdim  /// This step will remove every unconditionnal/dead jump instructions and make
156263509Sdim  /// sure all loops have an exit block
157263509Sdim  bool prepare();
158249259Sdim
159263509Sdim  bool runOnMachineFunction(MachineFunction &MF) {
160263509Sdim    DEBUG(MF.dump(););
161263509Sdim    OrderedBlks.clear();
162263509Sdim    FuncRep = &MF;
163263509Sdim    MLI = &getAnalysis<MachineLoopInfo>();
164263509Sdim    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
165263509Sdim    MDT = &getAnalysis<MachineDominatorTree>();
166263509Sdim    DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
167263509Sdim    PDT = &getAnalysis<MachinePostDominatorTree>();
168263509Sdim    DEBUG(PDT->print(dbgs()););
169263509Sdim    prepare();
170263509Sdim    run();
171263509Sdim    DEBUG(MF.dump(););
172263509Sdim    return true;
173263509Sdim  }
174249259Sdim
175263509Sdimprotected:
176263509Sdim  TargetMachine &TM;
177263509Sdim  MachineDominatorTree *MDT;
178263509Sdim  MachinePostDominatorTree *PDT;
179263509Sdim  MachineLoopInfo *MLI;
180263509Sdim  const R600InstrInfo *TII;
181263509Sdim  const AMDGPURegisterInfo *TRI;
182249259Sdim
183263509Sdim  // PRINT FUNCTIONS
184263509Sdim  /// Print the ordered Blocks.
185263509Sdim  void printOrderedBlocks() const {
186263509Sdim    size_t i = 0;
187263509Sdim    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
188263509Sdim        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
189263509Sdim      dbgs() << "BB" << (*iterBlk)->getNumber();
190263509Sdim      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
191263509Sdim      if (i != 0 && i % 10 == 0) {
192263509Sdim        dbgs() << "\n";
193263509Sdim      } else {
194263509Sdim        dbgs() << " ";
195263509Sdim      }
196263509Sdim    }
197263509Sdim  }
198263509Sdim  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
199263509Sdim    for (MachineLoop::iterator iter = LoopInfo.begin(),
200263509Sdim         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
201263509Sdim      (*iter)->print(dbgs(), 0);
202263509Sdim    }
203263509Sdim  }
204249259Sdim
205263509Sdim  // UTILITY FUNCTIONS
206263509Sdim  int getSCCNum(MachineBasicBlock *MBB) const;
207263509Sdim  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
208263509Sdim  bool hasBackEdge(MachineBasicBlock *MBB) const;
209263509Sdim  static unsigned getLoopDepth(MachineLoop *LoopRep);
210263509Sdim  bool isRetiredBlock(MachineBasicBlock *MBB) const;
211263509Sdim  bool isActiveLoophead(MachineBasicBlock *MBB) const;
212263509Sdim  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
213263509Sdim      bool AllowSideEntry = true) const;
214263509Sdim  int countActiveBlock(MBBVector::const_iterator It,
215263509Sdim      MBBVector::const_iterator E) const;
216263509Sdim  bool needMigrateBlock(MachineBasicBlock *MBB) const;
217249259Sdim
218263509Sdim  // Utility Functions
219263509Sdim  void reversePredicateSetter(MachineBasicBlock::iterator I);
220263509Sdim  /// Compute the reversed DFS post order of Blocks
221263509Sdim  void orderBlocks(MachineFunction *MF);
222249259Sdim
223263509Sdim  // Function originaly from CFGStructTraits
224263509Sdim  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
225263509Sdim      DebugLoc DL = DebugLoc());
226263509Sdim  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
227263509Sdim    DebugLoc DL = DebugLoc());
228263509Sdim  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
229263509Sdim  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
230263509Sdim      DebugLoc DL);
231263509Sdim  void insertCondBranchBefore(MachineBasicBlock *MBB,
232263509Sdim      MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
233263509Sdim      DebugLoc DL);
234263509Sdim  void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
235263509Sdim  static int getBranchNzeroOpcode(int OldOpcode);
236263509Sdim  static int getBranchZeroOpcode(int OldOpcode);
237263509Sdim  static int getContinueNzeroOpcode(int OldOpcode);
238263509Sdim  static int getContinueZeroOpcode(int OldOpcode);
239263509Sdim  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
240263509Sdim  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
241263509Sdim  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
242263509Sdim      MachineInstr *MI);
243263509Sdim  static bool isCondBranch(MachineInstr *MI);
244263509Sdim  static bool isUncondBranch(MachineInstr *MI);
245263509Sdim  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
246263509Sdim  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
247263509Sdim  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
248263509Sdim  ///
249263509Sdim  /// BB with backward-edge could have move instructions after the branch
250263509Sdim  /// instruction.  Such move instruction "belong to" the loop backward-edge.
251263509Sdim  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
252263509Sdim  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
253263509Sdim  static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
254263509Sdim  static bool isReturnBlock(MachineBasicBlock *MBB);
255263509Sdim  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
256263509Sdim      MachineBasicBlock *SrcMBB) ;
257263509Sdim  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
258263509Sdim  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
259263509Sdim  /// because the AMDGPU instruction is not recognized as terminator fix this
260263509Sdim  /// and retire this routine
261263509Sdim  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
262263509Sdim      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
263263509Sdim  static void wrapup(MachineBasicBlock *MBB);
264249259Sdim
265249259Sdim
266263509Sdim  int patternMatch(MachineBasicBlock *MBB);
267263509Sdim  int patternMatchGroup(MachineBasicBlock *MBB);
268263509Sdim  int serialPatternMatch(MachineBasicBlock *MBB);
269263509Sdim  int ifPatternMatch(MachineBasicBlock *MBB);
270263509Sdim  int loopendPatternMatch();
271263509Sdim  int mergeLoop(MachineLoop *LoopRep);
272263509Sdim  int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
273249259Sdim
274263509Sdim  void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
275263509Sdim      MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
276263509Sdim      MachineLoop *ContLoop);
277263509Sdim  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
278263509Sdim  /// the same loop with LoopLandInfo without explicitly keeping track of
279263509Sdim  /// loopContBlks and loopBreakBlks, this is a method to get the information.
280263509Sdim  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
281263509Sdim      MachineBasicBlock *Src2MBB);
282263509Sdim  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
283263509Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
284263509Sdim  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
285263509Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
286263509Sdim  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
287263509Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
288263509Sdim      MachineBasicBlock **LandMBBPtr);
289263509Sdim  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
290263509Sdim      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
291263509Sdim      MachineBasicBlock *LandMBB, bool Detail = false);
292263509Sdim  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
293263509Sdim      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
294263509Sdim  void mergeSerialBlock(MachineBasicBlock *DstMBB,
295263509Sdim      MachineBasicBlock *SrcMBB);
296249259Sdim
297263509Sdim  void mergeIfthenelseBlock(MachineInstr *BranchMI,
298263509Sdim      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
299263509Sdim      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
300263509Sdim  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
301263509Sdim      MachineBasicBlock *LandMBB);
302263509Sdim  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
303263509Sdim      MachineBasicBlock *LandMBB);
304263509Sdim  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
305263509Sdim      MachineBasicBlock *ContMBB);
306263509Sdim  /// normalizeInfiniteLoopExit change
307263509Sdim  ///   B1:
308263509Sdim  ///        uncond_br LoopHeader
309263509Sdim  ///
310263509Sdim  /// to
311263509Sdim  ///   B1:
312263509Sdim  ///        cond_br 1 LoopHeader dummyExit
313263509Sdim  /// and return the newly added dummy exit block
314263509Sdim  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
315263509Sdim  void removeUnconditionalBranch(MachineBasicBlock *MBB);
316263509Sdim  /// Remove duplicate branches instructions in a block.
317263509Sdim  /// For instance
318263509Sdim  /// B0:
319263509Sdim  ///    cond_br X B1 B2
320263509Sdim  ///    cond_br X B1 B2
321263509Sdim  /// is transformed to
322263509Sdim  /// B0:
323263509Sdim  ///    cond_br X B1 B2
324263509Sdim  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
325263509Sdim  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
326263509Sdim  void removeSuccessor(MachineBasicBlock *MBB);
327263509Sdim  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
328263509Sdim      MachineBasicBlock *PredMBB);
329263509Sdim  void migrateInstruction(MachineBasicBlock *SrcMBB,
330263509Sdim      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
331263509Sdim  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
332263509Sdim  void retireBlock(MachineBasicBlock *MBB);
333263509Sdim  void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
334249259Sdim
335263509Sdim  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
336263509Sdim  /// This is work around solution for findNearestCommonDominator not avaiable
337263509Sdim  /// to post dom a proper fix should go to Dominators.h.
338263509Sdim  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
339263509Sdim      MachineBasicBlock *MBB2);
340249259Sdim
341249259Sdimprivate:
342263509Sdim  MBBInfoMap BlockInfoMap;
343263509Sdim  LoopLandInfoMap LLInfoMap;
344263509Sdim  std::map<MachineLoop *, bool> Visited;
345263509Sdim  MachineFunction *FuncRep;
346263509Sdim  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
347263509Sdim};
348249259Sdim
349263509Sdimint AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
350263509Sdim  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
351263509Sdim  if (It == BlockInfoMap.end())
352263509Sdim    return INVALIDSCCNUM;
353263509Sdim  return (*It).second->SccNum;
354263509Sdim}
355249259Sdim
356263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
357263509Sdim    const {
358263509Sdim  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
359263509Sdim  if (It == LLInfoMap.end())
360263509Sdim    return NULL;
361263509Sdim  return (*It).second;
362263509Sdim}
363249259Sdim
364263509Sdimbool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
365263509Sdim  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
366263509Sdim  if (!LoopRep)
367263509Sdim    return false;
368263509Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
369263509Sdim  return MBB->isSuccessor(LoopHeader);
370249259Sdim}
371249259Sdim
372263509Sdimunsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
373263509Sdim  return LoopRep ? LoopRep->getLoopDepth() : 0;
374263509Sdim}
375263509Sdim
376263509Sdimbool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
377263509Sdim  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
378263509Sdim  if (It == BlockInfoMap.end())
379263509Sdim    return false;
380263509Sdim  return (*It).second->IsRetired;
381263509Sdim}
382263509Sdim
383263509Sdimbool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
384263509Sdim  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
385263509Sdim  while (LoopRep && LoopRep->getHeader() == MBB) {
386263509Sdim    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
387263509Sdim    if(!LoopLand)
388263509Sdim      return true;
389263509Sdim    if (!isRetiredBlock(LoopLand))
390263509Sdim      return true;
391263509Sdim    LoopRep = LoopRep->getParentLoop();
392249259Sdim  }
393263509Sdim  return false;
394249259Sdim}
395263509SdimAMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
396263509Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
397263509Sdim    bool AllowSideEntry) const {
398263509Sdim  assert(DstMBB);
399263509Sdim  if (SrcMBB == DstMBB)
400263509Sdim    return SinglePath_InPath;
401263509Sdim  while (SrcMBB && SrcMBB->succ_size() == 1) {
402263509Sdim    SrcMBB = *SrcMBB->succ_begin();
403263509Sdim    if (SrcMBB == DstMBB)
404263509Sdim      return SinglePath_InPath;
405263509Sdim    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
406263509Sdim      return Not_SinglePath;
407263509Sdim  }
408263509Sdim  if (SrcMBB && SrcMBB->succ_size()==0)
409263509Sdim    return SinglePath_NotInPath;
410263509Sdim  return Not_SinglePath;
411263509Sdim}
412249259Sdim
413263509Sdimint AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
414263509Sdim    MBBVector::const_iterator E) const {
415263509Sdim  int Count = 0;
416263509Sdim  while (It != E) {
417263509Sdim    if (!isRetiredBlock(*It))
418263509Sdim      ++Count;
419263509Sdim    ++It;
420263509Sdim  }
421263509Sdim  return Count;
422263509Sdim}
423249259Sdim
424263509Sdimbool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
425263509Sdim  unsigned BlockSizeThreshold = 30;
426263509Sdim  unsigned CloneInstrThreshold = 100;
427263509Sdim  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
428249259Sdim
429263509Sdim  if(!MultiplePreds)
430263509Sdim    return false;
431263509Sdim  unsigned BlkSize = MBB->size();
432263509Sdim  return ((BlkSize > BlockSizeThreshold) &&
433263509Sdim      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
434263509Sdim}
435249259Sdim
436263509Sdimvoid AMDGPUCFGStructurizer::reversePredicateSetter(
437263509Sdim    MachineBasicBlock::iterator I) {
438263509Sdim  while (I--) {
439263509Sdim    if (I->getOpcode() == AMDGPU::PRED_X) {
440263509Sdim      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
441263509Sdim      case OPCODE_IS_ZERO_INT:
442263509Sdim        static_cast<MachineInstr *>(I)->getOperand(2)
443263509Sdim            .setImm(OPCODE_IS_NOT_ZERO_INT);
444263509Sdim        return;
445263509Sdim      case OPCODE_IS_NOT_ZERO_INT:
446263509Sdim        static_cast<MachineInstr *>(I)->getOperand(2)
447263509Sdim            .setImm(OPCODE_IS_ZERO_INT);
448263509Sdim        return;
449263509Sdim      case OPCODE_IS_ZERO:
450263509Sdim        static_cast<MachineInstr *>(I)->getOperand(2)
451263509Sdim            .setImm(OPCODE_IS_NOT_ZERO);
452263509Sdim        return;
453263509Sdim      case OPCODE_IS_NOT_ZERO:
454263509Sdim        static_cast<MachineInstr *>(I)->getOperand(2)
455263509Sdim            .setImm(OPCODE_IS_ZERO);
456263509Sdim        return;
457263509Sdim      default:
458263509Sdim        llvm_unreachable("PRED_X Opcode invalid!");
459263509Sdim      }
460263509Sdim    }
461249259Sdim  }
462263509Sdim}
463249259Sdim
464263509Sdimvoid AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
465263509Sdim    int NewOpcode, DebugLoc DL) {
466263509Sdim MachineInstr *MI = MBB->getParent()
467263509Sdim    ->CreateMachineInstr(TII->get(NewOpcode), DL);
468263509Sdim  MBB->push_back(MI);
469263509Sdim  //assume the instruction doesn't take any reg operand ...
470263509Sdim  SHOWNEWINSTR(MI);
471263509Sdim}
472263509Sdim
473263509SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
474263509Sdim    int NewOpcode, DebugLoc DL) {
475263509Sdim  MachineInstr *MI =
476263509Sdim      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
477263509Sdim  if (MBB->begin() != MBB->end())
478263509Sdim    MBB->insert(MBB->begin(), MI);
479263509Sdim  else
480263509Sdim    MBB->push_back(MI);
481263509Sdim  SHOWNEWINSTR(MI);
482263509Sdim  return MI;
483263509Sdim}
484263509Sdim
485263509SdimMachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
486263509Sdim    MachineBasicBlock::iterator I, int NewOpcode) {
487263509Sdim  MachineInstr *OldMI = &(*I);
488263509Sdim  MachineBasicBlock *MBB = OldMI->getParent();
489263509Sdim  MachineInstr *NewMBB =
490263509Sdim      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
491263509Sdim  MBB->insert(I, NewMBB);
492263509Sdim  //assume the instruction doesn't take any reg operand ...
493263509Sdim  SHOWNEWINSTR(NewMBB);
494263509Sdim  return NewMBB;
495263509Sdim}
496263509Sdim
497263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore(
498263509Sdim    MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
499263509Sdim  MachineInstr *OldMI = &(*I);
500263509Sdim  MachineBasicBlock *MBB = OldMI->getParent();
501263509Sdim  MachineFunction *MF = MBB->getParent();
502263509Sdim  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
503263509Sdim  MBB->insert(I, NewMI);
504263509Sdim  MachineInstrBuilder MIB(*MF, NewMI);
505263509Sdim  MIB.addReg(OldMI->getOperand(1).getReg(), false);
506263509Sdim  SHOWNEWINSTR(NewMI);
507263509Sdim  //erase later oldInstr->eraseFromParent();
508263509Sdim}
509263509Sdim
510263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
511263509Sdim    MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
512263509Sdim    DebugLoc DL) {
513263509Sdim  MachineFunction *MF = blk->getParent();
514263509Sdim  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
515263509Sdim  //insert before
516263509Sdim  blk->insert(I, NewInstr);
517263509Sdim  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
518263509Sdim  SHOWNEWINSTR(NewInstr);
519263509Sdim}
520263509Sdim
521263509Sdimvoid AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
522263509Sdim    int NewOpcode, int RegNum) {
523263509Sdim  MachineFunction *MF = MBB->getParent();
524263509Sdim  MachineInstr *NewInstr =
525263509Sdim    MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
526263509Sdim  MBB->push_back(NewInstr);
527263509Sdim  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
528263509Sdim  SHOWNEWINSTR(NewInstr);
529263509Sdim}
530263509Sdim
531263509Sdimint AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
532263509Sdim  switch(OldOpcode) {
533263509Sdim  case AMDGPU::JUMP_COND:
534263509Sdim  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
535263509Sdim  case AMDGPU::BRANCH_COND_i32:
536263509Sdim  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
537263509Sdim  default: llvm_unreachable("internal error");
538249259Sdim  }
539263509Sdim  return -1;
540263509Sdim}
541249259Sdim
542263509Sdimint AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
543263509Sdim  switch(OldOpcode) {
544263509Sdim  case AMDGPU::JUMP_COND:
545263509Sdim  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
546263509Sdim  case AMDGPU::BRANCH_COND_i32:
547263509Sdim  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
548263509Sdim  default: llvm_unreachable("internal error");
549249259Sdim  }
550263509Sdim  return -1;
551263509Sdim}
552249259Sdim
553263509Sdimint AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
554263509Sdim  switch(OldOpcode) {
555263509Sdim  case AMDGPU::JUMP_COND:
556263509Sdim  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
557263509Sdim  default: llvm_unreachable("internal error");
558263509Sdim  };
559263509Sdim  return -1;
560263509Sdim}
561249259Sdim
562263509Sdimint AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
563263509Sdim  switch(OldOpcode) {
564263509Sdim  case AMDGPU::JUMP_COND:
565263509Sdim  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
566263509Sdim  default: llvm_unreachable("internal error");
567249259Sdim  }
568263509Sdim  return -1;
569263509Sdim}
570249259Sdim
571263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
572263509Sdim  return MI->getOperand(0).getMBB();
573263509Sdim}
574249259Sdim
575263509Sdimvoid AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
576263509Sdim    MachineBasicBlock *MBB) {
577263509Sdim  MI->getOperand(0).setMBB(MBB);
578263509Sdim}
579263509Sdim
580263509SdimMachineBasicBlock *
581263509SdimAMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
582263509Sdim    MachineInstr *MI) {
583263509Sdim  assert(MBB->succ_size() == 2);
584263509Sdim  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
585263509Sdim  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
586263509Sdim  MachineBasicBlock::succ_iterator Next = It;
587263509Sdim  ++Next;
588263509Sdim  return (*It == TrueBranch) ? *Next : *It;
589263509Sdim}
590263509Sdim
591263509Sdimbool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
592263509Sdim  switch (MI->getOpcode()) {
593263509Sdim    case AMDGPU::JUMP_COND:
594263509Sdim    case AMDGPU::BRANCH_COND_i32:
595263509Sdim    case AMDGPU::BRANCH_COND_f32: return true;
596263509Sdim  default:
597263509Sdim    return false;
598263509Sdim  }
599263509Sdim  return false;
600263509Sdim}
601263509Sdim
602263509Sdimbool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
603263509Sdim  switch (MI->getOpcode()) {
604263509Sdim  case AMDGPU::JUMP:
605263509Sdim  case AMDGPU::BRANCH:
606263509Sdim    return true;
607263509Sdim  default:
608263509Sdim    return false;
609263509Sdim  }
610263509Sdim  return false;
611263509Sdim}
612263509Sdim
613263509SdimDebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
614263509Sdim  //get DebugLoc from the first MachineBasicBlock instruction with debug info
615263509Sdim  DebugLoc DL;
616263509Sdim  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
617263509Sdim      ++It) {
618263509Sdim    MachineInstr *instr = &(*It);
619263509Sdim    if (instr->getDebugLoc().isUnknown() == false)
620263509Sdim      DL = instr->getDebugLoc();
621263509Sdim  }
622263509Sdim  return DL;
623263509Sdim}
624263509Sdim
625263509SdimMachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
626263509Sdim    MachineBasicBlock *MBB) {
627263509Sdim  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
628263509Sdim  MachineInstr *MI = &*It;
629263509Sdim  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
630263509Sdim    return MI;
631263509Sdim  return NULL;
632263509Sdim}
633263509Sdim
634263509SdimMachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
635263509Sdim    MachineBasicBlock *MBB) {
636263509Sdim  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
637263509Sdim      It != E; ++It) {
638263509Sdim    // FIXME: Simplify
639263509Sdim    MachineInstr *MI = &*It;
640263509Sdim    if (MI) {
641263509Sdim      if (isCondBranch(MI) || isUncondBranch(MI))
642263509Sdim        return MI;
643263509Sdim      else if (!TII->isMov(MI->getOpcode()))
644263509Sdim        break;
645249259Sdim    }
646263509Sdim  }
647263509Sdim  return NULL;
648263509Sdim}
649249259Sdim
650263509SdimMachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
651263509Sdim  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
652263509Sdim  if (It != MBB->rend()) {
653263509Sdim    MachineInstr *instr = &(*It);
654263509Sdim    if (instr->getOpcode() == AMDGPU::RETURN)
655263509Sdim      return instr;
656249259Sdim  }
657263509Sdim  return NULL;
658263509Sdim}
659249259Sdim
660263509SdimMachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
661263509Sdim  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
662263509Sdim  if (It != MBB->rend()) {
663263509Sdim    MachineInstr *MI = &(*It);
664263509Sdim    if (MI->getOpcode() == AMDGPU::CONTINUE)
665263509Sdim      return MI;
666263509Sdim  }
667263509Sdim  return NULL;
668263509Sdim}
669249259Sdim
670263509Sdimbool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
671263509Sdim  MachineInstr *MI = getReturnInstr(MBB);
672263509Sdim  bool IsReturn = (MBB->succ_size() == 0);
673263509Sdim  if (MI)
674263509Sdim    assert(IsReturn);
675263509Sdim  else if (IsReturn)
676263509Sdim    DEBUG(
677263509Sdim      dbgs() << "BB" << MBB->getNumber()
678263509Sdim             <<" is return block without RETURN instr\n";);
679263509Sdim  return  IsReturn;
680263509Sdim}
681249259Sdim
682263509Sdimvoid AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
683263509Sdim    MachineBasicBlock *SrcMBB) {
684263509Sdim  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
685263509Sdim       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
686263509Sdim    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
687263509Sdim}
688263509Sdim
689263509SdimMachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
690263509Sdim  MachineFunction *Func = MBB->getParent();
691263509Sdim  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
692263509Sdim  Func->push_back(NewMBB);  //insert to function
693263509Sdim  for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
694263509Sdim      It != E; ++It) {
695263509Sdim    MachineInstr *MI = Func->CloneMachineInstr(It);
696263509Sdim    NewMBB->push_back(MI);
697249259Sdim  }
698263509Sdim  return NewMBB;
699263509Sdim}
700249259Sdim
701263509Sdimvoid AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
702263509Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
703263509Sdim    MachineBasicBlock *NewBlk) {
704263509Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
705263509Sdim  if (BranchMI && isCondBranch(BranchMI) &&
706263509Sdim      getTrueBranch(BranchMI) == OldMBB)
707263509Sdim    setTrueBranch(BranchMI, NewBlk);
708263509Sdim}
709263509Sdim
710263509Sdimvoid AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
711263509Sdim  assert((!MBB->getParent()->getJumpTableInfo()
712263509Sdim          || MBB->getParent()->getJumpTableInfo()->isEmpty())
713263509Sdim         && "found a jump table");
714263509Sdim
715263509Sdim   //collect continue right before endloop
716263509Sdim   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
717263509Sdim   MachineBasicBlock::iterator Pre = MBB->begin();
718263509Sdim   MachineBasicBlock::iterator E = MBB->end();
719263509Sdim   MachineBasicBlock::iterator It = Pre;
720263509Sdim   while (It != E) {
721263509Sdim     if (Pre->getOpcode() == AMDGPU::CONTINUE
722263509Sdim         && It->getOpcode() == AMDGPU::ENDLOOP)
723263509Sdim       ContInstr.push_back(Pre);
724263509Sdim     Pre = It;
725263509Sdim     ++It;
726263509Sdim   }
727263509Sdim
728263509Sdim   //delete continue right before endloop
729263509Sdim   for (unsigned i = 0; i < ContInstr.size(); ++i)
730263509Sdim      ContInstr[i]->eraseFromParent();
731263509Sdim
732263509Sdim   // TODO to fix up jump table so later phase won't be confused.  if
733263509Sdim   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
734263509Sdim   // there isn't such an interface yet.  alternatively, replace all the other
735263509Sdim   // blocks in the jump table with the entryBlk //}
736263509Sdim
737263509Sdim}
738263509Sdim
739263509Sdim
740263509Sdimbool AMDGPUCFGStructurizer::prepare() {
741263509Sdim  bool Changed = false;
742263509Sdim
743263509Sdim  //FIXME: if not reducible flow graph, make it so ???
744263509Sdim
745263509Sdim  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
746263509Sdim
747263509Sdim  orderBlocks(FuncRep);
748263509Sdim
749263509Sdim  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
750263509Sdim
751263509Sdim  // Add an ExitBlk to loop that don't have one
752263509Sdim  for (MachineLoopInfo::iterator It = MLI->begin(),
753263509Sdim       E = MLI->end(); It != E; ++It) {
754263509Sdim    MachineLoop *LoopRep = (*It);
755263509Sdim    MBBVector ExitingMBBs;
756263509Sdim    LoopRep->getExitingBlocks(ExitingMBBs);
757263509Sdim
758263509Sdim    if (ExitingMBBs.size() == 0) {
759263509Sdim      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
760263509Sdim      if (DummyExitBlk)
761263509Sdim        RetBlks.push_back(DummyExitBlk);
762263509Sdim    }
763249259Sdim  }
764249259Sdim
765263509Sdim  // Remove unconditional branch instr.
766263509Sdim  // Add dummy exit block iff there are multiple returns.
767263509Sdim  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
768263509Sdim       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
769263509Sdim    MachineBasicBlock *MBB = *It;
770263509Sdim    removeUnconditionalBranch(MBB);
771263509Sdim    removeRedundantConditionalBranch(MBB);
772263509Sdim    if (isReturnBlock(MBB)) {
773263509Sdim      RetBlks.push_back(MBB);
774263509Sdim    }
775263509Sdim    assert(MBB->succ_size() <= 2);
776249259Sdim  }
777249259Sdim
778263509Sdim  if (RetBlks.size() >= 2) {
779263509Sdim    addDummyExitBlock(RetBlks);
780263509Sdim    Changed = true;
781249259Sdim  }
782249259Sdim
783263509Sdim  return Changed;
784263509Sdim}
785263509Sdim
786263509Sdimbool AMDGPUCFGStructurizer::run() {
787263509Sdim
788263509Sdim  //Assume reducible CFG...
789263509Sdim  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
790263509Sdim
791249259Sdim#ifdef STRESSTEST
792249259Sdim  //Use the worse block ordering to test the algorithm.
793249259Sdim  ReverseVector(orderedBlks);
794249259Sdim#endif
795249259Sdim
796263509Sdim  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
797263509Sdim  int NumIter = 0;
798263509Sdim  bool Finish = false;
799263509Sdim  MachineBasicBlock *MBB;
800263509Sdim  bool MakeProgress = false;
801263509Sdim  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
802263509Sdim                                        OrderedBlks.end());
803249259Sdim
804249259Sdim  do {
805263509Sdim    ++NumIter;
806263509Sdim    DEBUG(
807263509Sdim      dbgs() << "numIter = " << NumIter
808263509Sdim             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
809263509Sdim    );
810249259Sdim
811263509Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
812263509Sdim        OrderedBlks.begin();
813263509Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
814263509Sdim        OrderedBlks.end();
815249259Sdim
816263509Sdim    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
817263509Sdim        It;
818263509Sdim    MachineBasicBlock *SccBeginMBB = NULL;
819263509Sdim    int SccNumBlk = 0;  // The number of active blocks, init to a
820249259Sdim                        // maximum possible number.
821263509Sdim    int SccNumIter;     // Number of iteration in this SCC.
822249259Sdim
823263509Sdim    while (It != E) {
824263509Sdim      MBB = *It;
825249259Sdim
826263509Sdim      if (!SccBeginMBB) {
827263509Sdim        SccBeginIter = It;
828263509Sdim        SccBeginMBB = MBB;
829263509Sdim        SccNumIter = 0;
830263509Sdim        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
831263509Sdim        DEBUG(
832263509Sdim              dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
833263509Sdim              dbgs() << "\n";
834263509Sdim        );
835249259Sdim      }
836249259Sdim
837263509Sdim      if (!isRetiredBlock(MBB))
838263509Sdim        patternMatch(MBB);
839249259Sdim
840263509Sdim      ++It;
841249259Sdim
842263509Sdim      bool ContNextScc = true;
843263509Sdim      if (It == E
844263509Sdim          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
845249259Sdim        // Just finish one scc.
846263509Sdim        ++SccNumIter;
847263509Sdim        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
848263509Sdim        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
849263509Sdim          DEBUG(
850263509Sdim            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
851263509Sdim                   << ", sccNumIter = " << SccNumIter;
852263509Sdim            dbgs() << "doesn't make any progress\n";
853263509Sdim          );
854263509Sdim          ContNextScc = true;
855263509Sdim        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
856263509Sdim          SccNumBlk = sccRemainedNumBlk;
857263509Sdim          It = SccBeginIter;
858263509Sdim          ContNextScc = false;
859263509Sdim          DEBUG(
860263509Sdim            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
861263509Sdim                   << "sccNumIter = " << SccNumIter << "\n";
862263509Sdim            FuncRep->viewCFG();
863263509Sdim          );
864249259Sdim        } else {
865249259Sdim          // Finish the current scc.
866263509Sdim          ContNextScc = true;
867249259Sdim        }
868249259Sdim      } else {
869249259Sdim        // Continue on next component in the current scc.
870263509Sdim        ContNextScc = false;
871249259Sdim      }
872249259Sdim
873263509Sdim      if (ContNextScc)
874263509Sdim        SccBeginMBB = NULL;
875249259Sdim    } //while, "one iteration" over the function.
876249259Sdim
877263509Sdim    MachineBasicBlock *EntryMBB =
878263509Sdim        GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
879263509Sdim    if (EntryMBB->succ_size() == 0) {
880263509Sdim      Finish = true;
881263509Sdim      DEBUG(
882263509Sdim        dbgs() << "Reduce to one block\n";
883263509Sdim      );
884249259Sdim    } else {
885263509Sdim      int NewnumRemainedBlk
886263509Sdim        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
887249259Sdim      // consider cloned blocks ??
888263509Sdim      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
889263509Sdim        MakeProgress = true;
890263509Sdim        NumRemainedBlk = NewnumRemainedBlk;
891249259Sdim      } else {
892263509Sdim        MakeProgress = false;
893263509Sdim        DEBUG(
894263509Sdim          dbgs() << "No progress\n";
895263509Sdim        );
896249259Sdim      }
897249259Sdim    }
898263509Sdim  } while (!Finish && MakeProgress);
899249259Sdim
900249259Sdim  // Misc wrap up to maintain the consistency of the Function representation.
901263509Sdim  wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
902249259Sdim
903249259Sdim  // Detach retired Block, release memory.
904263509Sdim  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
905263509Sdim      It != E; ++It) {
906263509Sdim    if ((*It).second && (*It).second->IsRetired) {
907263509Sdim      assert(((*It).first)->getNumber() != -1);
908263509Sdim      DEBUG(
909263509Sdim        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
910263509Sdim      );
911263509Sdim      (*It).first->eraseFromParent();  //Remove from the parent Function.
912249259Sdim    }
913263509Sdim    delete (*It).second;
914249259Sdim  }
915263509Sdim  BlockInfoMap.clear();
916263509Sdim  LLInfoMap.clear();
917249259Sdim
918263509Sdim  DEBUG(
919263509Sdim    FuncRep->viewCFG();
920263509Sdim  );
921249259Sdim
922263509Sdim  if (!Finish)
923263509Sdim    llvm_unreachable("IRREDUCIBL_CF");
924249259Sdim
925249259Sdim  return true;
926263509Sdim}
927249259Sdim
928249259Sdim
929263509Sdim
930263509Sdimvoid AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
931263509Sdim  int SccNum = 0;
932263509Sdim  MachineBasicBlock *MBB;
933263509Sdim  for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF);
934263509Sdim      It != E; ++It, ++SccNum) {
935263509Sdim    std::vector<MachineBasicBlock *> &SccNext = *It;
936263509Sdim    for (std::vector<MachineBasicBlock *>::const_iterator
937263509Sdim         blockIter = SccNext.begin(), blockEnd = SccNext.end();
938249259Sdim         blockIter != blockEnd; ++blockIter) {
939263509Sdim      MBB = *blockIter;
940263509Sdim      OrderedBlks.push_back(MBB);
941263509Sdim      recordSccnum(MBB, SccNum);
942249259Sdim    }
943249259Sdim  }
944249259Sdim
945249259Sdim  //walk through all the block in func to check for unreachable
946263509Sdim  typedef GraphTraits<MachineFunction *> GTM;
947263509Sdim  MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
948263509Sdim  for (; It != E; ++It) {
949263509Sdim    MachineBasicBlock *MBB = &(*It);
950263509Sdim    SccNum = getSCCNum(MBB);
951263509Sdim    if (SccNum == INVALIDSCCNUM)
952263509Sdim      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
953249259Sdim  }
954263509Sdim}
955249259Sdim
956263509Sdimint AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
957263509Sdim  int NumMatch = 0;
958263509Sdim  int CurMatch;
959249259Sdim
960263509Sdim  DEBUG(
961263509Sdim        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
962263509Sdim  );
963249259Sdim
964263509Sdim  while ((CurMatch = patternMatchGroup(MBB)) > 0)
965263509Sdim    NumMatch += CurMatch;
966249259Sdim
967263509Sdim  DEBUG(
968263509Sdim        dbgs() << "End patternMatch BB" << MBB->getNumber()
969263509Sdim      << ", numMatch = " << NumMatch << "\n";
970263509Sdim  );
971249259Sdim
972263509Sdim  return NumMatch;
973263509Sdim}
974249259Sdim
975263509Sdimint AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
976263509Sdim  int NumMatch = 0;
977263509Sdim  NumMatch += loopendPatternMatch();
978263509Sdim  NumMatch += serialPatternMatch(MBB);
979263509Sdim  NumMatch += ifPatternMatch(MBB);
980263509Sdim  return NumMatch;
981263509Sdim}
982249259Sdim
983263509Sdim
984263509Sdimint AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
985263509Sdim  if (MBB->succ_size() != 1)
986249259Sdim    return 0;
987249259Sdim
988263509Sdim  MachineBasicBlock *childBlk = *MBB->succ_begin();
989263509Sdim  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
990249259Sdim    return 0;
991249259Sdim
992263509Sdim  mergeSerialBlock(MBB, childBlk);
993249259Sdim  ++numSerialPatternMatch;
994249259Sdim  return 1;
995263509Sdim}
996249259Sdim
997263509Sdimint AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
998249259Sdim  //two edges
999263509Sdim  if (MBB->succ_size() != 2)
1000249259Sdim    return 0;
1001263509Sdim  if (hasBackEdge(MBB))
1002249259Sdim    return 0;
1003263509Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1004263509Sdim  if (!BranchMI)
1005249259Sdim    return 0;
1006249259Sdim
1007263509Sdim  assert(isCondBranch(BranchMI));
1008263509Sdim  int NumMatch = 0;
1009249259Sdim
1010263509Sdim  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
1011263509Sdim  NumMatch += serialPatternMatch(TrueMBB);
1012263509Sdim  NumMatch += ifPatternMatch(TrueMBB);
1013263509Sdim  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
1014263509Sdim  NumMatch += serialPatternMatch(FalseMBB);
1015263509Sdim  NumMatch += ifPatternMatch(FalseMBB);
1016263509Sdim  MachineBasicBlock *LandBlk;
1017263509Sdim  int Cloned = 0;
1018249259Sdim
1019263509Sdim  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
1020249259Sdim  // TODO: Simplify
1021263509Sdim  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
1022263509Sdim    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
1023263509Sdim    // Diamond pattern
1024263509Sdim    LandBlk = *TrueMBB->succ_begin();
1025263509Sdim  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
1026263509Sdim    // Triangle pattern, false is empty
1027263509Sdim    LandBlk = FalseMBB;
1028263509Sdim    FalseMBB = NULL;
1029263509Sdim  } else if (FalseMBB->succ_size() == 1
1030263509Sdim             && *FalseMBB->succ_begin() == TrueMBB) {
1031263509Sdim    // Triangle pattern, true is empty
1032263509Sdim    // We reverse the predicate to make a triangle, empty false pattern;
1033263509Sdim    std::swap(TrueMBB, FalseMBB);
1034263509Sdim    reversePredicateSetter(MBB->end());
1035263509Sdim    LandBlk = FalseMBB;
1036263509Sdim    FalseMBB = NULL;
1037263509Sdim  } else if (FalseMBB->succ_size() == 1
1038263509Sdim             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1039263509Sdim    LandBlk = *FalseMBB->succ_begin();
1040263509Sdim  } else if (TrueMBB->succ_size() == 1
1041263509Sdim    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1042263509Sdim    LandBlk = *TrueMBB->succ_begin();
1043249259Sdim  } else {
1044263509Sdim    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1045249259Sdim  }
1046249259Sdim
1047249259Sdim  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
1048249259Sdim  // new BB created for landBlk==NULL may introduce new challenge to the
1049249259Sdim  // reduction process.
1050263509Sdim  if (LandBlk &&
1051263509Sdim      ((TrueMBB && TrueMBB->pred_size() > 1)
1052263509Sdim      || (FalseMBB && FalseMBB->pred_size() > 1))) {
1053263509Sdim     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1054249259Sdim  }
1055249259Sdim
1056263509Sdim  if (TrueMBB && TrueMBB->pred_size() > 1) {
1057263509Sdim    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1058263509Sdim    ++Cloned;
1059249259Sdim  }
1060249259Sdim
1061263509Sdim  if (FalseMBB && FalseMBB->pred_size() > 1) {
1062263509Sdim    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1063263509Sdim    ++Cloned;
1064249259Sdim  }
1065249259Sdim
1066263509Sdim  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1067249259Sdim
1068249259Sdim  ++numIfPatternMatch;
1069249259Sdim
1070263509Sdim  numClonedBlock += Cloned;
1071249259Sdim
1072263509Sdim  return 1 + Cloned + NumMatch;
1073263509Sdim}
1074249259Sdim
1075263509Sdimint AMDGPUCFGStructurizer::loopendPatternMatch() {
1076263509Sdim  std::vector<MachineLoop *> NestedLoops;
1077263509Sdim  for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
1078263509Sdim      It != E; ++It) {
1079263509Sdim    df_iterator<MachineLoop *> LpIt = df_begin(*It),
1080263509Sdim        LpE = df_end(*It);
1081263509Sdim    for (; LpIt != LpE; ++LpIt)
1082263509Sdim      NestedLoops.push_back(*LpIt);
1083249259Sdim  }
1084263509Sdim  if (NestedLoops.size() == 0)
1085249259Sdim    return 0;
1086249259Sdim
1087249259Sdim  // Process nested loop outside->inside, so "continue" to a outside loop won't
1088249259Sdim  // be mistaken as "break" of the current loop.
1089263509Sdim  int Num = 0;
1090263509Sdim  for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
1091263509Sdim      E = NestedLoops.rend(); It != E; ++It) {
1092263509Sdim    MachineLoop *ExaminedLoop = *It;
1093263509Sdim    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1094249259Sdim      continue;
1095263509Sdim    DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1096263509Sdim    int NumBreak = mergeLoop(ExaminedLoop);
1097263509Sdim    if (NumBreak == -1)
1098249259Sdim      break;
1099263509Sdim    Num += NumBreak;
1100249259Sdim  }
1101263509Sdim  return Num;
1102263509Sdim}
1103249259Sdim
1104263509Sdimint AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1105263509Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1106263509Sdim  MBBVector ExitingMBBs;
1107263509Sdim  LoopRep->getExitingBlocks(ExitingMBBs);
1108263509Sdim  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1109263509Sdim  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1110263509Sdim  // We assume a single ExitBlk
1111263509Sdim  MBBVector ExitBlks;
1112263509Sdim  LoopRep->getExitBlocks(ExitBlks);
1113263509Sdim  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1114263509Sdim  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1115263509Sdim    ExitBlkSet.insert(ExitBlks[i]);
1116263509Sdim  assert(ExitBlkSet.size() == 1);
1117263509Sdim  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1118263509Sdim  assert(ExitBlk && "Loop has several exit block");
1119263509Sdim  MBBVector LatchBlks;
1120263509Sdim  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
1121263509Sdim  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1122263509Sdim      PE = InvMBBTraits::child_end(LoopHeader);
1123263509Sdim  for (; PI != PE; PI++) {
1124263509Sdim    if (LoopRep->contains(*PI))
1125263509Sdim      LatchBlks.push_back(*PI);
1126249259Sdim  }
1127249259Sdim
1128263509Sdim  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1129263509Sdim    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1130263509Sdim  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1131263509Sdim    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1132263509Sdim  int Match = 0;
1133263509Sdim  do {
1134263509Sdim    Match = 0;
1135263509Sdim    Match += serialPatternMatch(LoopHeader);
1136263509Sdim    Match += ifPatternMatch(LoopHeader);
1137263509Sdim  } while (Match > 0);
1138263509Sdim  mergeLooplandBlock(LoopHeader, ExitBlk);
1139263509Sdim  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1140263509Sdim  if (ParentLoop)
1141263509Sdim    MLI->changeLoopFor(LoopHeader, ParentLoop);
1142263509Sdim  else
1143263509Sdim    MLI->removeBlock(LoopHeader);
1144263509Sdim  Visited[LoopRep] = true;
1145263509Sdim  return 1;
1146263509Sdim}
1147249259Sdim
1148263509Sdimint AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
1149263509Sdim    MachineBasicBlock *LoopHeader) {
1150263509Sdim  int NumCont = 0;
1151263509Sdim  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
1152263509Sdim  typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
1153263509Sdim  GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
1154263509Sdim      E = GTIM::child_end(LoopHeader);
1155263509Sdim  for (; It != E; ++It) {
1156263509Sdim    MachineBasicBlock *MBB = *It;
1157263509Sdim    if (LoopRep->contains(MBB)) {
1158263509Sdim      handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
1159263509Sdim                          LoopHeader, LoopRep);
1160263509Sdim      ContMBB.push_back(MBB);
1161263509Sdim      ++NumCont;
1162249259Sdim    }
1163249259Sdim  }
1164249259Sdim
1165263509Sdim  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
1166263509Sdim      E = ContMBB.end(); It != E; ++It) {
1167263509Sdim    (*It)->removeSuccessor(LoopHeader);
1168249259Sdim  }
1169249259Sdim
1170263509Sdim  numLoopcontPatternMatch += NumCont;
1171249259Sdim
1172263509Sdim  return NumCont;
1173263509Sdim}
1174249259Sdim
1175249259Sdim
1176263509Sdimbool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1177263509Sdim    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1178263509Sdim  if (Src1MBB->succ_size() == 0) {
1179263509Sdim    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1180263509Sdim    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1181263509Sdim      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1182263509Sdim      if (TheEntry) {
1183263509Sdim        DEBUG(
1184263509Sdim          dbgs() << "isLoopContBreakBlock yes src1 = BB"
1185263509Sdim                 << Src1MBB->getNumber()
1186263509Sdim                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
1187263509Sdim        );
1188249259Sdim        return true;
1189249259Sdim      }
1190249259Sdim    }
1191249259Sdim  }
1192249259Sdim  return false;
1193263509Sdim}
1194249259Sdim
1195263509Sdimint AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1196263509Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1197263509Sdim  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1198263509Sdim  if (Num == 0) {
1199263509Sdim    DEBUG(
1200263509Sdim      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1201263509Sdim    );
1202263509Sdim    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1203249259Sdim  }
1204263509Sdim  return Num;
1205249259Sdim}
1206249259Sdim
1207263509Sdimint AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1208263509Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1209263509Sdim  int Num = 0;
1210263509Sdim  MachineBasicBlock *DownBlk;
1211249259Sdim
1212249259Sdim  //trueBlk could be the common post dominator
1213263509Sdim  DownBlk = TrueMBB;
1214249259Sdim
1215263509Sdim  DEBUG(
1216263509Sdim    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1217263509Sdim           << " true = BB" << TrueMBB->getNumber()
1218263509Sdim           << ", numSucc=" << TrueMBB->succ_size()
1219263509Sdim           << " false = BB" << FalseMBB->getNumber() << "\n";
1220263509Sdim  );
1221249259Sdim
1222263509Sdim  while (DownBlk) {
1223263509Sdim    DEBUG(
1224263509Sdim      dbgs() << "check down = BB" << DownBlk->getNumber();
1225263509Sdim    );
1226249259Sdim
1227263509Sdim    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1228263509Sdim      DEBUG(
1229263509Sdim        dbgs() << " working\n";
1230263509Sdim      );
1231249259Sdim
1232263509Sdim      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1233263509Sdim      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1234249259Sdim
1235263509Sdim      numClonedBlock += Num;
1236263509Sdim      Num += serialPatternMatch(*HeadMBB->succ_begin());
1237263509Sdim      Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
1238263509Sdim      Num += ifPatternMatch(HeadMBB);
1239263509Sdim      assert(Num > 0);
1240249259Sdim
1241249259Sdim      break;
1242249259Sdim    }
1243263509Sdim    DEBUG(
1244263509Sdim      dbgs() << " not working\n";
1245263509Sdim    );
1246263509Sdim    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
1247249259Sdim  } // walk down the postDomTree
1248249259Sdim
1249263509Sdim  return Num;
1250263509Sdim}
1251249259Sdim
1252263509Sdimvoid AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1253263509Sdim    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1254263509Sdim    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1255263509Sdim  dbgs() << "head = BB" << HeadMBB->getNumber()
1256263509Sdim         << " size = " << HeadMBB->size();
1257263509Sdim  if (Detail) {
1258263509Sdim    dbgs() << "\n";
1259263509Sdim    HeadMBB->print(dbgs());
1260263509Sdim    dbgs() << "\n";
1261249259Sdim  }
1262249259Sdim
1263263509Sdim  if (TrueMBB) {
1264263509Sdim    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1265263509Sdim           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1266263509Sdim    if (Detail) {
1267263509Sdim      dbgs() << "\n";
1268263509Sdim      TrueMBB->print(dbgs());
1269263509Sdim      dbgs() << "\n";
1270249259Sdim    }
1271249259Sdim  }
1272263509Sdim  if (FalseMBB) {
1273263509Sdim    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1274263509Sdim           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1275263509Sdim    if (Detail) {
1276263509Sdim      dbgs() << "\n";
1277263509Sdim      FalseMBB->print(dbgs());
1278263509Sdim      dbgs() << "\n";
1279249259Sdim    }
1280249259Sdim  }
1281263509Sdim  if (LandMBB) {
1282263509Sdim    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1283263509Sdim           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1284263509Sdim    if (Detail) {
1285263509Sdim      dbgs() << "\n";
1286263509Sdim      LandMBB->print(dbgs());
1287263509Sdim      dbgs() << "\n";
1288249259Sdim    }
1289249259Sdim  }
1290249259Sdim
1291263509Sdim    dbgs() << "\n";
1292263509Sdim}
1293249259Sdim
1294263509Sdimint AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1295263509Sdim    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1296263509Sdim    MachineBasicBlock **LandMBBPtr) {
1297263509Sdim  bool MigrateTrue = false;
1298263509Sdim  bool MigrateFalse = false;
1299249259Sdim
1300263509Sdim  MachineBasicBlock *LandBlk = *LandMBBPtr;
1301249259Sdim
1302263509Sdim  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1303263509Sdim         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1304249259Sdim
1305263509Sdim  if (TrueMBB == FalseMBB)
1306249259Sdim    return 0;
1307249259Sdim
1308263509Sdim  MigrateTrue = needMigrateBlock(TrueMBB);
1309263509Sdim  MigrateFalse = needMigrateBlock(FalseMBB);
1310249259Sdim
1311263509Sdim  if (!MigrateTrue && !MigrateFalse)
1312249259Sdim    return 0;
1313249259Sdim
1314249259Sdim  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1315249259Sdim  // have more than one predecessors.  without doing this, its predecessor
1316249259Sdim  // rather than headBlk will have undefined value in initReg.
1317263509Sdim  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1318263509Sdim    MigrateTrue = true;
1319263509Sdim  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1320263509Sdim    MigrateFalse = true;
1321249259Sdim
1322263509Sdim  DEBUG(
1323263509Sdim    dbgs() << "before improveSimpleJumpintoIf: ";
1324263509Sdim    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1325263509Sdim  );
1326249259Sdim
1327249259Sdim  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1328249259Sdim  //
1329249259Sdim  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1330249259Sdim  //      {initReg = 0; org falseBlk branch }
1331249259Sdim  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1332249259Sdim  //      => org landBlk
1333249259Sdim  //      if landBlk->pred_size() > 2, put the about if-else inside
1334249259Sdim  //      if (initReg !=2) {...}
1335249259Sdim  //
1336249259Sdim  // add initReg = initVal to headBlk
1337249259Sdim
1338249259Sdim  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1339263509Sdim  if (!MigrateTrue || !MigrateFalse) {
1340263509Sdim    // XXX: We have an opportunity here to optimize the "branch into if" case
1341263509Sdim    // here.  Branch into if looks like this:
1342263509Sdim    //                        entry
1343263509Sdim    //                       /     |
1344263509Sdim    //           diamond_head       branch_from
1345263509Sdim    //             /      \           |
1346263509Sdim    // diamond_false        diamond_true
1347263509Sdim    //             \      /
1348263509Sdim    //               done
1349263509Sdim    //
1350263509Sdim    // The diamond_head block begins the "if" and the diamond_true block
1351263509Sdim    // is the block being "branched into".
1352263509Sdim    //
1353263509Sdim    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1354263509Sdim    // and if MigrateFalse is true, then FalseBB is the block being
1355263509Sdim    // "branched into"
1356263509Sdim    //
1357263509Sdim    // Here is the pseudo code for how I think the optimization should work:
1358263509Sdim    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1359263509Sdim    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1360263509Sdim    // 3. Move the branch instruction from diamond_head into its own basic
1361263509Sdim    //    block (new_block).
1362263509Sdim    // 4. Add an unconditional branch from diamond_head to new_block
1363263509Sdim    // 5. Replace the branch instruction in branch_from with an unconditional
1364263509Sdim    //    branch to new_block.  If branch_from has multiple predecessors, then
1365263509Sdim    //    we need to replace the True/False block in the branch
1366263509Sdim    //    instruction instead of replacing it.
1367263509Sdim    // 6. Change the condition of the branch instruction in new_block from
1368263509Sdim    //    COND to (COND || GPR0)
1369263509Sdim    //
1370263509Sdim    // In order insert these MOV instruction, we will need to use the
1371263509Sdim    // RegisterScavenger.  Usually liveness stops being tracked during
1372263509Sdim    // the late machine optimization passes, however if we implement
1373263509Sdim    // bool TargetRegisterInfo::requiresRegisterScavenging(
1374263509Sdim    //                                                const MachineFunction &MF)
1375263509Sdim    // and have it return true, liveness will be tracked correctly
1376263509Sdim    // by generic optimization passes.  We will also need to make sure that
1377263509Sdim    // all of our target-specific passes that run after regalloc and before
1378263509Sdim    // the CFGStructurizer track liveness and we will need to modify this pass
1379263509Sdim    // to correctly track liveness.
1380263509Sdim    //
1381263509Sdim    // After the above changes, the new CFG should look like this:
1382263509Sdim    //                        entry
1383263509Sdim    //                       /     |
1384263509Sdim    //           diamond_head       branch_from
1385263509Sdim    //                       \     /
1386263509Sdim    //                      new_block
1387263509Sdim    //                      /      |
1388263509Sdim    //         diamond_false        diamond_true
1389263509Sdim    //                      \      /
1390263509Sdim    //                        done
1391263509Sdim    //
1392263509Sdim    // Without this optimization, we are forced to duplicate the diamond_true
1393263509Sdim    // block and we will end up with a CFG like this:
1394263509Sdim    //
1395263509Sdim    //                        entry
1396263509Sdim    //                       /     |
1397263509Sdim    //           diamond_head       branch_from
1398263509Sdim    //             /      \                   |
1399263509Sdim    // diamond_false        diamond_true      diamond_true (duplicate)
1400263509Sdim    //             \      /                   |
1401263509Sdim    //               done --------------------|
1402263509Sdim    //
1403263509Sdim    // Duplicating diamond_true can be very costly especially if it has a
1404263509Sdim    // lot of instructions.
1405263509Sdim    return 0;
1406249259Sdim  }
1407249259Sdim
1408263509Sdim  int NumNewBlk = 0;
1409249259Sdim
1410263509Sdim  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1411249259Sdim
1412249259Sdim  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1413263509Sdim  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1414249259Sdim
1415263509Sdim  if (LandBlkHasOtherPred) {
1416263509Sdim    llvm_unreachable("Extra register needed to handle CFG");
1417263509Sdim    unsigned CmpResReg =
1418263509Sdim      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1419263509Sdim    llvm_unreachable("Extra compare instruction needed to handle CFG");
1420263509Sdim    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1421263509Sdim        CmpResReg, DebugLoc());
1422249259Sdim  }
1423249259Sdim
1424263509Sdim  // XXX: We are running this after RA, so creating virtual registers will
1425263509Sdim  // cause an assertion failure in the PostRA scheduling pass.
1426263509Sdim  unsigned InitReg =
1427263509Sdim    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1428263509Sdim  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1429263509Sdim      DebugLoc());
1430249259Sdim
1431263509Sdim  if (MigrateTrue) {
1432263509Sdim    migrateInstruction(TrueMBB, LandBlk, I);
1433249259Sdim    // need to uncondionally insert the assignment to ensure a path from its
1434249259Sdim    // predecessor rather than headBlk has valid value in initReg if
1435249259Sdim    // (initVal != 1).
1436263509Sdim    llvm_unreachable("Extra register needed to handle CFG");
1437249259Sdim  }
1438263509Sdim  insertInstrBefore(I, AMDGPU::ELSE);
1439249259Sdim
1440263509Sdim  if (MigrateFalse) {
1441263509Sdim    migrateInstruction(FalseMBB, LandBlk, I);
1442249259Sdim    // need to uncondionally insert the assignment to ensure a path from its
1443249259Sdim    // predecessor rather than headBlk has valid value in initReg if
1444249259Sdim    // (initVal != 0)
1445263509Sdim    llvm_unreachable("Extra register needed to handle CFG");
1446249259Sdim  }
1447249259Sdim
1448263509Sdim  if (LandBlkHasOtherPred) {
1449249259Sdim    // add endif
1450263509Sdim    insertInstrBefore(I, AMDGPU::ENDIF);
1451249259Sdim
1452249259Sdim    // put initReg = 2 to other predecessors of landBlk
1453263509Sdim    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1454263509Sdim         PE = LandBlk->pred_end(); PI != PE; ++PI) {
1455263509Sdim      MachineBasicBlock *MBB = *PI;
1456263509Sdim      if (MBB != TrueMBB && MBB != FalseMBB)
1457263509Sdim        llvm_unreachable("Extra register needed to handle CFG");
1458263509Sdim    }
1459249259Sdim  }
1460263509Sdim  DEBUG(
1461263509Sdim    dbgs() << "result from improveSimpleJumpintoIf: ";
1462263509Sdim    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1463263509Sdim  );
1464249259Sdim
1465249259Sdim  // update landBlk
1466263509Sdim  *LandMBBPtr = LandBlk;
1467249259Sdim
1468263509Sdim  return NumNewBlk;
1469263509Sdim}
1470249259Sdim
1471263509Sdimvoid AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
1472263509Sdim    MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
1473263509Sdim    MachineLoop *ContLoop) {
1474263509Sdim  DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
1475263509Sdim               << " header = BB" << ContMBB->getNumber() << "\n";
1476263509Sdim        dbgs() << "Trying to continue loop-depth = "
1477263509Sdim               << getLoopDepth(ContLoop)
1478263509Sdim               << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
1479263509Sdim  settleLoopcontBlock(ContingMBB, ContMBB);
1480263509Sdim}
1481249259Sdim
1482263509Sdimvoid AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1483263509Sdim    MachineBasicBlock *SrcMBB) {
1484263509Sdim  DEBUG(
1485263509Sdim    dbgs() << "serialPattern BB" << DstMBB->getNumber()
1486263509Sdim           << " <= BB" << SrcMBB->getNumber() << "\n";
1487263509Sdim  );
1488263509Sdim  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1489249259Sdim
1490263509Sdim  DstMBB->removeSuccessor(SrcMBB);
1491263509Sdim  cloneSuccessorList(DstMBB, SrcMBB);
1492249259Sdim
1493263509Sdim  removeSuccessor(SrcMBB);
1494263509Sdim  MLI->removeBlock(SrcMBB);
1495263509Sdim  retireBlock(SrcMBB);
1496263509Sdim}
1497249259Sdim
1498263509Sdimvoid AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1499263509Sdim    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1500263509Sdim    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1501263509Sdim  assert (TrueMBB);
1502263509Sdim  DEBUG(
1503263509Sdim    dbgs() << "ifPattern BB" << MBB->getNumber();
1504263509Sdim    dbgs() << "{  ";
1505263509Sdim    if (TrueMBB) {
1506263509Sdim      dbgs() << "BB" << TrueMBB->getNumber();
1507249259Sdim    }
1508263509Sdim    dbgs() << "  } else ";
1509263509Sdim    dbgs() << "{  ";
1510263509Sdim    if (FalseMBB) {
1511263509Sdim      dbgs() << "BB" << FalseMBB->getNumber();
1512249259Sdim    }
1513263509Sdim    dbgs() << "  }\n ";
1514263509Sdim    dbgs() << "landBlock: ";
1515263509Sdim    if (!LandMBB) {
1516263509Sdim      dbgs() << "NULL";
1517249259Sdim    } else {
1518263509Sdim      dbgs() << "BB" << LandMBB->getNumber();
1519249259Sdim    }
1520263509Sdim    dbgs() << "\n";
1521263509Sdim  );
1522249259Sdim
1523263509Sdim  int OldOpcode = BranchMI->getOpcode();
1524263509Sdim  DebugLoc BranchDL = BranchMI->getDebugLoc();
1525249259Sdim
1526249259Sdim//    transform to
1527249259Sdim//    if cond
1528249259Sdim//       trueBlk
1529249259Sdim//    else
1530249259Sdim//       falseBlk
1531249259Sdim//    endif
1532249259Sdim//    landBlk
1533249259Sdim
1534263509Sdim  MachineBasicBlock::iterator I = BranchMI;
1535263509Sdim  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1536263509Sdim      BranchDL);
1537249259Sdim
1538263509Sdim  if (TrueMBB) {
1539263509Sdim    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1540263509Sdim    MBB->removeSuccessor(TrueMBB);
1541263509Sdim    if (LandMBB && TrueMBB->succ_size()!=0)
1542263509Sdim      TrueMBB->removeSuccessor(LandMBB);
1543263509Sdim    retireBlock(TrueMBB);
1544263509Sdim    MLI->removeBlock(TrueMBB);
1545249259Sdim  }
1546249259Sdim
1547263509Sdim  if (FalseMBB) {
1548263509Sdim    insertInstrBefore(I, AMDGPU::ELSE);
1549263509Sdim    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1550263509Sdim                   FalseMBB->end());
1551263509Sdim    MBB->removeSuccessor(FalseMBB);
1552263509Sdim    if (LandMBB && FalseMBB->succ_size() != 0)
1553263509Sdim      FalseMBB->removeSuccessor(LandMBB);
1554263509Sdim    retireBlock(FalseMBB);
1555263509Sdim    MLI->removeBlock(FalseMBB);
1556249259Sdim  }
1557263509Sdim  insertInstrBefore(I, AMDGPU::ENDIF);
1558249259Sdim
1559263509Sdim  BranchMI->eraseFromParent();
1560249259Sdim
1561263509Sdim  if (LandMBB && TrueMBB && FalseMBB)
1562263509Sdim    MBB->addSuccessor(LandMBB);
1563249259Sdim
1564263509Sdim}
1565249259Sdim
1566263509Sdimvoid AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1567263509Sdim    MachineBasicBlock *LandMBB) {
1568263509Sdim  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1569263509Sdim               << " land = BB" << LandMBB->getNumber() << "\n";);
1570249259Sdim
1571263509Sdim  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
1572263509Sdim  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
1573263509Sdim  DstBlk->addSuccessor(LandMBB);
1574263509Sdim  DstBlk->removeSuccessor(DstBlk);
1575249259Sdim}
1576249259Sdim
1577249259Sdim
1578263509Sdimvoid AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1579263509Sdim    MachineBasicBlock *LandMBB) {
1580263509Sdim  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
1581263509Sdim               << " land = BB" << LandMBB->getNumber() << "\n";);
1582263509Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1583263509Sdim  assert(BranchMI && isCondBranch(BranchMI));
1584263509Sdim  DebugLoc DL = BranchMI->getDebugLoc();
1585263509Sdim  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1586263509Sdim  MachineBasicBlock::iterator I = BranchMI;
1587263509Sdim  if (TrueBranch != LandMBB)
1588263509Sdim    reversePredicateSetter(I);
1589263509Sdim  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1590263509Sdim  insertInstrBefore(I, AMDGPU::BREAK);
1591263509Sdim  insertInstrBefore(I, AMDGPU::ENDIF);
1592249259Sdim  //now branchInst can be erase safely
1593263509Sdim  BranchMI->eraseFromParent();
1594249259Sdim  //now take care of successors, retire blocks
1595263509Sdim  ExitingMBB->removeSuccessor(LandMBB);
1596263509Sdim}
1597249259Sdim
1598263509Sdimvoid AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1599263509Sdim    MachineBasicBlock *ContMBB) {
1600263509Sdim  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1601263509Sdim               << ContingMBB->getNumber()
1602263509Sdim               << ", cont = BB" << ContMBB->getNumber() << "\n";);
1603249259Sdim
1604263509Sdim  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1605263509Sdim  if (MI) {
1606263509Sdim    assert(isCondBranch(MI));
1607263509Sdim    MachineBasicBlock::iterator I = MI;
1608263509Sdim    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1609263509Sdim    int OldOpcode = MI->getOpcode();
1610263509Sdim    DebugLoc DL = MI->getDebugLoc();
1611249259Sdim
1612263509Sdim    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1613249259Sdim
1614263509Sdim    if (UseContinueLogical == false) {
1615263509Sdim      int BranchOpcode =
1616263509Sdim          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1617263509Sdim          getBranchZeroOpcode(OldOpcode);
1618263509Sdim      insertCondBranchBefore(I, BranchOpcode, DL);
1619263509Sdim      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1620263509Sdim      insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1621263509Sdim      insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1622249259Sdim    } else {
1623263509Sdim      int BranchOpcode =
1624263509Sdim          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1625263509Sdim          getContinueZeroOpcode(OldOpcode);
1626263509Sdim      insertCondBranchBefore(I, BranchOpcode, DL);
1627249259Sdim    }
1628249259Sdim
1629263509Sdim    MI->eraseFromParent();
1630249259Sdim  } else {
1631249259Sdim    // if we've arrived here then we've already erased the branch instruction
1632263509Sdim    // travel back up the basic block to see the last reference of our debug
1633263509Sdim    // location we've just inserted that reference here so it should be
1634263509Sdim    // representative insertEnd to ensure phi-moves, if exist, go before the
1635263509Sdim    // continue-instr.
1636263509Sdim    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1637263509Sdim        getLastDebugLocInBB(ContingMBB));
1638249259Sdim  }
1639263509Sdim}
1640249259Sdim
1641263509Sdimint AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1642263509Sdim    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1643263509Sdim  int Cloned = 0;
1644263509Sdim  assert(PreMBB->isSuccessor(SrcMBB));
1645263509Sdim  while (SrcMBB && SrcMBB != DstMBB) {
1646263509Sdim    assert(SrcMBB->succ_size() == 1);
1647263509Sdim    if (SrcMBB->pred_size() > 1) {
1648263509Sdim      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1649263509Sdim      ++Cloned;
1650249259Sdim    }
1651249259Sdim
1652263509Sdim    PreMBB = SrcMBB;
1653263509Sdim    SrcMBB = *SrcMBB->succ_begin();
1654249259Sdim  }
1655249259Sdim
1656263509Sdim  return Cloned;
1657263509Sdim}
1658249259Sdim
1659263509SdimMachineBasicBlock *
1660263509SdimAMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1661263509Sdim    MachineBasicBlock *PredMBB) {
1662263509Sdim  assert(PredMBB->isSuccessor(MBB) &&
1663249259Sdim         "succBlk is not a prececessor of curBlk");
1664249259Sdim
1665263509Sdim  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
1666263509Sdim  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1667249259Sdim  //srcBlk, oldBlk, newBlk
1668249259Sdim
1669263509Sdim  PredMBB->removeSuccessor(MBB);
1670263509Sdim  PredMBB->addSuccessor(CloneMBB);
1671249259Sdim
1672249259Sdim  // add all successor to cloneBlk
1673263509Sdim  cloneSuccessorList(CloneMBB, MBB);
1674249259Sdim
1675263509Sdim  numClonedInstr += MBB->size();
1676249259Sdim
1677263509Sdim  DEBUG(
1678263509Sdim    dbgs() << "Cloned block: " << "BB"
1679263509Sdim           << MBB->getNumber() << "size " << MBB->size() << "\n";
1680263509Sdim  );
1681249259Sdim
1682263509Sdim  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1683249259Sdim
1684263509Sdim  return CloneMBB;
1685263509Sdim}
1686249259Sdim
1687263509Sdimvoid AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1688263509Sdim    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1689263509Sdim  MachineBasicBlock::iterator SpliceEnd;
1690249259Sdim  //look for the input branchinstr, not the AMDGPU branchinstr
1691263509Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1692263509Sdim  if (!BranchMI) {
1693263509Sdim    DEBUG(
1694263509Sdim      dbgs() << "migrateInstruction don't see branch instr\n" ;
1695263509Sdim    );
1696263509Sdim    SpliceEnd = SrcMBB->end();
1697249259Sdim  } else {
1698263509Sdim    DEBUG(
1699263509Sdim      dbgs() << "migrateInstruction see branch instr\n" ;
1700263509Sdim      BranchMI->dump();
1701263509Sdim    );
1702263509Sdim    SpliceEnd = BranchMI;
1703249259Sdim  }
1704263509Sdim  DEBUG(
1705263509Sdim    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
1706263509Sdim      << "srcSize = " << SrcMBB->size() << "\n";
1707263509Sdim  );
1708249259Sdim
1709249259Sdim  //splice insert before insertPos
1710263509Sdim  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1711249259Sdim
1712263509Sdim  DEBUG(
1713263509Sdim    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
1714263509Sdim      << "srcSize = " << SrcMBB->size() << "\n";
1715263509Sdim  );
1716263509Sdim}
1717249259Sdim
1718263509SdimMachineBasicBlock *
1719263509SdimAMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1720263509Sdim  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1721263509Sdim  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1722249259Sdim  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1723249259Sdim
1724263509Sdim  if (!LoopHeader || !LoopLatch)
1725263509Sdim    return NULL;
1726263509Sdim  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1727263509Sdim  // Is LoopRep an infinite loop ?
1728263509Sdim  if (!BranchMI || !isUncondBranch(BranchMI))
1729263509Sdim    return NULL;
1730249259Sdim
1731263509Sdim  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1732263509Sdim  FuncRep->push_back(DummyExitBlk);  //insert to function
1733263509Sdim  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1734263509Sdim  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1735263509Sdim  MachineBasicBlock::iterator I = BranchMI;
1736263509Sdim  unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
1737263509Sdim  llvm_unreachable("Extra register needed to handle CFG");
1738263509Sdim  MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
1739263509Sdim  MachineInstrBuilder MIB(*FuncRep, NewMI);
1740263509Sdim  MIB.addMBB(LoopHeader);
1741263509Sdim  MIB.addReg(ImmReg, false);
1742263509Sdim  SHOWNEWINSTR(NewMI);
1743263509Sdim  BranchMI->eraseFromParent();
1744263509Sdim  LoopLatch->addSuccessor(DummyExitBlk);
1745249259Sdim
1746263509Sdim  return DummyExitBlk;
1747263509Sdim}
1748249259Sdim
1749263509Sdimvoid AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1750263509Sdim  MachineInstr *BranchMI;
1751249259Sdim
1752249259Sdim  // I saw two unconditional branch in one basic block in example
1753249259Sdim  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1754263509Sdim  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1755263509Sdim          && isUncondBranch(BranchMI)) {
1756263509Sdim    DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
1757263509Sdim    BranchMI->eraseFromParent();
1758249259Sdim  }
1759263509Sdim}
1760249259Sdim
1761263509Sdimvoid AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1762263509Sdim    MachineBasicBlock *MBB) {
1763263509Sdim  if (MBB->succ_size() != 2)
1764263509Sdim    return;
1765263509Sdim  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1766263509Sdim  MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin());
1767263509Sdim  if (MBB1 != MBB2)
1768263509Sdim    return;
1769249259Sdim
1770263509Sdim  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1771263509Sdim  assert(BranchMI && isCondBranch(BranchMI));
1772263509Sdim  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
1773263509Sdim  BranchMI->eraseFromParent();
1774263509Sdim  SHOWNEWBLK(MBB1, "Removing redundant successor");
1775263509Sdim  MBB->removeSuccessor(MBB1);
1776263509Sdim}
1777249259Sdim
1778263509Sdimvoid AMDGPUCFGStructurizer::addDummyExitBlock(
1779263509Sdim    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1780263509Sdim  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1781263509Sdim  FuncRep->push_back(DummyExitBlk);  //insert to function
1782263509Sdim  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1783249259Sdim
1784263509Sdim  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1785263509Sdim       E = RetMBB.end(); It != E; ++It) {
1786263509Sdim    MachineBasicBlock *MBB = *It;
1787263509Sdim    MachineInstr *MI = getReturnInstr(MBB);
1788263509Sdim    if (MI)
1789263509Sdim      MI->eraseFromParent();
1790263509Sdim    MBB->addSuccessor(DummyExitBlk);
1791263509Sdim    DEBUG(
1792263509Sdim      dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1793249259Sdim             << " successors\n";
1794263509Sdim    );
1795249259Sdim  }
1796263509Sdim  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1797249259Sdim}
1798249259Sdim
1799263509Sdimvoid AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1800263509Sdim  while (MBB->succ_size())
1801263509Sdim    MBB->removeSuccessor(*MBB->succ_begin());
1802249259Sdim}
1803249259Sdim
1804263509Sdimvoid AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1805263509Sdim    int SccNum) {
1806263509Sdim  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1807263509Sdim  if (!srcBlkInfo)
1808263509Sdim    srcBlkInfo = new BlockInformation();
1809263509Sdim  srcBlkInfo->SccNum = SccNum;
1810249259Sdim}
1811249259Sdim
1812263509Sdimvoid AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1813263509Sdim  DEBUG(
1814263509Sdim        dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
1815263509Sdim  );
1816249259Sdim
1817263509Sdim  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1818249259Sdim
1819263509Sdim  if (!SrcBlkInfo)
1820263509Sdim    SrcBlkInfo = new BlockInformation();
1821249259Sdim
1822263509Sdim  SrcBlkInfo->IsRetired = true;
1823263509Sdim  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1824249259Sdim         && "can't retire block yet");
1825249259Sdim}
1826249259Sdim
1827263509Sdimvoid AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
1828263509Sdim    MachineBasicBlock *MBB) {
1829263509Sdim  MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
1830263509Sdim  if (!MBB) {
1831263509Sdim    MBB = FuncRep->CreateMachineBasicBlock();
1832263509Sdim    FuncRep->push_back(MBB);  //insert to function
1833263509Sdim    SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
1834249259Sdim  }
1835263509Sdim  TheEntry = MBB;
1836263509Sdim  DEBUG(
1837263509Sdim    dbgs() << "setLoopLandBlock loop-header = BB"
1838249259Sdim           << loopRep->getHeader()->getNumber()
1839263509Sdim           << "  landing-block = BB" << MBB->getNumber() << "\n";
1840263509Sdim  );
1841263509Sdim}
1842249259Sdim
1843263509SdimMachineBasicBlock *
1844263509SdimAMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
1845263509Sdim    MachineBasicBlock *MBB2) {
1846249259Sdim
1847263509Sdim  if (PDT->dominates(MBB1, MBB2))
1848263509Sdim    return MBB1;
1849263509Sdim  if (PDT->dominates(MBB2, MBB1))
1850263509Sdim    return MBB2;
1851249259Sdim
1852263509Sdim  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
1853263509Sdim  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
1854249259Sdim
1855249259Sdim  // Handle newly cloned node.
1856263509Sdim  if (!Node1 && MBB1->succ_size() == 1)
1857263509Sdim    return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
1858263509Sdim  if (!Node2 && MBB2->succ_size() == 1)
1859263509Sdim    return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
1860249259Sdim
1861263509Sdim  if (!Node1 || !Node2)
1862249259Sdim    return NULL;
1863249259Sdim
1864263509Sdim  Node1 = Node1->getIDom();
1865263509Sdim  while (Node1) {
1866263509Sdim    if (PDT->dominates(Node1, Node2))
1867263509Sdim      return Node1->getBlock();
1868263509Sdim    Node1 = Node1->getIDom();
1869249259Sdim  }
1870249259Sdim
1871249259Sdim  return NULL;
1872249259Sdim}
1873249259Sdim
1874263509SdimMachineBasicBlock *
1875263509SdimAMDGPUCFGStructurizer::findNearestCommonPostDom(
1876263509Sdim    std::set<MachineBasicBlock *> &MBBs) {
1877263509Sdim  MachineBasicBlock *CommonDom;
1878263509Sdim  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
1879263509Sdim  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
1880263509Sdim  for (CommonDom = *It; It != E && CommonDom; ++It) {
1881263509Sdim    MachineBasicBlock *MBB = *It;
1882263509Sdim    if (MBB != CommonDom)
1883263509Sdim      CommonDom = findNearestCommonPostDom(MBB, CommonDom);
1884249259Sdim  }
1885249259Sdim
1886263509Sdim  DEBUG(
1887263509Sdim    dbgs() << "Common post dominator for exit blocks is ";
1888263509Sdim    if (CommonDom)
1889263509Sdim          dbgs() << "BB" << CommonDom->getNumber() << "\n";
1890263509Sdim    else
1891263509Sdim      dbgs() << "NULL\n";
1892263509Sdim  );
1893249259Sdim
1894263509Sdim  return CommonDom;
1895249259Sdim}
1896249259Sdim
1897263509Sdimchar AMDGPUCFGStructurizer::ID = 0;
1898249259Sdim
1899263509Sdim} // end anonymous namespace
1900249259Sdim
1901249259Sdim
1902263509SdimFunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
1903263509Sdim  return new AMDGPUCFGStructurizer(tm);
1904249259Sdim}
1905