1292915Sdim//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
2292915Sdim//
3292915Sdim//                     The LLVM Compiler Infrastructure
4292915Sdim//
5292915Sdim// This file is distributed under the University of Illinois Open Source
6292915Sdim// License. See LICENSE.TXT for details.
7292915Sdim//
8292915Sdim//===----------------------------------------------------------------------===//
9292915Sdim//
10292915Sdim// This implements a Hexagon-specific if-conversion pass that runs on the
11292915Sdim// SSA form.
12292915Sdim// In SSA it is not straightforward to represent instructions that condi-
13292915Sdim// tionally define registers, since a conditionally-defined register may
14292915Sdim// only be used under the same condition on which the definition was based.
15292915Sdim// To avoid complications of this nature, this patch will only generate
16292915Sdim// predicated stores, and speculate other instructions from the "if-conver-
17292915Sdim// ted" block.
18292915Sdim// The code will recognize CFG patterns where a block with a conditional
19292915Sdim// branch "splits" into a "true block" and a "false block". Either of these
20292915Sdim// could be omitted (in case of a triangle, for example).
21292915Sdim// If after conversion of the side block(s) the CFG allows it, the resul-
22292915Sdim// ting blocks may be merged. If the "join" block contained PHI nodes, they
23292915Sdim// will be replaced with MUX (or MUX-like) instructions to maintain the
24292915Sdim// semantics of the PHI.
25292915Sdim//
26292915Sdim// Example:
27292915Sdim//
28292915Sdim//         %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
29292915Sdim//         %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
30292915Sdim//         J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead>
31292915Sdim//         J2_jump <BB#4>, %PC<imp-def,dead>
32292915Sdim//     Successors according to CFG: BB#4(62) BB#5(62)
33292915Sdim//
34292915Sdim// BB#4: derived from LLVM BB %if.then
35292915Sdim//     Predecessors according to CFG: BB#3
36292915Sdim//         %vreg11<def> = A2_addp %vreg6, %vreg10
37292915Sdim//         S2_storerd_io %vreg32, 16, %vreg11
38292915Sdim//     Successors according to CFG: BB#5
39292915Sdim//
40292915Sdim// BB#5: derived from LLVM BB %if.end
41292915Sdim//     Predecessors according to CFG: BB#3 BB#4
42292915Sdim//         %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
43292915Sdim//         %vreg13<def> = A2_addp %vreg7, %vreg12
44292915Sdim//         %vreg42<def> = C2_cmpeqi %vreg9, 10
45292915Sdim//         J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
46292915Sdim//         J2_jump <BB#6>, %PC<imp-def,dead>
47292915Sdim//     Successors according to CFG: BB#6(4) BB#3(124)
48292915Sdim//
49292915Sdim// would become:
50292915Sdim//
51292915Sdim//         %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
52292915Sdim//         %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
53292915Sdim// spec->  %vreg11<def> = A2_addp %vreg6, %vreg10
54292915Sdim// pred->  S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
55292915Sdim//         %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11
56292915Sdim//         %vreg13<def> = A2_addp %vreg7, %vreg46
57292915Sdim//         %vreg42<def> = C2_cmpeqi %vreg9, 10
58292915Sdim//         J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
59292915Sdim//         J2_jump <BB#6>, %PC<imp-def,dead>
60292915Sdim//     Successors according to CFG: BB#6 BB#3
61292915Sdim
62292915Sdim#define DEBUG_TYPE "hexagon-eif"
63292915Sdim
64292915Sdim#include "llvm/ADT/DenseSet.h"
65292915Sdim#include "llvm/ADT/SetVector.h"
66292915Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
67292915Sdim#include "llvm/CodeGen/MachineDominators.h"
68292915Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
69292915Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
70292915Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
71292915Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
72292915Sdim#include "llvm/CodeGen/Passes.h"
73292915Sdim#include "llvm/Support/CommandLine.h"
74292915Sdim#include "llvm/Support/Debug.h"
75292915Sdim#include "llvm/Support/raw_ostream.h"
76292915Sdim#include "llvm/Target/TargetInstrInfo.h"
77292915Sdim#include "llvm/Target/TargetMachine.h"
78292915Sdim#include "HexagonTargetMachine.h"
79292915Sdim
80292915Sdim#include <functional>
81292915Sdim#include <set>
82292915Sdim#include <vector>
83292915Sdim
84292915Sdimusing namespace llvm;
85292915Sdim
86292915Sdimnamespace llvm {
87292915Sdim  FunctionPass *createHexagonEarlyIfConversion();
88292915Sdim  void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
89292915Sdim}
90292915Sdim
91292915Sdimnamespace {
92292915Sdim  cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
93292915Sdim    cl::init(false), cl::desc("Enable branch probability info"));
94292915Sdim  cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
95292915Sdim    cl::desc("Size limit in Hexagon early if-conversion"));
96292915Sdim
97292915Sdim  struct PrintMB {
98292915Sdim    PrintMB(const MachineBasicBlock *B) : MB(B) {}
99292915Sdim    const MachineBasicBlock *MB;
100292915Sdim  };
101292915Sdim  raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
102292915Sdim    if (!P.MB)
103292915Sdim      return OS << "<none>";
104292915Sdim    return OS << '#' << P.MB->getNumber();
105292915Sdim  }
106292915Sdim
107292915Sdim  struct FlowPattern {
108292915Sdim    FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
109292915Sdim    FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
110292915Sdim          MachineBasicBlock *FB, MachineBasicBlock *JB)
111292915Sdim      : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
112292915Sdim
113292915Sdim    MachineBasicBlock *SplitB;
114292915Sdim    MachineBasicBlock *TrueB, *FalseB, *JoinB;
115292915Sdim    unsigned PredR;
116292915Sdim  };
117292915Sdim  struct PrintFP {
118292915Sdim    PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
119292915Sdim      : FP(P), TRI(T) {}
120292915Sdim    const FlowPattern &FP;
121292915Sdim    const TargetRegisterInfo &TRI;
122292915Sdim    friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
123292915Sdim  };
124292915Sdim  raw_ostream &operator<<(raw_ostream &OS,
125292915Sdim                          const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
126292915Sdim  raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
127292915Sdim    OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
128292915Sdim       << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
129292915Sdim       << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
130292915Sdim       << PrintMB(P.FP.FalseB)
131292915Sdim       << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
132292915Sdim    return OS;
133292915Sdim  }
134292915Sdim
135292915Sdim  class HexagonEarlyIfConversion : public MachineFunctionPass {
136292915Sdim  public:
137292915Sdim    static char ID;
138292915Sdim    HexagonEarlyIfConversion() : MachineFunctionPass(ID),
139292915Sdim        TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
140292915Sdim      initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
141292915Sdim    }
142292915Sdim    const char *getPassName() const override {
143292915Sdim      return "Hexagon early if conversion";
144292915Sdim    }
145292915Sdim    void getAnalysisUsage(AnalysisUsage &AU) const override {
146292915Sdim      AU.addRequired<MachineBranchProbabilityInfo>();
147292915Sdim      AU.addRequired<MachineDominatorTree>();
148292915Sdim      AU.addPreserved<MachineDominatorTree>();
149292915Sdim      AU.addRequired<MachineLoopInfo>();
150292915Sdim      MachineFunctionPass::getAnalysisUsage(AU);
151292915Sdim    }
152292915Sdim    bool runOnMachineFunction(MachineFunction &MF) override;
153292915Sdim
154292915Sdim  private:
155292915Sdim    typedef DenseSet<MachineBasicBlock*> BlockSetType;
156292915Sdim
157292915Sdim    bool isPreheader(const MachineBasicBlock *B) const;
158292915Sdim    bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
159292915Sdim          FlowPattern &FP);
160292915Sdim    bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
161292915Sdim    bool visitLoop(MachineLoop *L);
162292915Sdim
163292915Sdim    bool hasEHLabel(const MachineBasicBlock *B) const;
164292915Sdim    bool hasUncondBranch(const MachineBasicBlock *B) const;
165292915Sdim    bool isValidCandidate(const MachineBasicBlock *B) const;
166292915Sdim    bool usesUndefVReg(const MachineInstr *MI) const;
167292915Sdim    bool isValid(const FlowPattern &FP) const;
168292915Sdim    unsigned countPredicateDefs(const MachineBasicBlock *B) const;
169292915Sdim    unsigned computePhiCost(MachineBasicBlock *B) const;
170292915Sdim    bool isProfitable(const FlowPattern &FP) const;
171292915Sdim    bool isPredicableStore(const MachineInstr *MI) const;
172292915Sdim    bool isSafeToSpeculate(const MachineInstr *MI) const;
173292915Sdim
174292915Sdim    unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
175292915Sdim    void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
176292915Sdim          MachineInstr *MI, unsigned PredR, bool IfTrue);
177292915Sdim    void predicateBlockNB(MachineBasicBlock *ToB,
178292915Sdim          MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
179292915Sdim          unsigned PredR, bool IfTrue);
180292915Sdim
181292915Sdim    void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
182292915Sdim    void convert(const FlowPattern &FP);
183292915Sdim
184292915Sdim    void removeBlock(MachineBasicBlock *B);
185292915Sdim    void eliminatePhis(MachineBasicBlock *B);
186292915Sdim    void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
187292915Sdim    void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
188292915Sdim    void simplifyFlowGraph(const FlowPattern &FP);
189292915Sdim
190292915Sdim    const TargetInstrInfo *TII;
191292915Sdim    const TargetRegisterInfo *TRI;
192292915Sdim    MachineFunction *MFN;
193292915Sdim    MachineRegisterInfo *MRI;
194292915Sdim    MachineDominatorTree *MDT;
195292915Sdim    MachineLoopInfo *MLI;
196292915Sdim    BlockSetType Deleted;
197292915Sdim    const MachineBranchProbabilityInfo *MBPI;
198292915Sdim  };
199292915Sdim
200292915Sdim  char HexagonEarlyIfConversion::ID = 0;
201292915Sdim}
202292915Sdim
203292915SdimINITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
204292915Sdim  "Hexagon early if conversion", false, false)
205292915Sdim
206292915Sdimbool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
207292915Sdim  if (B->succ_size() != 1)
208292915Sdim    return false;
209292915Sdim  MachineBasicBlock *SB = *B->succ_begin();
210292915Sdim  MachineLoop *L = MLI->getLoopFor(SB);
211292915Sdim  return L && SB == L->getHeader();
212292915Sdim}
213292915Sdim
214292915Sdim
215292915Sdimbool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
216292915Sdim    MachineLoop *L, FlowPattern &FP) {
217292915Sdim  DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n");
218292915Sdim
219292915Sdim  // Interested only in conditional branches, no .new, no new-value, etc.
220292915Sdim  // Check the terminators directly, it's easier than handling all responses
221292915Sdim  // from AnalyzeBranch.
222292915Sdim  MachineBasicBlock *TB = 0, *FB = 0;
223292915Sdim  MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
224292915Sdim  if (T1I == B->end())
225292915Sdim    return false;
226292915Sdim  unsigned Opc = T1I->getOpcode();
227292915Sdim  if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf)
228292915Sdim    return false;
229292915Sdim  unsigned PredR = T1I->getOperand(0).getReg();
230292915Sdim
231292915Sdim  // Get the layout successor, or 0 if B does not have one.
232292915Sdim  MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
233292915Sdim  MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0;
234292915Sdim
235292915Sdim  MachineBasicBlock *T1B = T1I->getOperand(1).getMBB();
236292915Sdim  MachineBasicBlock::const_iterator T2I = std::next(T1I);
237292915Sdim  // The second terminator should be an unconditional branch.
238292915Sdim  assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump);
239292915Sdim  MachineBasicBlock *T2B = (T2I == B->end()) ? NextB
240292915Sdim                                             : T2I->getOperand(0).getMBB();
241292915Sdim  if (T1B == T2B) {
242292915Sdim    // XXX merge if T1B == NextB, or convert branch to unconditional.
243292915Sdim    // mark as diamond with both sides equal?
244292915Sdim    return false;
245292915Sdim  }
246292915Sdim  // Loop could be null for both.
247292915Sdim  if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
248292915Sdim    return false;
249292915Sdim
250292915Sdim  // Record the true/false blocks in such a way that "true" means "if (PredR)",
251292915Sdim  // and "false" means "if (!PredR)".
252292915Sdim  if (Opc == Hexagon::J2_jumpt)
253292915Sdim    TB = T1B, FB = T2B;
254292915Sdim  else
255292915Sdim    TB = T2B, FB = T1B;
256292915Sdim
257292915Sdim  if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB))
258292915Sdim    return false;
259292915Sdim
260292915Sdim  // Detect triangle first. In case of a triangle, one of the blocks TB/FB
261292915Sdim  // can fall through into the other, in other words, it will be executed
262292915Sdim  // in both cases. We only want to predicate the block that is executed
263292915Sdim  // conditionally.
264292915Sdim  unsigned TNP = TB->pred_size(), FNP = FB->pred_size();
265292915Sdim  unsigned TNS = TB->succ_size(), FNS = FB->succ_size();
266292915Sdim
267292915Sdim  // A block is predicable if it has one predecessor (it must be B), and
268292915Sdim  // it has a single successor. In fact, the block has to end either with
269292915Sdim  // an unconditional branch (which can be predicated), or with a fall-
270292915Sdim  // through.
271292915Sdim  bool TOk = (TNP == 1) && (TNS == 1);
272292915Sdim  bool FOk = (FNP == 1) && (FNS == 1);
273292915Sdim
274292915Sdim  // If neither is predicable, there is nothing interesting.
275292915Sdim  if (!TOk && !FOk)
276292915Sdim    return false;
277292915Sdim
278292915Sdim  MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0;
279292915Sdim  MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0;
280292915Sdim  MachineBasicBlock *JB = 0;
281292915Sdim
282292915Sdim  if (TOk) {
283292915Sdim    if (FOk) {
284292915Sdim      if (TSB == FSB)
285292915Sdim        JB = TSB;
286292915Sdim      // Diamond: "if (P) then TB; else FB;".
287292915Sdim    } else {
288292915Sdim      // TOk && !FOk
289292915Sdim      if (TSB == FB) {
290292915Sdim        JB = FB;
291292915Sdim        FB = 0;
292292915Sdim      }
293292915Sdim    }
294292915Sdim  } else {
295292915Sdim    // !TOk && FOk  (at least one must be true by now).
296292915Sdim    if (FSB == TB) {
297292915Sdim      JB = TB;
298292915Sdim      TB = 0;
299292915Sdim    }
300292915Sdim  }
301292915Sdim  // Don't try to predicate loop preheaders.
302292915Sdim  if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
303292915Sdim    DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
304292915Sdim                 << " is a loop preheader. Skipping.\n");
305292915Sdim    return false;
306292915Sdim  }
307292915Sdim
308292915Sdim  FP = FlowPattern(B, PredR, TB, FB, JB);
309292915Sdim  DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
310292915Sdim  return true;
311292915Sdim}
312292915Sdim
313292915Sdim
314292915Sdim// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
315292915Sdim// contains EH_LABEL.
316292915Sdimbool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
317292915Sdim  for (auto &I : *B)
318292915Sdim    if (I.isEHLabel())
319292915Sdim      return true;
320292915Sdim  return false;
321292915Sdim}
322292915Sdim
323292915Sdim
324292915Sdim// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
325292915Sdim// that a block can never fall-through.
326292915Sdimbool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
327292915Sdim      const {
328292915Sdim  MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
329292915Sdim  while (I != E) {
330292915Sdim    if (I->isBarrier())
331292915Sdim      return true;
332292915Sdim    ++I;
333292915Sdim  }
334292915Sdim  return false;
335292915Sdim}
336292915Sdim
337292915Sdim
338292915Sdimbool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
339292915Sdim      const {
340292915Sdim  if (!B)
341292915Sdim    return true;
342292915Sdim  if (B->isEHPad() || B->hasAddressTaken())
343292915Sdim    return false;
344292915Sdim  if (B->succ_size() == 0)
345292915Sdim    return false;
346292915Sdim
347292915Sdim  for (auto &MI : *B) {
348292915Sdim    if (MI.isDebugValue())
349292915Sdim      continue;
350292915Sdim    if (MI.isConditionalBranch())
351292915Sdim      return false;
352292915Sdim    unsigned Opc = MI.getOpcode();
353292915Sdim    bool IsJMP = (Opc == Hexagon::J2_jump);
354292915Sdim    if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI))
355292915Sdim      return false;
356292915Sdim    // Look for predicate registers defined by this instruction. It's ok
357292915Sdim    // to speculate such an instruction, but the predicate register cannot
358292915Sdim    // be used outside of this block (or else it won't be possible to
359292915Sdim    // update the use of it after predication). PHI uses will be updated
360292915Sdim    // to use a result of a MUX, and a MUX cannot be created for predicate
361292915Sdim    // registers.
362292915Sdim    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
363292915Sdim      if (!MO->isReg() || !MO->isDef())
364292915Sdim        continue;
365292915Sdim      unsigned R = MO->getReg();
366292915Sdim      if (!TargetRegisterInfo::isVirtualRegister(R))
367292915Sdim        continue;
368292915Sdim      if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
369292915Sdim        continue;
370292915Sdim      for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
371292915Sdim        if (U->getParent()->isPHI())
372292915Sdim          return false;
373292915Sdim    }
374292915Sdim  }
375292915Sdim  return true;
376292915Sdim}
377292915Sdim
378292915Sdim
379292915Sdimbool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
380292915Sdim  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
381292915Sdim    if (!MO->isReg() || !MO->isUse())
382292915Sdim      continue;
383292915Sdim    unsigned R = MO->getReg();
384292915Sdim    if (!TargetRegisterInfo::isVirtualRegister(R))
385292915Sdim      continue;
386292915Sdim    const MachineInstr *DefI = MRI->getVRegDef(R);
387292915Sdim    // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
388292915Sdim    assert(DefI && "Expecting a reaching def in MRI");
389292915Sdim    if (DefI->isImplicitDef())
390292915Sdim      return true;
391292915Sdim  }
392292915Sdim  return false;
393292915Sdim}
394292915Sdim
395292915Sdim
396292915Sdimbool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
397292915Sdim  if (hasEHLabel(FP.SplitB))  // KLUDGE: see function definition
398292915Sdim    return false;
399292915Sdim  if (FP.TrueB && !isValidCandidate(FP.TrueB))
400292915Sdim    return false;
401292915Sdim  if (FP.FalseB && !isValidCandidate(FP.FalseB))
402292915Sdim    return false;
403292915Sdim  // Check the PHIs in the join block. If any of them use a register
404292915Sdim  // that is defined as IMPLICIT_DEF, do not convert this. This can
405292915Sdim  // legitimately happen if one side of the split never executes, but
406292915Sdim  // the compiler is unable to prove it. That side may then seem to
407292915Sdim  // provide an "undef" value to the join block, however it will never
408292915Sdim  // execute at run-time. If we convert this case, the "undef" will
409292915Sdim  // be used in a MUX instruction, and that may seem like actually
410292915Sdim  // using an undefined value to other optimizations. This could lead
411292915Sdim  // to trouble further down the optimization stream, cause assertions
412292915Sdim  // to fail, etc.
413292915Sdim  if (FP.JoinB) {
414292915Sdim    const MachineBasicBlock &B = *FP.JoinB;
415292915Sdim    for (auto &MI : B) {
416292915Sdim      if (!MI.isPHI())
417292915Sdim        break;
418292915Sdim      if (usesUndefVReg(&MI))
419292915Sdim        return false;
420292915Sdim      unsigned DefR = MI.getOperand(0).getReg();
421292915Sdim      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
422292915Sdim      if (RC == &Hexagon::PredRegsRegClass)
423292915Sdim        return false;
424292915Sdim    }
425292915Sdim  }
426292915Sdim  return true;
427292915Sdim}
428292915Sdim
429292915Sdim
430292915Sdimunsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
431292915Sdim  assert(B->pred_size() <= 2);
432292915Sdim  if (B->pred_size() < 2)
433292915Sdim    return 0;
434292915Sdim
435292915Sdim  unsigned Cost = 0;
436292915Sdim  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
437292915Sdim  for (I = B->begin(); I != E; ++I) {
438292915Sdim    const MachineOperand &RO1 = I->getOperand(1);
439292915Sdim    const MachineOperand &RO3 = I->getOperand(3);
440292915Sdim    assert(RO1.isReg() && RO3.isReg());
441292915Sdim    // Must have a MUX if the phi uses a subregister.
442292915Sdim    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
443292915Sdim      Cost++;
444292915Sdim      continue;
445292915Sdim    }
446292915Sdim    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
447292915Sdim    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
448292915Sdim    if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
449292915Sdim      Cost++;
450292915Sdim  }
451292915Sdim  return Cost;
452292915Sdim}
453292915Sdim
454292915Sdim
455292915Sdimunsigned HexagonEarlyIfConversion::countPredicateDefs(
456292915Sdim      const MachineBasicBlock *B) const {
457292915Sdim  unsigned PredDefs = 0;
458292915Sdim  for (auto &MI : *B) {
459292915Sdim    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
460292915Sdim      if (!MO->isReg() || !MO->isDef())
461292915Sdim        continue;
462292915Sdim      unsigned R = MO->getReg();
463292915Sdim      if (!TargetRegisterInfo::isVirtualRegister(R))
464292915Sdim        continue;
465292915Sdim      if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
466292915Sdim        PredDefs++;
467292915Sdim    }
468292915Sdim  }
469292915Sdim  return PredDefs;
470292915Sdim}
471292915Sdim
472292915Sdim
473292915Sdimbool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
474292915Sdim  if (FP.TrueB && FP.FalseB) {
475292915Sdim
476292915Sdim    // Do not IfCovert if the branch is one sided.
477292915Sdim    if (MBPI) {
478292915Sdim      BranchProbability Prob(9, 10);
479292915Sdim      if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
480292915Sdim        return false;
481292915Sdim      if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
482292915Sdim        return false;
483292915Sdim    }
484292915Sdim
485292915Sdim    // If both sides are predicable, convert them if they join, and the
486292915Sdim    // join block has no other predecessors.
487292915Sdim    MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
488292915Sdim    MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
489292915Sdim    if (TSB != FSB)
490292915Sdim      return false;
491292915Sdim    if (TSB->pred_size() != 2)
492292915Sdim      return false;
493292915Sdim  }
494292915Sdim
495292915Sdim  // Calculate the total size of the predicated blocks.
496292915Sdim  // Assume instruction counts without branches to be the approximation of
497292915Sdim  // the code size. If the predicated blocks are smaller than a packet size,
498292915Sdim  // approximate the spare room in the packet that could be filled with the
499292915Sdim  // predicated/speculated instructions.
500292915Sdim  unsigned TS = 0, FS = 0, Spare = 0;
501292915Sdim  if (FP.TrueB) {
502292915Sdim    TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
503292915Sdim    if (TS < HEXAGON_PACKET_SIZE)
504292915Sdim      Spare += HEXAGON_PACKET_SIZE-TS;
505292915Sdim  }
506292915Sdim  if (FP.FalseB) {
507292915Sdim    FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
508292915Sdim    if (FS < HEXAGON_PACKET_SIZE)
509292915Sdim      Spare += HEXAGON_PACKET_SIZE-TS;
510292915Sdim  }
511292915Sdim  unsigned TotalIn = TS+FS;
512292915Sdim  DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
513292915Sdim               << TotalIn << ", spare room: " << Spare << "\n");
514292915Sdim  if (TotalIn >= SizeLimit+Spare)
515292915Sdim    return false;
516292915Sdim
517292915Sdim  // Count the number of PHI nodes that will need to be updated (converted
518292915Sdim  // to MUX). Those can be later converted to predicated instructions, so
519292915Sdim  // they aren't always adding extra cost.
520292915Sdim  // KLUDGE: Also, count the number of predicate register definitions in
521292915Sdim  // each block. The scheduler may increase the pressure of these and cause
522292915Sdim  // expensive spills (e.g. bitmnp01).
523292915Sdim  unsigned TotalPh = 0;
524292915Sdim  unsigned PredDefs = countPredicateDefs(FP.SplitB);
525292915Sdim  if (FP.JoinB) {
526292915Sdim    TotalPh = computePhiCost(FP.JoinB);
527292915Sdim    PredDefs += countPredicateDefs(FP.JoinB);
528292915Sdim  } else {
529292915Sdim    if (FP.TrueB && FP.TrueB->succ_size() > 0) {
530292915Sdim      MachineBasicBlock *SB = *FP.TrueB->succ_begin();
531292915Sdim      TotalPh += computePhiCost(SB);
532292915Sdim      PredDefs += countPredicateDefs(SB);
533292915Sdim    }
534292915Sdim    if (FP.FalseB && FP.FalseB->succ_size() > 0) {
535292915Sdim      MachineBasicBlock *SB = *FP.FalseB->succ_begin();
536292915Sdim      TotalPh += computePhiCost(SB);
537292915Sdim      PredDefs += countPredicateDefs(SB);
538292915Sdim    }
539292915Sdim  }
540292915Sdim  DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
541292915Sdim               << TotalPh << "\n");
542292915Sdim  if (TotalIn+TotalPh >= SizeLimit+Spare)
543292915Sdim    return false;
544292915Sdim
545292915Sdim  DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
546292915Sdim  if (PredDefs > 4)
547292915Sdim    return false;
548292915Sdim
549292915Sdim  return true;
550292915Sdim}
551292915Sdim
552292915Sdim
553292915Sdimbool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
554292915Sdim      MachineLoop *L) {
555292915Sdim  bool Changed = false;
556292915Sdim
557292915Sdim  // Visit all dominated blocks from the same loop first, then process B.
558292915Sdim  MachineDomTreeNode *N = MDT->getNode(B);
559292915Sdim  typedef GraphTraits<MachineDomTreeNode*> GTN;
560292915Sdim  // We will change CFG/DT during this traversal, so take precautions to
561292915Sdim  // avoid problems related to invalidated iterators. In fact, processing
562292915Sdim  // a child C of B cannot cause another child to be removed, but it can
563292915Sdim  // cause a new child to be added (which was a child of C before C itself
564292915Sdim  // was removed. This new child C, however, would have been processed
565292915Sdim  // prior to processing B, so there is no need to process it again.
566292915Sdim  // Simply keep a list of children of B, and traverse that list.
567292915Sdim  typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
568292915Sdim  DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
569292915Sdim  for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
570292915Sdim    MachineBasicBlock *SB = (*I)->getBlock();
571292915Sdim    if (!Deleted.count(SB))
572292915Sdim      Changed |= visitBlock(SB, L);
573292915Sdim  }
574292915Sdim  // When walking down the dominator tree, we want to traverse through
575292915Sdim  // blocks from nested (other) loops, because they can dominate blocks
576292915Sdim  // that are in L. Skip the non-L blocks only after the tree traversal.
577292915Sdim  if (MLI->getLoopFor(B) != L)
578292915Sdim    return Changed;
579292915Sdim
580292915Sdim  FlowPattern FP;
581292915Sdim  if (!matchFlowPattern(B, L, FP))
582292915Sdim    return Changed;
583292915Sdim
584292915Sdim  if (!isValid(FP)) {
585292915Sdim    DEBUG(dbgs() << "Conversion is not valid\n");
586292915Sdim    return Changed;
587292915Sdim  }
588292915Sdim  if (!isProfitable(FP)) {
589292915Sdim    DEBUG(dbgs() << "Conversion is not profitable\n");
590292915Sdim    return Changed;
591292915Sdim  }
592292915Sdim
593292915Sdim  convert(FP);
594292915Sdim  simplifyFlowGraph(FP);
595292915Sdim  return true;
596292915Sdim}
597292915Sdim
598292915Sdim
599292915Sdimbool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
600292915Sdim  MachineBasicBlock *HB = L ? L->getHeader() : 0;
601292915Sdim  DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
602292915Sdim           : dbgs() << "Visiting function") << "\n");
603292915Sdim  bool Changed = false;
604292915Sdim  if (L) {
605292915Sdim    for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
606292915Sdim      Changed |= visitLoop(*I);
607292915Sdim  }
608292915Sdim
609292915Sdim  MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
610292915Sdim  Changed |= visitBlock(L ? HB : EntryB, L);
611292915Sdim  return Changed;
612292915Sdim}
613292915Sdim
614292915Sdim
615292915Sdimbool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
616292915Sdim      const {
617292915Sdim  // Exclude post-increment stores. Those return a value, so we cannot
618292915Sdim  // predicate them.
619292915Sdim  unsigned Opc = MI->getOpcode();
620292915Sdim  using namespace Hexagon;
621292915Sdim  switch (Opc) {
622292915Sdim    // Store byte:
623292915Sdim    case S2_storerb_io: case S4_storerb_rr:
624292915Sdim    case S2_storerbabs: case S4_storeirb_io:  case S2_storerbgp:
625292915Sdim    // Store halfword:
626292915Sdim    case S2_storerh_io: case S4_storerh_rr:
627292915Sdim    case S2_storerhabs: case S4_storeirh_io:  case S2_storerhgp:
628292915Sdim    // Store upper halfword:
629292915Sdim    case S2_storerf_io: case S4_storerf_rr:
630292915Sdim    case S2_storerfabs: case S2_storerfgp:
631292915Sdim    // Store word:
632292915Sdim    case S2_storeri_io: case S4_storeri_rr:
633292915Sdim    case S2_storeriabs: case S4_storeiri_io:  case S2_storerigp:
634292915Sdim    // Store doubleword:
635292915Sdim    case S2_storerd_io: case S4_storerd_rr:
636292915Sdim    case S2_storerdabs: case S2_storerdgp:
637292915Sdim      return true;
638292915Sdim  }
639292915Sdim  return false;
640292915Sdim}
641292915Sdim
642292915Sdim
643292915Sdimbool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
644292915Sdim      const {
645292915Sdim  if (MI->mayLoad() || MI->mayStore())
646292915Sdim    return false;
647292915Sdim  if (MI->isCall() || MI->isBarrier() || MI->isBranch())
648292915Sdim    return false;
649292915Sdim  if (MI->hasUnmodeledSideEffects())
650292915Sdim    return false;
651292915Sdim
652292915Sdim  return true;
653292915Sdim}
654292915Sdim
655292915Sdim
656292915Sdimunsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
657292915Sdim      bool IfTrue) const {
658292915Sdim  // Exclude post-increment stores.
659292915Sdim  using namespace Hexagon;
660292915Sdim  switch (Opc) {
661292915Sdim    case S2_storerb_io:
662292915Sdim      return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io;
663292915Sdim    case S4_storerb_rr:
664292915Sdim      return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr;
665292915Sdim    case S2_storerbabs:
666292915Sdim    case S2_storerbgp:
667292915Sdim      return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs;
668292915Sdim    case S4_storeirb_io:
669292915Sdim      return IfTrue ? S4_storeirbt_io : S4_storeirbf_io;
670292915Sdim    case S2_storerh_io:
671292915Sdim      return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io;
672292915Sdim    case S4_storerh_rr:
673292915Sdim      return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr;
674292915Sdim    case S2_storerhabs:
675292915Sdim    case S2_storerhgp:
676292915Sdim      return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs;
677292915Sdim    case S2_storerf_io:
678292915Sdim      return IfTrue ? S2_pstorerft_io : S2_pstorerff_io;
679292915Sdim    case S4_storerf_rr:
680292915Sdim      return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr;
681292915Sdim    case S2_storerfabs:
682292915Sdim    case S2_storerfgp:
683292915Sdim      return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs;
684292915Sdim    case S4_storeirh_io:
685292915Sdim      return IfTrue ? S4_storeirht_io : S4_storeirhf_io;
686292915Sdim    case S2_storeri_io:
687292915Sdim      return IfTrue ? S2_pstorerit_io : S2_pstorerif_io;
688292915Sdim    case S4_storeri_rr:
689292915Sdim      return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr;
690292915Sdim    case S2_storeriabs:
691292915Sdim    case S2_storerigp:
692292915Sdim      return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs;
693292915Sdim    case S4_storeiri_io:
694292915Sdim      return IfTrue ? S4_storeirit_io : S4_storeirif_io;
695292915Sdim    case S2_storerd_io:
696292915Sdim      return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io;
697292915Sdim    case S4_storerd_rr:
698292915Sdim      return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr;
699292915Sdim    case S2_storerdabs:
700292915Sdim    case S2_storerdgp:
701292915Sdim      return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs;
702292915Sdim  }
703292915Sdim  llvm_unreachable("Unexpected opcode");
704292915Sdim  return 0;
705292915Sdim}
706292915Sdim
707292915Sdim
708292915Sdimvoid HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
709292915Sdim      MachineBasicBlock::iterator At, MachineInstr *MI,
710292915Sdim      unsigned PredR, bool IfTrue) {
711292915Sdim  DebugLoc DL;
712292915Sdim  if (At != ToB->end())
713292915Sdim    DL = At->getDebugLoc();
714292915Sdim  else if (!ToB->empty())
715292915Sdim    DL = ToB->back().getDebugLoc();
716292915Sdim
717292915Sdim  unsigned Opc = MI->getOpcode();
718292915Sdim
719292915Sdim  if (isPredicableStore(MI)) {
720292915Sdim    unsigned COpc = getCondStoreOpcode(Opc, IfTrue);
721292915Sdim    assert(COpc);
722292915Sdim    MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc))
723292915Sdim      .addReg(PredR);
724292915Sdim    for (MIOperands MO(MI); MO.isValid(); ++MO)
725292915Sdim      MIB.addOperand(*MO);
726292915Sdim
727292915Sdim    // Set memory references.
728292915Sdim    MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
729292915Sdim    MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
730292915Sdim    MIB.setMemRefs(MMOBegin, MMOEnd);
731292915Sdim
732292915Sdim    MI->eraseFromParent();
733292915Sdim    return;
734292915Sdim  }
735292915Sdim
736292915Sdim  if (Opc == Hexagon::J2_jump) {
737292915Sdim    MachineBasicBlock *TB = MI->getOperand(0).getMBB();
738292915Sdim    const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt
739292915Sdim                                           : Hexagon::J2_jumpf);
740292915Sdim    BuildMI(*ToB, At, DL, D)
741292915Sdim      .addReg(PredR)
742292915Sdim      .addMBB(TB);
743292915Sdim    MI->eraseFromParent();
744292915Sdim    return;
745292915Sdim  }
746292915Sdim
747292915Sdim  // Print the offending instruction unconditionally as we are about to
748292915Sdim  // abort.
749292915Sdim  dbgs() << *MI;
750292915Sdim  llvm_unreachable("Unexpected instruction");
751292915Sdim}
752292915Sdim
753292915Sdim
754292915Sdim// Predicate/speculate non-branch instructions from FromB into block ToB.
755292915Sdim// Leave the branches alone, they will be handled later. Btw, at this point
756292915Sdim// FromB should have at most one branch, and it should be unconditional.
757292915Sdimvoid HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
758292915Sdim      MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
759292915Sdim      unsigned PredR, bool IfTrue) {
760292915Sdim  DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
761292915Sdim  MachineBasicBlock::iterator End = FromB->getFirstTerminator();
762292915Sdim  MachineBasicBlock::iterator I, NextI;
763292915Sdim
764292915Sdim  for (I = FromB->begin(); I != End; I = NextI) {
765292915Sdim    assert(!I->isPHI());
766292915Sdim    NextI = std::next(I);
767292915Sdim    if (isSafeToSpeculate(&*I))
768292915Sdim      ToB->splice(At, FromB, I);
769292915Sdim    else
770292915Sdim      predicateInstr(ToB, At, &*I, PredR, IfTrue);
771292915Sdim  }
772292915Sdim}
773292915Sdim
774292915Sdim
775292915Sdimvoid HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
776292915Sdim      const FlowPattern &FP) {
777292915Sdim  // Visit all PHI nodes in the WhereB block and generate MUX instructions
778292915Sdim  // in the split block. Update the PHI nodes with the values of the MUX.
779292915Sdim  auto NonPHI = WhereB->getFirstNonPHI();
780292915Sdim  for (auto I = WhereB->begin(); I != NonPHI; ++I) {
781292915Sdim    MachineInstr *PN = &*I;
782292915Sdim    // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
783292915Sdim    unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0;
784292915Sdim    for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
785292915Sdim      const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1);
786292915Sdim      if (BO.getMBB() == FP.SplitB)
787292915Sdim        SR = RO.getReg(), SSR = RO.getSubReg();
788292915Sdim      else if (BO.getMBB() == FP.TrueB)
789292915Sdim        TR = RO.getReg(), TSR = RO.getSubReg();
790292915Sdim      else if (BO.getMBB() == FP.FalseB)
791292915Sdim        FR = RO.getReg(), FSR = RO.getSubReg();
792292915Sdim      else
793292915Sdim        continue;
794292915Sdim      PN->RemoveOperand(i+1);
795292915Sdim      PN->RemoveOperand(i);
796292915Sdim    }
797292915Sdim    if (TR == 0)
798292915Sdim      TR = SR, TSR = SSR;
799292915Sdim    else if (FR == 0)
800292915Sdim      FR = SR, FSR = SSR;
801292915Sdim    assert(TR && FR);
802292915Sdim
803292915Sdim    using namespace Hexagon;
804292915Sdim    unsigned DR = PN->getOperand(0).getReg();
805292915Sdim    const TargetRegisterClass *RC = MRI->getRegClass(DR);
806292915Sdim    const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux)
807292915Sdim                                                  : TII->get(MUX64_rr);
808292915Sdim
809292915Sdim    MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
810292915Sdim    DebugLoc DL;
811292915Sdim    if (MuxAt != FP.SplitB->end())
812292915Sdim      DL = MuxAt->getDebugLoc();
813292915Sdim    unsigned MuxR = MRI->createVirtualRegister(RC);
814292915Sdim    BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
815292915Sdim      .addReg(FP.PredR)
816292915Sdim      .addReg(TR, 0, TSR)
817292915Sdim      .addReg(FR, 0, FSR);
818292915Sdim
819292915Sdim    PN->addOperand(MachineOperand::CreateReg(MuxR, false));
820292915Sdim    PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
821292915Sdim  }
822292915Sdim}
823292915Sdim
824292915Sdim
825292915Sdimvoid HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
826292915Sdim  MachineBasicBlock *TSB = 0, *FSB = 0;
827292915Sdim  MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
828292915Sdim  assert(OldTI != FP.SplitB->end());
829292915Sdim  DebugLoc DL = OldTI->getDebugLoc();
830292915Sdim
831292915Sdim  if (FP.TrueB) {
832292915Sdim    TSB = *FP.TrueB->succ_begin();
833292915Sdim    predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true);
834292915Sdim  }
835292915Sdim  if (FP.FalseB) {
836292915Sdim    FSB = *FP.FalseB->succ_begin();
837292915Sdim    MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator();
838292915Sdim    predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false);
839292915Sdim  }
840292915Sdim
841292915Sdim  // Regenerate new terminators in the split block and update the successors.
842292915Sdim  // First, remember any information that may be needed later and remove the
843292915Sdim  // existing terminators/successors from the split block.
844292915Sdim  MachineBasicBlock *SSB = 0;
845292915Sdim  FP.SplitB->erase(OldTI, FP.SplitB->end());
846292915Sdim  while (FP.SplitB->succ_size() > 0) {
847292915Sdim    MachineBasicBlock *T = *FP.SplitB->succ_begin();
848292915Sdim    // It's possible that the split block had a successor that is not a pre-
849292915Sdim    // dicated block. This could only happen if there was only one block to
850292915Sdim    // be predicated. Example:
851292915Sdim    //   split_b:
852292915Sdim    //     if (p) jump true_b
853292915Sdim    //     jump unrelated2_b
854292915Sdim    //   unrelated1_b:
855292915Sdim    //     ...
856292915Sdim    //   unrelated2_b:  ; can have other predecessors, so it's not "false_b"
857292915Sdim    //     jump other_b
858292915Sdim    //   true_b:        ; only reachable from split_b, can be predicated
859292915Sdim    //     ...
860292915Sdim    //
861292915Sdim    // Find this successor (SSB) if it exists.
862292915Sdim    if (T != FP.TrueB && T != FP.FalseB) {
863292915Sdim      assert(!SSB);
864292915Sdim      SSB = T;
865292915Sdim    }
866292915Sdim    FP.SplitB->removeSuccessor(FP.SplitB->succ_begin());
867292915Sdim  }
868292915Sdim
869292915Sdim  // Insert new branches and update the successors of the split block. This
870292915Sdim  // may create unconditional branches to the layout successor, etc., but
871292915Sdim  // that will be cleaned up later. For now, make sure that correct code is
872292915Sdim  // generated.
873292915Sdim  if (FP.JoinB) {
874292915Sdim    assert(!SSB || SSB == FP.JoinB);
875292915Sdim    BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
876292915Sdim      .addMBB(FP.JoinB);
877292915Sdim    FP.SplitB->addSuccessor(FP.JoinB);
878292915Sdim  } else {
879292915Sdim    bool HasBranch = false;
880292915Sdim    if (TSB) {
881292915Sdim      BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
882292915Sdim        .addReg(FP.PredR)
883292915Sdim        .addMBB(TSB);
884292915Sdim      FP.SplitB->addSuccessor(TSB);
885292915Sdim      HasBranch = true;
886292915Sdim    }
887292915Sdim    if (FSB) {
888292915Sdim      const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
889292915Sdim                                       : TII->get(Hexagon::J2_jumpf);
890292915Sdim      MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
891292915Sdim      if (!HasBranch)
892292915Sdim        MIB.addReg(FP.PredR);
893292915Sdim      MIB.addMBB(FSB);
894292915Sdim      FP.SplitB->addSuccessor(FSB);
895292915Sdim    }
896292915Sdim    if (SSB) {
897292915Sdim      // This cannot happen if both TSB and FSB are set. [TF]SB are the
898292915Sdim      // successor blocks of the TrueB and FalseB (or null of the TrueB
899292915Sdim      // or FalseB block is null). SSB is the potential successor block
900292915Sdim      // of the SplitB that is neither TrueB nor FalseB.
901292915Sdim      BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
902292915Sdim        .addMBB(SSB);
903292915Sdim      FP.SplitB->addSuccessor(SSB);
904292915Sdim    }
905292915Sdim  }
906292915Sdim
907292915Sdim  // What is left to do is to update the PHI nodes that could have entries
908292915Sdim  // referring to predicated blocks.
909292915Sdim  if (FP.JoinB) {
910292915Sdim    updatePhiNodes(FP.JoinB, FP);
911292915Sdim  } else {
912292915Sdim    if (TSB)
913292915Sdim      updatePhiNodes(TSB, FP);
914292915Sdim    if (FSB)
915292915Sdim      updatePhiNodes(FSB, FP);
916292915Sdim    // Nothing to update in SSB, since SSB's predecessors haven't changed.
917292915Sdim  }
918292915Sdim}
919292915Sdim
920292915Sdim
921292915Sdimvoid HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
922292915Sdim  DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
923292915Sdim
924292915Sdim  // Transfer the immediate dominator information from B to its descendants.
925292915Sdim  MachineDomTreeNode *N = MDT->getNode(B);
926292915Sdim  MachineDomTreeNode *IDN = N->getIDom();
927292915Sdim  if (IDN) {
928292915Sdim    MachineBasicBlock *IDB = IDN->getBlock();
929292915Sdim    typedef GraphTraits<MachineDomTreeNode*> GTN;
930292915Sdim    typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
931292915Sdim    DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
932292915Sdim    for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
933292915Sdim      MachineBasicBlock *SB = (*I)->getBlock();
934292915Sdim      MDT->changeImmediateDominator(SB, IDB);
935292915Sdim    }
936292915Sdim  }
937292915Sdim
938292915Sdim  while (B->succ_size() > 0)
939292915Sdim    B->removeSuccessor(B->succ_begin());
940292915Sdim
941292915Sdim  for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
942292915Sdim    (*I)->removeSuccessor(B, true);
943292915Sdim
944292915Sdim  Deleted.insert(B);
945292915Sdim  MDT->eraseNode(B);
946292915Sdim  MFN->erase(B->getIterator());
947292915Sdim}
948292915Sdim
949292915Sdim
950292915Sdimvoid HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
951292915Sdim  DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
952292915Sdim  MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
953292915Sdim  for (I = B->begin(); I != NonPHI; I = NextI) {
954292915Sdim    NextI = std::next(I);
955292915Sdim    MachineInstr *PN = &*I;
956292915Sdim    assert(PN->getNumOperands() == 3 && "Invalid phi node");
957292915Sdim    MachineOperand &UO = PN->getOperand(1);
958292915Sdim    unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
959292915Sdim    unsigned DefR = PN->getOperand(0).getReg();
960292915Sdim    unsigned NewR = UseR;
961292915Sdim    if (UseSR) {
962292915Sdim      // MRI.replaceVregUsesWith does not allow to update the subregister,
963292915Sdim      // so instead of doing the use-iteration here, create a copy into a
964292915Sdim      // "non-subregistered" register.
965292915Sdim      DebugLoc DL = PN->getDebugLoc();
966292915Sdim      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
967292915Sdim      NewR = MRI->createVirtualRegister(RC);
968292915Sdim      NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
969292915Sdim        .addReg(UseR, 0, UseSR);
970292915Sdim    }
971292915Sdim    MRI->replaceRegWith(DefR, NewR);
972292915Sdim    B->erase(I);
973292915Sdim  }
974292915Sdim}
975292915Sdim
976292915Sdim
977292915Sdimvoid HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
978292915Sdim      MachineBasicBlock *NewB) {
979292915Sdim  for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
980292915Sdim    MachineBasicBlock *SB = *I;
981292915Sdim    MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
982292915Sdim    for (P = SB->begin(); P != N; ++P) {
983292915Sdim      MachineInstr *PN = &*P;
984292915Sdim      for (MIOperands MO(PN); MO.isValid(); ++MO)
985292915Sdim        if (MO->isMBB() && MO->getMBB() == OldB)
986292915Sdim          MO->setMBB(NewB);
987292915Sdim    }
988292915Sdim  }
989292915Sdim}
990292915Sdim
991292915Sdim
992292915Sdimvoid HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
993292915Sdim      MachineBasicBlock *SuccB) {
994292915Sdim  DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
995292915Sdim               << PrintMB(SuccB) << "\n");
996292915Sdim  bool TermOk = hasUncondBranch(SuccB);
997292915Sdim  eliminatePhis(SuccB);
998292915Sdim  TII->RemoveBranch(*PredB);
999292915Sdim  PredB->removeSuccessor(SuccB);
1000292915Sdim  PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
1001292915Sdim  MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
1002292915Sdim  for (I = SuccB->succ_begin(); I != E; ++I)
1003292915Sdim    PredB->addSuccessor(*I);
1004292915Sdim  PredB->normalizeSuccProbs();
1005292915Sdim  replacePhiEdges(SuccB, PredB);
1006292915Sdim  removeBlock(SuccB);
1007292915Sdim  if (!TermOk)
1008292915Sdim    PredB->updateTerminator();
1009292915Sdim}
1010292915Sdim
1011292915Sdim
1012292915Sdimvoid HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
1013292915Sdim  if (FP.TrueB)
1014292915Sdim    removeBlock(FP.TrueB);
1015292915Sdim  if (FP.FalseB)
1016292915Sdim    removeBlock(FP.FalseB);
1017292915Sdim
1018292915Sdim  FP.SplitB->updateTerminator();
1019292915Sdim  if (FP.SplitB->succ_size() != 1)
1020292915Sdim    return;
1021292915Sdim
1022292915Sdim  MachineBasicBlock *SB = *FP.SplitB->succ_begin();
1023292915Sdim  if (SB->pred_size() != 1)
1024292915Sdim    return;
1025292915Sdim
1026292915Sdim  // By now, the split block has only one successor (SB), and SB has only
1027292915Sdim  // one predecessor. We can try to merge them. We will need to update ter-
1028292915Sdim  // minators in FP.Split+SB, and that requires working AnalyzeBranch, which
1029292915Sdim  // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
1030292915Sdim  // with an unconditional branch, we won't need to touch the terminators.
1031292915Sdim  if (!hasEHLabel(SB) || hasUncondBranch(SB))
1032292915Sdim    mergeBlocks(FP.SplitB, SB);
1033292915Sdim}
1034292915Sdim
1035292915Sdim
1036292915Sdimbool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
1037292915Sdim  auto &ST = MF.getSubtarget();
1038292915Sdim  TII = ST.getInstrInfo();
1039292915Sdim  TRI = ST.getRegisterInfo();
1040292915Sdim  MFN = &MF;
1041292915Sdim  MRI = &MF.getRegInfo();
1042292915Sdim  MDT = &getAnalysis<MachineDominatorTree>();
1043292915Sdim  MLI = &getAnalysis<MachineLoopInfo>();
1044292915Sdim  MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
1045292915Sdim    nullptr;
1046292915Sdim
1047292915Sdim  Deleted.clear();
1048292915Sdim  bool Changed = false;
1049292915Sdim
1050292915Sdim  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
1051292915Sdim    Changed |= visitLoop(*I);
1052292915Sdim  Changed |= visitLoop(0);
1053292915Sdim
1054292915Sdim  return Changed;
1055292915Sdim}
1056292915Sdim
1057292915Sdim//===----------------------------------------------------------------------===//
1058292915Sdim//                         Public Constructor Functions
1059292915Sdim//===----------------------------------------------------------------------===//
1060292915SdimFunctionPass *llvm::createHexagonEarlyIfConversion() {
1061292915Sdim  return new HexagonEarlyIfConversion();
1062292915Sdim}
1063292915Sdim
1064