1292915Sdim//===--- HexagonEarlyIfConv.cpp -------------------------------------------===// 2292915Sdim// 3292915Sdim// The LLVM Compiler Infrastructure 4292915Sdim// 5292915Sdim// This file is distributed under the University of Illinois Open Source 6292915Sdim// License. See LICENSE.TXT for details. 7292915Sdim// 8292915Sdim//===----------------------------------------------------------------------===// 9292915Sdim// 10292915Sdim// This implements a Hexagon-specific if-conversion pass that runs on the 11292915Sdim// SSA form. 12292915Sdim// In SSA it is not straightforward to represent instructions that condi- 13292915Sdim// tionally define registers, since a conditionally-defined register may 14292915Sdim// only be used under the same condition on which the definition was based. 15292915Sdim// To avoid complications of this nature, this patch will only generate 16292915Sdim// predicated stores, and speculate other instructions from the "if-conver- 17292915Sdim// ted" block. 18292915Sdim// The code will recognize CFG patterns where a block with a conditional 19292915Sdim// branch "splits" into a "true block" and a "false block". Either of these 20292915Sdim// could be omitted (in case of a triangle, for example). 21292915Sdim// If after conversion of the side block(s) the CFG allows it, the resul- 22292915Sdim// ting blocks may be merged. If the "join" block contained PHI nodes, they 23292915Sdim// will be replaced with MUX (or MUX-like) instructions to maintain the 24292915Sdim// semantics of the PHI. 25292915Sdim// 26292915Sdim// Example: 27292915Sdim// 28292915Sdim// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 29292915Sdim// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 30292915Sdim// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead> 31292915Sdim// J2_jump <BB#4>, %PC<imp-def,dead> 32292915Sdim// Successors according to CFG: BB#4(62) BB#5(62) 33292915Sdim// 34292915Sdim// BB#4: derived from LLVM BB %if.then 35292915Sdim// Predecessors according to CFG: BB#3 36292915Sdim// %vreg11<def> = A2_addp %vreg6, %vreg10 37292915Sdim// S2_storerd_io %vreg32, 16, %vreg11 38292915Sdim// Successors according to CFG: BB#5 39292915Sdim// 40292915Sdim// BB#5: derived from LLVM BB %if.end 41292915Sdim// Predecessors according to CFG: BB#3 BB#4 42292915Sdim// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4> 43292915Sdim// %vreg13<def> = A2_addp %vreg7, %vreg12 44292915Sdim// %vreg42<def> = C2_cmpeqi %vreg9, 10 45292915Sdim// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> 46292915Sdim// J2_jump <BB#6>, %PC<imp-def,dead> 47292915Sdim// Successors according to CFG: BB#6(4) BB#3(124) 48292915Sdim// 49292915Sdim// would become: 50292915Sdim// 51292915Sdim// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 52292915Sdim// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 53292915Sdim// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 54292915Sdim// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 55292915Sdim// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 56292915Sdim// %vreg13<def> = A2_addp %vreg7, %vreg46 57292915Sdim// %vreg42<def> = C2_cmpeqi %vreg9, 10 58292915Sdim// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> 59292915Sdim// J2_jump <BB#6>, %PC<imp-def,dead> 60292915Sdim// Successors according to CFG: BB#6 BB#3 61292915Sdim 62292915Sdim#define DEBUG_TYPE "hexagon-eif" 63292915Sdim 64292915Sdim#include "llvm/ADT/DenseSet.h" 65292915Sdim#include "llvm/ADT/SetVector.h" 66292915Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 67292915Sdim#include "llvm/CodeGen/MachineDominators.h" 68292915Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 69292915Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 70292915Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 71292915Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 72292915Sdim#include "llvm/CodeGen/Passes.h" 73292915Sdim#include "llvm/Support/CommandLine.h" 74292915Sdim#include "llvm/Support/Debug.h" 75292915Sdim#include "llvm/Support/raw_ostream.h" 76292915Sdim#include "llvm/Target/TargetInstrInfo.h" 77292915Sdim#include "llvm/Target/TargetMachine.h" 78292915Sdim#include "HexagonTargetMachine.h" 79292915Sdim 80292915Sdim#include <functional> 81292915Sdim#include <set> 82292915Sdim#include <vector> 83292915Sdim 84292915Sdimusing namespace llvm; 85292915Sdim 86292915Sdimnamespace llvm { 87292915Sdim FunctionPass *createHexagonEarlyIfConversion(); 88292915Sdim void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); 89292915Sdim} 90292915Sdim 91292915Sdimnamespace { 92292915Sdim cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, 93292915Sdim cl::init(false), cl::desc("Enable branch probability info")); 94292915Sdim cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, 95292915Sdim cl::desc("Size limit in Hexagon early if-conversion")); 96292915Sdim 97292915Sdim struct PrintMB { 98292915Sdim PrintMB(const MachineBasicBlock *B) : MB(B) {} 99292915Sdim const MachineBasicBlock *MB; 100292915Sdim }; 101292915Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) { 102292915Sdim if (!P.MB) 103292915Sdim return OS << "<none>"; 104292915Sdim return OS << '#' << P.MB->getNumber(); 105292915Sdim } 106292915Sdim 107292915Sdim struct FlowPattern { 108292915Sdim FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} 109292915Sdim FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, 110292915Sdim MachineBasicBlock *FB, MachineBasicBlock *JB) 111292915Sdim : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} 112292915Sdim 113292915Sdim MachineBasicBlock *SplitB; 114292915Sdim MachineBasicBlock *TrueB, *FalseB, *JoinB; 115292915Sdim unsigned PredR; 116292915Sdim }; 117292915Sdim struct PrintFP { 118292915Sdim PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) 119292915Sdim : FP(P), TRI(T) {} 120292915Sdim const FlowPattern &FP; 121292915Sdim const TargetRegisterInfo &TRI; 122292915Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); 123292915Sdim }; 124292915Sdim raw_ostream &operator<<(raw_ostream &OS, 125292915Sdim const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; 126292915Sdim raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { 127292915Sdim OS << "{ SplitB:" << PrintMB(P.FP.SplitB) 128292915Sdim << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI) 129292915Sdim << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:" 130292915Sdim << PrintMB(P.FP.FalseB) 131292915Sdim << ", JoinB:" << PrintMB(P.FP.JoinB) << " }"; 132292915Sdim return OS; 133292915Sdim } 134292915Sdim 135292915Sdim class HexagonEarlyIfConversion : public MachineFunctionPass { 136292915Sdim public: 137292915Sdim static char ID; 138292915Sdim HexagonEarlyIfConversion() : MachineFunctionPass(ID), 139292915Sdim TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { 140292915Sdim initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); 141292915Sdim } 142292915Sdim const char *getPassName() const override { 143292915Sdim return "Hexagon early if conversion"; 144292915Sdim } 145292915Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 146292915Sdim AU.addRequired<MachineBranchProbabilityInfo>(); 147292915Sdim AU.addRequired<MachineDominatorTree>(); 148292915Sdim AU.addPreserved<MachineDominatorTree>(); 149292915Sdim AU.addRequired<MachineLoopInfo>(); 150292915Sdim MachineFunctionPass::getAnalysisUsage(AU); 151292915Sdim } 152292915Sdim bool runOnMachineFunction(MachineFunction &MF) override; 153292915Sdim 154292915Sdim private: 155292915Sdim typedef DenseSet<MachineBasicBlock*> BlockSetType; 156292915Sdim 157292915Sdim bool isPreheader(const MachineBasicBlock *B) const; 158292915Sdim bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, 159292915Sdim FlowPattern &FP); 160292915Sdim bool visitBlock(MachineBasicBlock *B, MachineLoop *L); 161292915Sdim bool visitLoop(MachineLoop *L); 162292915Sdim 163292915Sdim bool hasEHLabel(const MachineBasicBlock *B) const; 164292915Sdim bool hasUncondBranch(const MachineBasicBlock *B) const; 165292915Sdim bool isValidCandidate(const MachineBasicBlock *B) const; 166292915Sdim bool usesUndefVReg(const MachineInstr *MI) const; 167292915Sdim bool isValid(const FlowPattern &FP) const; 168292915Sdim unsigned countPredicateDefs(const MachineBasicBlock *B) const; 169292915Sdim unsigned computePhiCost(MachineBasicBlock *B) const; 170292915Sdim bool isProfitable(const FlowPattern &FP) const; 171292915Sdim bool isPredicableStore(const MachineInstr *MI) const; 172292915Sdim bool isSafeToSpeculate(const MachineInstr *MI) const; 173292915Sdim 174292915Sdim unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; 175292915Sdim void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, 176292915Sdim MachineInstr *MI, unsigned PredR, bool IfTrue); 177292915Sdim void predicateBlockNB(MachineBasicBlock *ToB, 178292915Sdim MachineBasicBlock::iterator At, MachineBasicBlock *FromB, 179292915Sdim unsigned PredR, bool IfTrue); 180292915Sdim 181292915Sdim void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP); 182292915Sdim void convert(const FlowPattern &FP); 183292915Sdim 184292915Sdim void removeBlock(MachineBasicBlock *B); 185292915Sdim void eliminatePhis(MachineBasicBlock *B); 186292915Sdim void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); 187292915Sdim void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); 188292915Sdim void simplifyFlowGraph(const FlowPattern &FP); 189292915Sdim 190292915Sdim const TargetInstrInfo *TII; 191292915Sdim const TargetRegisterInfo *TRI; 192292915Sdim MachineFunction *MFN; 193292915Sdim MachineRegisterInfo *MRI; 194292915Sdim MachineDominatorTree *MDT; 195292915Sdim MachineLoopInfo *MLI; 196292915Sdim BlockSetType Deleted; 197292915Sdim const MachineBranchProbabilityInfo *MBPI; 198292915Sdim }; 199292915Sdim 200292915Sdim char HexagonEarlyIfConversion::ID = 0; 201292915Sdim} 202292915Sdim 203292915SdimINITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", 204292915Sdim "Hexagon early if conversion", false, false) 205292915Sdim 206292915Sdimbool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { 207292915Sdim if (B->succ_size() != 1) 208292915Sdim return false; 209292915Sdim MachineBasicBlock *SB = *B->succ_begin(); 210292915Sdim MachineLoop *L = MLI->getLoopFor(SB); 211292915Sdim return L && SB == L->getHeader(); 212292915Sdim} 213292915Sdim 214292915Sdim 215292915Sdimbool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, 216292915Sdim MachineLoop *L, FlowPattern &FP) { 217292915Sdim DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); 218292915Sdim 219292915Sdim // Interested only in conditional branches, no .new, no new-value, etc. 220292915Sdim // Check the terminators directly, it's easier than handling all responses 221292915Sdim // from AnalyzeBranch. 222292915Sdim MachineBasicBlock *TB = 0, *FB = 0; 223292915Sdim MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); 224292915Sdim if (T1I == B->end()) 225292915Sdim return false; 226292915Sdim unsigned Opc = T1I->getOpcode(); 227292915Sdim if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) 228292915Sdim return false; 229292915Sdim unsigned PredR = T1I->getOperand(0).getReg(); 230292915Sdim 231292915Sdim // Get the layout successor, or 0 if B does not have one. 232292915Sdim MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); 233292915Sdim MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; 234292915Sdim 235292915Sdim MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); 236292915Sdim MachineBasicBlock::const_iterator T2I = std::next(T1I); 237292915Sdim // The second terminator should be an unconditional branch. 238292915Sdim assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); 239292915Sdim MachineBasicBlock *T2B = (T2I == B->end()) ? NextB 240292915Sdim : T2I->getOperand(0).getMBB(); 241292915Sdim if (T1B == T2B) { 242292915Sdim // XXX merge if T1B == NextB, or convert branch to unconditional. 243292915Sdim // mark as diamond with both sides equal? 244292915Sdim return false; 245292915Sdim } 246292915Sdim // Loop could be null for both. 247292915Sdim if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) 248292915Sdim return false; 249292915Sdim 250292915Sdim // Record the true/false blocks in such a way that "true" means "if (PredR)", 251292915Sdim // and "false" means "if (!PredR)". 252292915Sdim if (Opc == Hexagon::J2_jumpt) 253292915Sdim TB = T1B, FB = T2B; 254292915Sdim else 255292915Sdim TB = T2B, FB = T1B; 256292915Sdim 257292915Sdim if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) 258292915Sdim return false; 259292915Sdim 260292915Sdim // Detect triangle first. In case of a triangle, one of the blocks TB/FB 261292915Sdim // can fall through into the other, in other words, it will be executed 262292915Sdim // in both cases. We only want to predicate the block that is executed 263292915Sdim // conditionally. 264292915Sdim unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); 265292915Sdim unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); 266292915Sdim 267292915Sdim // A block is predicable if it has one predecessor (it must be B), and 268292915Sdim // it has a single successor. In fact, the block has to end either with 269292915Sdim // an unconditional branch (which can be predicated), or with a fall- 270292915Sdim // through. 271292915Sdim bool TOk = (TNP == 1) && (TNS == 1); 272292915Sdim bool FOk = (FNP == 1) && (FNS == 1); 273292915Sdim 274292915Sdim // If neither is predicable, there is nothing interesting. 275292915Sdim if (!TOk && !FOk) 276292915Sdim return false; 277292915Sdim 278292915Sdim MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; 279292915Sdim MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; 280292915Sdim MachineBasicBlock *JB = 0; 281292915Sdim 282292915Sdim if (TOk) { 283292915Sdim if (FOk) { 284292915Sdim if (TSB == FSB) 285292915Sdim JB = TSB; 286292915Sdim // Diamond: "if (P) then TB; else FB;". 287292915Sdim } else { 288292915Sdim // TOk && !FOk 289292915Sdim if (TSB == FB) { 290292915Sdim JB = FB; 291292915Sdim FB = 0; 292292915Sdim } 293292915Sdim } 294292915Sdim } else { 295292915Sdim // !TOk && FOk (at least one must be true by now). 296292915Sdim if (FSB == TB) { 297292915Sdim JB = TB; 298292915Sdim TB = 0; 299292915Sdim } 300292915Sdim } 301292915Sdim // Don't try to predicate loop preheaders. 302292915Sdim if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { 303292915Sdim DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) 304292915Sdim << " is a loop preheader. Skipping.\n"); 305292915Sdim return false; 306292915Sdim } 307292915Sdim 308292915Sdim FP = FlowPattern(B, PredR, TB, FB, JB); 309292915Sdim DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); 310292915Sdim return true; 311292915Sdim} 312292915Sdim 313292915Sdim 314292915Sdim// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that 315292915Sdim// contains EH_LABEL. 316292915Sdimbool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { 317292915Sdim for (auto &I : *B) 318292915Sdim if (I.isEHLabel()) 319292915Sdim return true; 320292915Sdim return false; 321292915Sdim} 322292915Sdim 323292915Sdim 324292915Sdim// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize 325292915Sdim// that a block can never fall-through. 326292915Sdimbool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) 327292915Sdim const { 328292915Sdim MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); 329292915Sdim while (I != E) { 330292915Sdim if (I->isBarrier()) 331292915Sdim return true; 332292915Sdim ++I; 333292915Sdim } 334292915Sdim return false; 335292915Sdim} 336292915Sdim 337292915Sdim 338292915Sdimbool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) 339292915Sdim const { 340292915Sdim if (!B) 341292915Sdim return true; 342292915Sdim if (B->isEHPad() || B->hasAddressTaken()) 343292915Sdim return false; 344292915Sdim if (B->succ_size() == 0) 345292915Sdim return false; 346292915Sdim 347292915Sdim for (auto &MI : *B) { 348292915Sdim if (MI.isDebugValue()) 349292915Sdim continue; 350292915Sdim if (MI.isConditionalBranch()) 351292915Sdim return false; 352292915Sdim unsigned Opc = MI.getOpcode(); 353292915Sdim bool IsJMP = (Opc == Hexagon::J2_jump); 354292915Sdim if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) 355292915Sdim return false; 356292915Sdim // Look for predicate registers defined by this instruction. It's ok 357292915Sdim // to speculate such an instruction, but the predicate register cannot 358292915Sdim // be used outside of this block (or else it won't be possible to 359292915Sdim // update the use of it after predication). PHI uses will be updated 360292915Sdim // to use a result of a MUX, and a MUX cannot be created for predicate 361292915Sdim // registers. 362292915Sdim for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { 363292915Sdim if (!MO->isReg() || !MO->isDef()) 364292915Sdim continue; 365292915Sdim unsigned R = MO->getReg(); 366292915Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 367292915Sdim continue; 368292915Sdim if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) 369292915Sdim continue; 370292915Sdim for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) 371292915Sdim if (U->getParent()->isPHI()) 372292915Sdim return false; 373292915Sdim } 374292915Sdim } 375292915Sdim return true; 376292915Sdim} 377292915Sdim 378292915Sdim 379292915Sdimbool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { 380292915Sdim for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 381292915Sdim if (!MO->isReg() || !MO->isUse()) 382292915Sdim continue; 383292915Sdim unsigned R = MO->getReg(); 384292915Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 385292915Sdim continue; 386292915Sdim const MachineInstr *DefI = MRI->getVRegDef(R); 387292915Sdim // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. 388292915Sdim assert(DefI && "Expecting a reaching def in MRI"); 389292915Sdim if (DefI->isImplicitDef()) 390292915Sdim return true; 391292915Sdim } 392292915Sdim return false; 393292915Sdim} 394292915Sdim 395292915Sdim 396292915Sdimbool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { 397292915Sdim if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition 398292915Sdim return false; 399292915Sdim if (FP.TrueB && !isValidCandidate(FP.TrueB)) 400292915Sdim return false; 401292915Sdim if (FP.FalseB && !isValidCandidate(FP.FalseB)) 402292915Sdim return false; 403292915Sdim // Check the PHIs in the join block. If any of them use a register 404292915Sdim // that is defined as IMPLICIT_DEF, do not convert this. This can 405292915Sdim // legitimately happen if one side of the split never executes, but 406292915Sdim // the compiler is unable to prove it. That side may then seem to 407292915Sdim // provide an "undef" value to the join block, however it will never 408292915Sdim // execute at run-time. If we convert this case, the "undef" will 409292915Sdim // be used in a MUX instruction, and that may seem like actually 410292915Sdim // using an undefined value to other optimizations. This could lead 411292915Sdim // to trouble further down the optimization stream, cause assertions 412292915Sdim // to fail, etc. 413292915Sdim if (FP.JoinB) { 414292915Sdim const MachineBasicBlock &B = *FP.JoinB; 415292915Sdim for (auto &MI : B) { 416292915Sdim if (!MI.isPHI()) 417292915Sdim break; 418292915Sdim if (usesUndefVReg(&MI)) 419292915Sdim return false; 420292915Sdim unsigned DefR = MI.getOperand(0).getReg(); 421292915Sdim const TargetRegisterClass *RC = MRI->getRegClass(DefR); 422292915Sdim if (RC == &Hexagon::PredRegsRegClass) 423292915Sdim return false; 424292915Sdim } 425292915Sdim } 426292915Sdim return true; 427292915Sdim} 428292915Sdim 429292915Sdim 430292915Sdimunsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { 431292915Sdim assert(B->pred_size() <= 2); 432292915Sdim if (B->pred_size() < 2) 433292915Sdim return 0; 434292915Sdim 435292915Sdim unsigned Cost = 0; 436292915Sdim MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI(); 437292915Sdim for (I = B->begin(); I != E; ++I) { 438292915Sdim const MachineOperand &RO1 = I->getOperand(1); 439292915Sdim const MachineOperand &RO3 = I->getOperand(3); 440292915Sdim assert(RO1.isReg() && RO3.isReg()); 441292915Sdim // Must have a MUX if the phi uses a subregister. 442292915Sdim if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) { 443292915Sdim Cost++; 444292915Sdim continue; 445292915Sdim } 446292915Sdim MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg()); 447292915Sdim MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg()); 448292915Sdim if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3)) 449292915Sdim Cost++; 450292915Sdim } 451292915Sdim return Cost; 452292915Sdim} 453292915Sdim 454292915Sdim 455292915Sdimunsigned HexagonEarlyIfConversion::countPredicateDefs( 456292915Sdim const MachineBasicBlock *B) const { 457292915Sdim unsigned PredDefs = 0; 458292915Sdim for (auto &MI : *B) { 459292915Sdim for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { 460292915Sdim if (!MO->isReg() || !MO->isDef()) 461292915Sdim continue; 462292915Sdim unsigned R = MO->getReg(); 463292915Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 464292915Sdim continue; 465292915Sdim if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) 466292915Sdim PredDefs++; 467292915Sdim } 468292915Sdim } 469292915Sdim return PredDefs; 470292915Sdim} 471292915Sdim 472292915Sdim 473292915Sdimbool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { 474292915Sdim if (FP.TrueB && FP.FalseB) { 475292915Sdim 476292915Sdim // Do not IfCovert if the branch is one sided. 477292915Sdim if (MBPI) { 478292915Sdim BranchProbability Prob(9, 10); 479292915Sdim if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob) 480292915Sdim return false; 481292915Sdim if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob) 482292915Sdim return false; 483292915Sdim } 484292915Sdim 485292915Sdim // If both sides are predicable, convert them if they join, and the 486292915Sdim // join block has no other predecessors. 487292915Sdim MachineBasicBlock *TSB = *FP.TrueB->succ_begin(); 488292915Sdim MachineBasicBlock *FSB = *FP.FalseB->succ_begin(); 489292915Sdim if (TSB != FSB) 490292915Sdim return false; 491292915Sdim if (TSB->pred_size() != 2) 492292915Sdim return false; 493292915Sdim } 494292915Sdim 495292915Sdim // Calculate the total size of the predicated blocks. 496292915Sdim // Assume instruction counts without branches to be the approximation of 497292915Sdim // the code size. If the predicated blocks are smaller than a packet size, 498292915Sdim // approximate the spare room in the packet that could be filled with the 499292915Sdim // predicated/speculated instructions. 500292915Sdim unsigned TS = 0, FS = 0, Spare = 0; 501292915Sdim if (FP.TrueB) { 502292915Sdim TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator()); 503292915Sdim if (TS < HEXAGON_PACKET_SIZE) 504292915Sdim Spare += HEXAGON_PACKET_SIZE-TS; 505292915Sdim } 506292915Sdim if (FP.FalseB) { 507292915Sdim FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator()); 508292915Sdim if (FS < HEXAGON_PACKET_SIZE) 509292915Sdim Spare += HEXAGON_PACKET_SIZE-TS; 510292915Sdim } 511292915Sdim unsigned TotalIn = TS+FS; 512292915Sdim DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: " 513292915Sdim << TotalIn << ", spare room: " << Spare << "\n"); 514292915Sdim if (TotalIn >= SizeLimit+Spare) 515292915Sdim return false; 516292915Sdim 517292915Sdim // Count the number of PHI nodes that will need to be updated (converted 518292915Sdim // to MUX). Those can be later converted to predicated instructions, so 519292915Sdim // they aren't always adding extra cost. 520292915Sdim // KLUDGE: Also, count the number of predicate register definitions in 521292915Sdim // each block. The scheduler may increase the pressure of these and cause 522292915Sdim // expensive spills (e.g. bitmnp01). 523292915Sdim unsigned TotalPh = 0; 524292915Sdim unsigned PredDefs = countPredicateDefs(FP.SplitB); 525292915Sdim if (FP.JoinB) { 526292915Sdim TotalPh = computePhiCost(FP.JoinB); 527292915Sdim PredDefs += countPredicateDefs(FP.JoinB); 528292915Sdim } else { 529292915Sdim if (FP.TrueB && FP.TrueB->succ_size() > 0) { 530292915Sdim MachineBasicBlock *SB = *FP.TrueB->succ_begin(); 531292915Sdim TotalPh += computePhiCost(SB); 532292915Sdim PredDefs += countPredicateDefs(SB); 533292915Sdim } 534292915Sdim if (FP.FalseB && FP.FalseB->succ_size() > 0) { 535292915Sdim MachineBasicBlock *SB = *FP.FalseB->succ_begin(); 536292915Sdim TotalPh += computePhiCost(SB); 537292915Sdim PredDefs += countPredicateDefs(SB); 538292915Sdim } 539292915Sdim } 540292915Sdim DEBUG(dbgs() << "Total number of extra muxes from converted phis: " 541292915Sdim << TotalPh << "\n"); 542292915Sdim if (TotalIn+TotalPh >= SizeLimit+Spare) 543292915Sdim return false; 544292915Sdim 545292915Sdim DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); 546292915Sdim if (PredDefs > 4) 547292915Sdim return false; 548292915Sdim 549292915Sdim return true; 550292915Sdim} 551292915Sdim 552292915Sdim 553292915Sdimbool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, 554292915Sdim MachineLoop *L) { 555292915Sdim bool Changed = false; 556292915Sdim 557292915Sdim // Visit all dominated blocks from the same loop first, then process B. 558292915Sdim MachineDomTreeNode *N = MDT->getNode(B); 559292915Sdim typedef GraphTraits<MachineDomTreeNode*> GTN; 560292915Sdim // We will change CFG/DT during this traversal, so take precautions to 561292915Sdim // avoid problems related to invalidated iterators. In fact, processing 562292915Sdim // a child C of B cannot cause another child to be removed, but it can 563292915Sdim // cause a new child to be added (which was a child of C before C itself 564292915Sdim // was removed. This new child C, however, would have been processed 565292915Sdim // prior to processing B, so there is no need to process it again. 566292915Sdim // Simply keep a list of children of B, and traverse that list. 567292915Sdim typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; 568292915Sdim DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); 569292915Sdim for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { 570292915Sdim MachineBasicBlock *SB = (*I)->getBlock(); 571292915Sdim if (!Deleted.count(SB)) 572292915Sdim Changed |= visitBlock(SB, L); 573292915Sdim } 574292915Sdim // When walking down the dominator tree, we want to traverse through 575292915Sdim // blocks from nested (other) loops, because they can dominate blocks 576292915Sdim // that are in L. Skip the non-L blocks only after the tree traversal. 577292915Sdim if (MLI->getLoopFor(B) != L) 578292915Sdim return Changed; 579292915Sdim 580292915Sdim FlowPattern FP; 581292915Sdim if (!matchFlowPattern(B, L, FP)) 582292915Sdim return Changed; 583292915Sdim 584292915Sdim if (!isValid(FP)) { 585292915Sdim DEBUG(dbgs() << "Conversion is not valid\n"); 586292915Sdim return Changed; 587292915Sdim } 588292915Sdim if (!isProfitable(FP)) { 589292915Sdim DEBUG(dbgs() << "Conversion is not profitable\n"); 590292915Sdim return Changed; 591292915Sdim } 592292915Sdim 593292915Sdim convert(FP); 594292915Sdim simplifyFlowGraph(FP); 595292915Sdim return true; 596292915Sdim} 597292915Sdim 598292915Sdim 599292915Sdimbool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { 600292915Sdim MachineBasicBlock *HB = L ? L->getHeader() : 0; 601292915Sdim DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) 602292915Sdim : dbgs() << "Visiting function") << "\n"); 603292915Sdim bool Changed = false; 604292915Sdim if (L) { 605292915Sdim for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) 606292915Sdim Changed |= visitLoop(*I); 607292915Sdim } 608292915Sdim 609292915Sdim MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN); 610292915Sdim Changed |= visitBlock(L ? HB : EntryB, L); 611292915Sdim return Changed; 612292915Sdim} 613292915Sdim 614292915Sdim 615292915Sdimbool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) 616292915Sdim const { 617292915Sdim // Exclude post-increment stores. Those return a value, so we cannot 618292915Sdim // predicate them. 619292915Sdim unsigned Opc = MI->getOpcode(); 620292915Sdim using namespace Hexagon; 621292915Sdim switch (Opc) { 622292915Sdim // Store byte: 623292915Sdim case S2_storerb_io: case S4_storerb_rr: 624292915Sdim case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: 625292915Sdim // Store halfword: 626292915Sdim case S2_storerh_io: case S4_storerh_rr: 627292915Sdim case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: 628292915Sdim // Store upper halfword: 629292915Sdim case S2_storerf_io: case S4_storerf_rr: 630292915Sdim case S2_storerfabs: case S2_storerfgp: 631292915Sdim // Store word: 632292915Sdim case S2_storeri_io: case S4_storeri_rr: 633292915Sdim case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: 634292915Sdim // Store doubleword: 635292915Sdim case S2_storerd_io: case S4_storerd_rr: 636292915Sdim case S2_storerdabs: case S2_storerdgp: 637292915Sdim return true; 638292915Sdim } 639292915Sdim return false; 640292915Sdim} 641292915Sdim 642292915Sdim 643292915Sdimbool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) 644292915Sdim const { 645292915Sdim if (MI->mayLoad() || MI->mayStore()) 646292915Sdim return false; 647292915Sdim if (MI->isCall() || MI->isBarrier() || MI->isBranch()) 648292915Sdim return false; 649292915Sdim if (MI->hasUnmodeledSideEffects()) 650292915Sdim return false; 651292915Sdim 652292915Sdim return true; 653292915Sdim} 654292915Sdim 655292915Sdim 656292915Sdimunsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, 657292915Sdim bool IfTrue) const { 658292915Sdim // Exclude post-increment stores. 659292915Sdim using namespace Hexagon; 660292915Sdim switch (Opc) { 661292915Sdim case S2_storerb_io: 662292915Sdim return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; 663292915Sdim case S4_storerb_rr: 664292915Sdim return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; 665292915Sdim case S2_storerbabs: 666292915Sdim case S2_storerbgp: 667292915Sdim return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; 668292915Sdim case S4_storeirb_io: 669292915Sdim return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; 670292915Sdim case S2_storerh_io: 671292915Sdim return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; 672292915Sdim case S4_storerh_rr: 673292915Sdim return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; 674292915Sdim case S2_storerhabs: 675292915Sdim case S2_storerhgp: 676292915Sdim return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; 677292915Sdim case S2_storerf_io: 678292915Sdim return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; 679292915Sdim case S4_storerf_rr: 680292915Sdim return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; 681292915Sdim case S2_storerfabs: 682292915Sdim case S2_storerfgp: 683292915Sdim return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; 684292915Sdim case S4_storeirh_io: 685292915Sdim return IfTrue ? S4_storeirht_io : S4_storeirhf_io; 686292915Sdim case S2_storeri_io: 687292915Sdim return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; 688292915Sdim case S4_storeri_rr: 689292915Sdim return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; 690292915Sdim case S2_storeriabs: 691292915Sdim case S2_storerigp: 692292915Sdim return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; 693292915Sdim case S4_storeiri_io: 694292915Sdim return IfTrue ? S4_storeirit_io : S4_storeirif_io; 695292915Sdim case S2_storerd_io: 696292915Sdim return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; 697292915Sdim case S4_storerd_rr: 698292915Sdim return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; 699292915Sdim case S2_storerdabs: 700292915Sdim case S2_storerdgp: 701292915Sdim return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; 702292915Sdim } 703292915Sdim llvm_unreachable("Unexpected opcode"); 704292915Sdim return 0; 705292915Sdim} 706292915Sdim 707292915Sdim 708292915Sdimvoid HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, 709292915Sdim MachineBasicBlock::iterator At, MachineInstr *MI, 710292915Sdim unsigned PredR, bool IfTrue) { 711292915Sdim DebugLoc DL; 712292915Sdim if (At != ToB->end()) 713292915Sdim DL = At->getDebugLoc(); 714292915Sdim else if (!ToB->empty()) 715292915Sdim DL = ToB->back().getDebugLoc(); 716292915Sdim 717292915Sdim unsigned Opc = MI->getOpcode(); 718292915Sdim 719292915Sdim if (isPredicableStore(MI)) { 720292915Sdim unsigned COpc = getCondStoreOpcode(Opc, IfTrue); 721292915Sdim assert(COpc); 722292915Sdim MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) 723292915Sdim .addReg(PredR); 724292915Sdim for (MIOperands MO(MI); MO.isValid(); ++MO) 725292915Sdim MIB.addOperand(*MO); 726292915Sdim 727292915Sdim // Set memory references. 728292915Sdim MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 729292915Sdim MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 730292915Sdim MIB.setMemRefs(MMOBegin, MMOEnd); 731292915Sdim 732292915Sdim MI->eraseFromParent(); 733292915Sdim return; 734292915Sdim } 735292915Sdim 736292915Sdim if (Opc == Hexagon::J2_jump) { 737292915Sdim MachineBasicBlock *TB = MI->getOperand(0).getMBB(); 738292915Sdim const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt 739292915Sdim : Hexagon::J2_jumpf); 740292915Sdim BuildMI(*ToB, At, DL, D) 741292915Sdim .addReg(PredR) 742292915Sdim .addMBB(TB); 743292915Sdim MI->eraseFromParent(); 744292915Sdim return; 745292915Sdim } 746292915Sdim 747292915Sdim // Print the offending instruction unconditionally as we are about to 748292915Sdim // abort. 749292915Sdim dbgs() << *MI; 750292915Sdim llvm_unreachable("Unexpected instruction"); 751292915Sdim} 752292915Sdim 753292915Sdim 754292915Sdim// Predicate/speculate non-branch instructions from FromB into block ToB. 755292915Sdim// Leave the branches alone, they will be handled later. Btw, at this point 756292915Sdim// FromB should have at most one branch, and it should be unconditional. 757292915Sdimvoid HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, 758292915Sdim MachineBasicBlock::iterator At, MachineBasicBlock *FromB, 759292915Sdim unsigned PredR, bool IfTrue) { 760292915Sdim DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); 761292915Sdim MachineBasicBlock::iterator End = FromB->getFirstTerminator(); 762292915Sdim MachineBasicBlock::iterator I, NextI; 763292915Sdim 764292915Sdim for (I = FromB->begin(); I != End; I = NextI) { 765292915Sdim assert(!I->isPHI()); 766292915Sdim NextI = std::next(I); 767292915Sdim if (isSafeToSpeculate(&*I)) 768292915Sdim ToB->splice(At, FromB, I); 769292915Sdim else 770292915Sdim predicateInstr(ToB, At, &*I, PredR, IfTrue); 771292915Sdim } 772292915Sdim} 773292915Sdim 774292915Sdim 775292915Sdimvoid HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, 776292915Sdim const FlowPattern &FP) { 777292915Sdim // Visit all PHI nodes in the WhereB block and generate MUX instructions 778292915Sdim // in the split block. Update the PHI nodes with the values of the MUX. 779292915Sdim auto NonPHI = WhereB->getFirstNonPHI(); 780292915Sdim for (auto I = WhereB->begin(); I != NonPHI; ++I) { 781292915Sdim MachineInstr *PN = &*I; 782292915Sdim // Registers and subregisters corresponding to TrueB, FalseB and SplitB. 783292915Sdim unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; 784292915Sdim for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 785292915Sdim const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); 786292915Sdim if (BO.getMBB() == FP.SplitB) 787292915Sdim SR = RO.getReg(), SSR = RO.getSubReg(); 788292915Sdim else if (BO.getMBB() == FP.TrueB) 789292915Sdim TR = RO.getReg(), TSR = RO.getSubReg(); 790292915Sdim else if (BO.getMBB() == FP.FalseB) 791292915Sdim FR = RO.getReg(), FSR = RO.getSubReg(); 792292915Sdim else 793292915Sdim continue; 794292915Sdim PN->RemoveOperand(i+1); 795292915Sdim PN->RemoveOperand(i); 796292915Sdim } 797292915Sdim if (TR == 0) 798292915Sdim TR = SR, TSR = SSR; 799292915Sdim else if (FR == 0) 800292915Sdim FR = SR, FSR = SSR; 801292915Sdim assert(TR && FR); 802292915Sdim 803292915Sdim using namespace Hexagon; 804292915Sdim unsigned DR = PN->getOperand(0).getReg(); 805292915Sdim const TargetRegisterClass *RC = MRI->getRegClass(DR); 806292915Sdim const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux) 807292915Sdim : TII->get(MUX64_rr); 808292915Sdim 809292915Sdim MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); 810292915Sdim DebugLoc DL; 811292915Sdim if (MuxAt != FP.SplitB->end()) 812292915Sdim DL = MuxAt->getDebugLoc(); 813292915Sdim unsigned MuxR = MRI->createVirtualRegister(RC); 814292915Sdim BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) 815292915Sdim .addReg(FP.PredR) 816292915Sdim .addReg(TR, 0, TSR) 817292915Sdim .addReg(FR, 0, FSR); 818292915Sdim 819292915Sdim PN->addOperand(MachineOperand::CreateReg(MuxR, false)); 820292915Sdim PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); 821292915Sdim } 822292915Sdim} 823292915Sdim 824292915Sdim 825292915Sdimvoid HexagonEarlyIfConversion::convert(const FlowPattern &FP) { 826292915Sdim MachineBasicBlock *TSB = 0, *FSB = 0; 827292915Sdim MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); 828292915Sdim assert(OldTI != FP.SplitB->end()); 829292915Sdim DebugLoc DL = OldTI->getDebugLoc(); 830292915Sdim 831292915Sdim if (FP.TrueB) { 832292915Sdim TSB = *FP.TrueB->succ_begin(); 833292915Sdim predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); 834292915Sdim } 835292915Sdim if (FP.FalseB) { 836292915Sdim FSB = *FP.FalseB->succ_begin(); 837292915Sdim MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); 838292915Sdim predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); 839292915Sdim } 840292915Sdim 841292915Sdim // Regenerate new terminators in the split block and update the successors. 842292915Sdim // First, remember any information that may be needed later and remove the 843292915Sdim // existing terminators/successors from the split block. 844292915Sdim MachineBasicBlock *SSB = 0; 845292915Sdim FP.SplitB->erase(OldTI, FP.SplitB->end()); 846292915Sdim while (FP.SplitB->succ_size() > 0) { 847292915Sdim MachineBasicBlock *T = *FP.SplitB->succ_begin(); 848292915Sdim // It's possible that the split block had a successor that is not a pre- 849292915Sdim // dicated block. This could only happen if there was only one block to 850292915Sdim // be predicated. Example: 851292915Sdim // split_b: 852292915Sdim // if (p) jump true_b 853292915Sdim // jump unrelated2_b 854292915Sdim // unrelated1_b: 855292915Sdim // ... 856292915Sdim // unrelated2_b: ; can have other predecessors, so it's not "false_b" 857292915Sdim // jump other_b 858292915Sdim // true_b: ; only reachable from split_b, can be predicated 859292915Sdim // ... 860292915Sdim // 861292915Sdim // Find this successor (SSB) if it exists. 862292915Sdim if (T != FP.TrueB && T != FP.FalseB) { 863292915Sdim assert(!SSB); 864292915Sdim SSB = T; 865292915Sdim } 866292915Sdim FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); 867292915Sdim } 868292915Sdim 869292915Sdim // Insert new branches and update the successors of the split block. This 870292915Sdim // may create unconditional branches to the layout successor, etc., but 871292915Sdim // that will be cleaned up later. For now, make sure that correct code is 872292915Sdim // generated. 873292915Sdim if (FP.JoinB) { 874292915Sdim assert(!SSB || SSB == FP.JoinB); 875292915Sdim BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) 876292915Sdim .addMBB(FP.JoinB); 877292915Sdim FP.SplitB->addSuccessor(FP.JoinB); 878292915Sdim } else { 879292915Sdim bool HasBranch = false; 880292915Sdim if (TSB) { 881292915Sdim BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) 882292915Sdim .addReg(FP.PredR) 883292915Sdim .addMBB(TSB); 884292915Sdim FP.SplitB->addSuccessor(TSB); 885292915Sdim HasBranch = true; 886292915Sdim } 887292915Sdim if (FSB) { 888292915Sdim const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) 889292915Sdim : TII->get(Hexagon::J2_jumpf); 890292915Sdim MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); 891292915Sdim if (!HasBranch) 892292915Sdim MIB.addReg(FP.PredR); 893292915Sdim MIB.addMBB(FSB); 894292915Sdim FP.SplitB->addSuccessor(FSB); 895292915Sdim } 896292915Sdim if (SSB) { 897292915Sdim // This cannot happen if both TSB and FSB are set. [TF]SB are the 898292915Sdim // successor blocks of the TrueB and FalseB (or null of the TrueB 899292915Sdim // or FalseB block is null). SSB is the potential successor block 900292915Sdim // of the SplitB that is neither TrueB nor FalseB. 901292915Sdim BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) 902292915Sdim .addMBB(SSB); 903292915Sdim FP.SplitB->addSuccessor(SSB); 904292915Sdim } 905292915Sdim } 906292915Sdim 907292915Sdim // What is left to do is to update the PHI nodes that could have entries 908292915Sdim // referring to predicated blocks. 909292915Sdim if (FP.JoinB) { 910292915Sdim updatePhiNodes(FP.JoinB, FP); 911292915Sdim } else { 912292915Sdim if (TSB) 913292915Sdim updatePhiNodes(TSB, FP); 914292915Sdim if (FSB) 915292915Sdim updatePhiNodes(FSB, FP); 916292915Sdim // Nothing to update in SSB, since SSB's predecessors haven't changed. 917292915Sdim } 918292915Sdim} 919292915Sdim 920292915Sdim 921292915Sdimvoid HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { 922292915Sdim DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); 923292915Sdim 924292915Sdim // Transfer the immediate dominator information from B to its descendants. 925292915Sdim MachineDomTreeNode *N = MDT->getNode(B); 926292915Sdim MachineDomTreeNode *IDN = N->getIDom(); 927292915Sdim if (IDN) { 928292915Sdim MachineBasicBlock *IDB = IDN->getBlock(); 929292915Sdim typedef GraphTraits<MachineDomTreeNode*> GTN; 930292915Sdim typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; 931292915Sdim DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); 932292915Sdim for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { 933292915Sdim MachineBasicBlock *SB = (*I)->getBlock(); 934292915Sdim MDT->changeImmediateDominator(SB, IDB); 935292915Sdim } 936292915Sdim } 937292915Sdim 938292915Sdim while (B->succ_size() > 0) 939292915Sdim B->removeSuccessor(B->succ_begin()); 940292915Sdim 941292915Sdim for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) 942292915Sdim (*I)->removeSuccessor(B, true); 943292915Sdim 944292915Sdim Deleted.insert(B); 945292915Sdim MDT->eraseNode(B); 946292915Sdim MFN->erase(B->getIterator()); 947292915Sdim} 948292915Sdim 949292915Sdim 950292915Sdimvoid HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { 951292915Sdim DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); 952292915Sdim MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); 953292915Sdim for (I = B->begin(); I != NonPHI; I = NextI) { 954292915Sdim NextI = std::next(I); 955292915Sdim MachineInstr *PN = &*I; 956292915Sdim assert(PN->getNumOperands() == 3 && "Invalid phi node"); 957292915Sdim MachineOperand &UO = PN->getOperand(1); 958292915Sdim unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); 959292915Sdim unsigned DefR = PN->getOperand(0).getReg(); 960292915Sdim unsigned NewR = UseR; 961292915Sdim if (UseSR) { 962292915Sdim // MRI.replaceVregUsesWith does not allow to update the subregister, 963292915Sdim // so instead of doing the use-iteration here, create a copy into a 964292915Sdim // "non-subregistered" register. 965292915Sdim DebugLoc DL = PN->getDebugLoc(); 966292915Sdim const TargetRegisterClass *RC = MRI->getRegClass(DefR); 967292915Sdim NewR = MRI->createVirtualRegister(RC); 968292915Sdim NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) 969292915Sdim .addReg(UseR, 0, UseSR); 970292915Sdim } 971292915Sdim MRI->replaceRegWith(DefR, NewR); 972292915Sdim B->erase(I); 973292915Sdim } 974292915Sdim} 975292915Sdim 976292915Sdim 977292915Sdimvoid HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, 978292915Sdim MachineBasicBlock *NewB) { 979292915Sdim for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { 980292915Sdim MachineBasicBlock *SB = *I; 981292915Sdim MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); 982292915Sdim for (P = SB->begin(); P != N; ++P) { 983292915Sdim MachineInstr *PN = &*P; 984292915Sdim for (MIOperands MO(PN); MO.isValid(); ++MO) 985292915Sdim if (MO->isMBB() && MO->getMBB() == OldB) 986292915Sdim MO->setMBB(NewB); 987292915Sdim } 988292915Sdim } 989292915Sdim} 990292915Sdim 991292915Sdim 992292915Sdimvoid HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, 993292915Sdim MachineBasicBlock *SuccB) { 994292915Sdim DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " 995292915Sdim << PrintMB(SuccB) << "\n"); 996292915Sdim bool TermOk = hasUncondBranch(SuccB); 997292915Sdim eliminatePhis(SuccB); 998292915Sdim TII->RemoveBranch(*PredB); 999292915Sdim PredB->removeSuccessor(SuccB); 1000292915Sdim PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); 1001292915Sdim MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); 1002292915Sdim for (I = SuccB->succ_begin(); I != E; ++I) 1003292915Sdim PredB->addSuccessor(*I); 1004292915Sdim PredB->normalizeSuccProbs(); 1005292915Sdim replacePhiEdges(SuccB, PredB); 1006292915Sdim removeBlock(SuccB); 1007292915Sdim if (!TermOk) 1008292915Sdim PredB->updateTerminator(); 1009292915Sdim} 1010292915Sdim 1011292915Sdim 1012292915Sdimvoid HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { 1013292915Sdim if (FP.TrueB) 1014292915Sdim removeBlock(FP.TrueB); 1015292915Sdim if (FP.FalseB) 1016292915Sdim removeBlock(FP.FalseB); 1017292915Sdim 1018292915Sdim FP.SplitB->updateTerminator(); 1019292915Sdim if (FP.SplitB->succ_size() != 1) 1020292915Sdim return; 1021292915Sdim 1022292915Sdim MachineBasicBlock *SB = *FP.SplitB->succ_begin(); 1023292915Sdim if (SB->pred_size() != 1) 1024292915Sdim return; 1025292915Sdim 1026292915Sdim // By now, the split block has only one successor (SB), and SB has only 1027292915Sdim // one predecessor. We can try to merge them. We will need to update ter- 1028292915Sdim // minators in FP.Split+SB, and that requires working AnalyzeBranch, which 1029292915Sdim // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends 1030292915Sdim // with an unconditional branch, we won't need to touch the terminators. 1031292915Sdim if (!hasEHLabel(SB) || hasUncondBranch(SB)) 1032292915Sdim mergeBlocks(FP.SplitB, SB); 1033292915Sdim} 1034292915Sdim 1035292915Sdim 1036292915Sdimbool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { 1037292915Sdim auto &ST = MF.getSubtarget(); 1038292915Sdim TII = ST.getInstrInfo(); 1039292915Sdim TRI = ST.getRegisterInfo(); 1040292915Sdim MFN = &MF; 1041292915Sdim MRI = &MF.getRegInfo(); 1042292915Sdim MDT = &getAnalysis<MachineDominatorTree>(); 1043292915Sdim MLI = &getAnalysis<MachineLoopInfo>(); 1044292915Sdim MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() : 1045292915Sdim nullptr; 1046292915Sdim 1047292915Sdim Deleted.clear(); 1048292915Sdim bool Changed = false; 1049292915Sdim 1050292915Sdim for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) 1051292915Sdim Changed |= visitLoop(*I); 1052292915Sdim Changed |= visitLoop(0); 1053292915Sdim 1054292915Sdim return Changed; 1055292915Sdim} 1056292915Sdim 1057292915Sdim//===----------------------------------------------------------------------===// 1058292915Sdim// Public Constructor Functions 1059292915Sdim//===----------------------------------------------------------------------===// 1060292915SdimFunctionPass *llvm::createHexagonEarlyIfConversion() { 1061292915Sdim return new HexagonEarlyIfConversion(); 1062292915Sdim} 1063292915Sdim 1064