1239310Sdim//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// 2239310Sdim// 3239310Sdim// The LLVM Compiler Infrastructure 4239310Sdim// 5239310Sdim// This file is distributed under the University of Illinois Open Source 6239310Sdim// License. See LICENSE.TXT for details. 7239310Sdim// 8239310Sdim//===----------------------------------------------------------------------===// 9239310Sdim// 10239310Sdim// Early if-conversion is for out-of-order CPUs that don't have a lot of 11239310Sdim// predicable instructions. The goal is to eliminate conditional branches that 12239310Sdim// may mispredict. 13239310Sdim// 14239310Sdim// Instructions from both sides of the branch are executed specutatively, and a 15239310Sdim// cmov instruction selects the result. 16239310Sdim// 17239310Sdim//===----------------------------------------------------------------------===// 18239310Sdim 19239310Sdim#define DEBUG_TYPE "early-ifcvt" 20239310Sdim#include "llvm/ADT/BitVector.h" 21239310Sdim#include "llvm/ADT/PostOrderIterator.h" 22239310Sdim#include "llvm/ADT/SetVector.h" 23239310Sdim#include "llvm/ADT/SmallPtrSet.h" 24239310Sdim#include "llvm/ADT/SparseSet.h" 25239310Sdim#include "llvm/ADT/Statistic.h" 26239310Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 27239310Sdim#include "llvm/CodeGen/MachineDominators.h" 28239310Sdim#include "llvm/CodeGen/MachineFunction.h" 29239310Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 30239310Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 31239310Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 32252723Sdim#include "llvm/CodeGen/MachineTraceMetrics.h" 33239310Sdim#include "llvm/CodeGen/Passes.h" 34252723Sdim#include "llvm/Support/CommandLine.h" 35252723Sdim#include "llvm/Support/Debug.h" 36252723Sdim#include "llvm/Support/raw_ostream.h" 37239310Sdim#include "llvm/Target/TargetInstrInfo.h" 38239310Sdim#include "llvm/Target/TargetRegisterInfo.h" 39245431Sdim#include "llvm/Target/TargetSubtargetInfo.h" 40239310Sdim 41239310Sdimusing namespace llvm; 42239310Sdim 43239310Sdim// Absolute maximum number of instructions allowed per speculated block. 44239310Sdim// This bypasses all other heuristics, so it should be set fairly high. 45239310Sdimstatic cl::opt<unsigned> 46239310SdimBlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, 47239310Sdim cl::desc("Maximum number of instructions per speculated block.")); 48239310Sdim 49239310Sdim// Stress testing mode - disable heuristics. 50239310Sdimstatic cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, 51239310Sdim cl::desc("Turn all knobs to 11")); 52239310Sdim 53239310SdimSTATISTIC(NumDiamondsSeen, "Number of diamonds"); 54239310SdimSTATISTIC(NumDiamondsConv, "Number of diamonds converted"); 55239310SdimSTATISTIC(NumTrianglesSeen, "Number of triangles"); 56239310SdimSTATISTIC(NumTrianglesConv, "Number of triangles converted"); 57239310Sdim 58239310Sdim//===----------------------------------------------------------------------===// 59239310Sdim// SSAIfConv 60239310Sdim//===----------------------------------------------------------------------===// 61239310Sdim// 62239310Sdim// The SSAIfConv class performs if-conversion on SSA form machine code after 63239310Sdim// determining if it is possible. The class contains no heuristics; external 64239310Sdim// code should be used to determine when if-conversion is a good idea. 65239310Sdim// 66239310Sdim// SSAIfConv can convert both triangles and diamonds: 67239310Sdim// 68239310Sdim// Triangle: Head Diamond: Head 69239310Sdim// | \ / \_ 70239310Sdim// | \ / | 71239310Sdim// | [TF]BB FBB TBB 72239310Sdim// | / \ / 73239310Sdim// | / \ / 74239310Sdim// Tail Tail 75239310Sdim// 76239310Sdim// Instructions in the conditional blocks TBB and/or FBB are spliced into the 77239310Sdim// Head block, and phis in the Tail block are converted to select instructions. 78239310Sdim// 79239310Sdimnamespace { 80239310Sdimclass SSAIfConv { 81239310Sdim const TargetInstrInfo *TII; 82239310Sdim const TargetRegisterInfo *TRI; 83239310Sdim MachineRegisterInfo *MRI; 84239310Sdim 85239310Sdimpublic: 86239310Sdim /// The block containing the conditional branch. 87239310Sdim MachineBasicBlock *Head; 88239310Sdim 89239310Sdim /// The block containing phis after the if-then-else. 90239310Sdim MachineBasicBlock *Tail; 91239310Sdim 92239310Sdim /// The 'true' conditional block as determined by AnalyzeBranch. 93239310Sdim MachineBasicBlock *TBB; 94239310Sdim 95239310Sdim /// The 'false' conditional block as determined by AnalyzeBranch. 96239310Sdim MachineBasicBlock *FBB; 97239310Sdim 98239310Sdim /// isTriangle - When there is no 'else' block, either TBB or FBB will be 99239310Sdim /// equal to Tail. 100239310Sdim bool isTriangle() const { return TBB == Tail || FBB == Tail; } 101239310Sdim 102239310Sdim /// Returns the Tail predecessor for the True side. 103239310Sdim MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } 104239310Sdim 105239310Sdim /// Returns the Tail predecessor for the False side. 106239310Sdim MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } 107239310Sdim 108239310Sdim /// Information about each phi in the Tail block. 109239310Sdim struct PHIInfo { 110239310Sdim MachineInstr *PHI; 111239310Sdim unsigned TReg, FReg; 112239310Sdim // Latencies from Cond+Branch, TReg, and FReg to DstReg. 113239310Sdim int CondCycles, TCycles, FCycles; 114239310Sdim 115239310Sdim PHIInfo(MachineInstr *phi) 116239310Sdim : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} 117239310Sdim }; 118239310Sdim 119239310Sdim SmallVector<PHIInfo, 8> PHIs; 120239310Sdim 121239310Sdimprivate: 122239310Sdim /// The branch condition determined by AnalyzeBranch. 123239310Sdim SmallVector<MachineOperand, 4> Cond; 124239310Sdim 125239310Sdim /// Instructions in Head that define values used by the conditional blocks. 126239310Sdim /// The hoisted instructions must be inserted after these instructions. 127239310Sdim SmallPtrSet<MachineInstr*, 8> InsertAfter; 128239310Sdim 129239310Sdim /// Register units clobbered by the conditional blocks. 130239310Sdim BitVector ClobberedRegUnits; 131239310Sdim 132239310Sdim // Scratch pad for findInsertionPoint. 133239310Sdim SparseSet<unsigned> LiveRegUnits; 134239310Sdim 135239310Sdim /// Insertion point in Head for speculatively executed instructions form TBB 136239310Sdim /// and FBB. 137239310Sdim MachineBasicBlock::iterator InsertionPoint; 138239310Sdim 139239310Sdim /// Return true if all non-terminator instructions in MBB can be safely 140239310Sdim /// speculated. 141239310Sdim bool canSpeculateInstrs(MachineBasicBlock *MBB); 142239310Sdim 143239310Sdim /// Find a valid insertion point in Head. 144239310Sdim bool findInsertionPoint(); 145239310Sdim 146239310Sdim /// Replace PHI instructions in Tail with selects. 147239310Sdim void replacePHIInstrs(); 148239310Sdim 149239310Sdim /// Insert selects and rewrite PHI operands to use them. 150239310Sdim void rewritePHIOperands(); 151239310Sdim 152239310Sdimpublic: 153239310Sdim /// runOnMachineFunction - Initialize per-function data structures. 154239310Sdim void runOnMachineFunction(MachineFunction &MF) { 155239310Sdim TII = MF.getTarget().getInstrInfo(); 156239310Sdim TRI = MF.getTarget().getRegisterInfo(); 157239310Sdim MRI = &MF.getRegInfo(); 158239310Sdim LiveRegUnits.clear(); 159239310Sdim LiveRegUnits.setUniverse(TRI->getNumRegUnits()); 160239310Sdim ClobberedRegUnits.clear(); 161239310Sdim ClobberedRegUnits.resize(TRI->getNumRegUnits()); 162239310Sdim } 163239310Sdim 164239310Sdim /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, 165239310Sdim /// initialize the internal state, and return true. 166239310Sdim bool canConvertIf(MachineBasicBlock *MBB); 167239310Sdim 168239310Sdim /// convertIf - If-convert the last block passed to canConvertIf(), assuming 169239310Sdim /// it is possible. Add any erased blocks to RemovedBlocks. 170239310Sdim void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks); 171239310Sdim}; 172239310Sdim} // end anonymous namespace 173239310Sdim 174239310Sdim 175239310Sdim/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely 176239310Sdim/// be speculated. The terminators are not considered. 177239310Sdim/// 178239310Sdim/// If instructions use any values that are defined in the head basic block, 179239310Sdim/// the defining instructions are added to InsertAfter. 180239310Sdim/// 181239310Sdim/// Any clobbered regunits are added to ClobberedRegUnits. 182239310Sdim/// 183239310Sdimbool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { 184239310Sdim // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to 185239310Sdim // get right. 186239310Sdim if (!MBB->livein_empty()) { 187239310Sdim DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); 188239310Sdim return false; 189239310Sdim } 190239310Sdim 191239310Sdim unsigned InstrCount = 0; 192239310Sdim 193239310Sdim // Check all instructions, except the terminators. It is assumed that 194239310Sdim // terminators never have side effects or define any used register values. 195239310Sdim for (MachineBasicBlock::iterator I = MBB->begin(), 196239310Sdim E = MBB->getFirstTerminator(); I != E; ++I) { 197239310Sdim if (I->isDebugValue()) 198239310Sdim continue; 199239310Sdim 200239310Sdim if (++InstrCount > BlockInstrLimit && !Stress) { 201239310Sdim DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " 202239310Sdim << BlockInstrLimit << " instructions.\n"); 203239310Sdim return false; 204239310Sdim } 205239310Sdim 206239310Sdim // There shouldn't normally be any phis in a single-predecessor block. 207239310Sdim if (I->isPHI()) { 208239310Sdim DEBUG(dbgs() << "Can't hoist: " << *I); 209239310Sdim return false; 210239310Sdim } 211239310Sdim 212239310Sdim // Don't speculate loads. Note that it may be possible and desirable to 213239310Sdim // speculate GOT or constant pool loads that are guaranteed not to trap, 214239310Sdim // but we don't support that for now. 215239310Sdim if (I->mayLoad()) { 216239310Sdim DEBUG(dbgs() << "Won't speculate load: " << *I); 217239310Sdim return false; 218239310Sdim } 219239310Sdim 220239310Sdim // We never speculate stores, so an AA pointer isn't necessary. 221239310Sdim bool DontMoveAcrossStore = true; 222239310Sdim if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { 223239310Sdim DEBUG(dbgs() << "Can't speculate: " << *I); 224239310Sdim return false; 225239310Sdim } 226239310Sdim 227239310Sdim // Check for any dependencies on Head instructions. 228239310Sdim for (MIOperands MO(I); MO.isValid(); ++MO) { 229239310Sdim if (MO->isRegMask()) { 230239310Sdim DEBUG(dbgs() << "Won't speculate regmask: " << *I); 231239310Sdim return false; 232239310Sdim } 233239310Sdim if (!MO->isReg()) 234239310Sdim continue; 235239310Sdim unsigned Reg = MO->getReg(); 236239310Sdim 237239310Sdim // Remember clobbered regunits. 238239310Sdim if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) 239239310Sdim for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 240239310Sdim ClobberedRegUnits.set(*Units); 241239310Sdim 242239310Sdim if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) 243239310Sdim continue; 244239310Sdim MachineInstr *DefMI = MRI->getVRegDef(Reg); 245239310Sdim if (!DefMI || DefMI->getParent() != Head) 246239310Sdim continue; 247239310Sdim if (InsertAfter.insert(DefMI)) 248239310Sdim DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); 249239310Sdim if (DefMI->isTerminator()) { 250239310Sdim DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); 251239310Sdim return false; 252239310Sdim } 253239310Sdim } 254239310Sdim } 255239310Sdim return true; 256239310Sdim} 257239310Sdim 258239310Sdim 259239310Sdim/// Find an insertion point in Head for the speculated instructions. The 260239310Sdim/// insertion point must be: 261239310Sdim/// 262239310Sdim/// 1. Before any terminators. 263239310Sdim/// 2. After any instructions in InsertAfter. 264239310Sdim/// 3. Not have any clobbered regunits live. 265239310Sdim/// 266239310Sdim/// This function sets InsertionPoint and returns true when successful, it 267239310Sdim/// returns false if no valid insertion point could be found. 268239310Sdim/// 269239310Sdimbool SSAIfConv::findInsertionPoint() { 270239310Sdim // Keep track of live regunits before the current position. 271239310Sdim // Only track RegUnits that are also in ClobberedRegUnits. 272239310Sdim LiveRegUnits.clear(); 273239310Sdim SmallVector<unsigned, 8> Reads; 274239310Sdim MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); 275239310Sdim MachineBasicBlock::iterator I = Head->end(); 276239310Sdim MachineBasicBlock::iterator B = Head->begin(); 277239310Sdim while (I != B) { 278239310Sdim --I; 279239310Sdim // Some of the conditional code depends in I. 280239310Sdim if (InsertAfter.count(I)) { 281239310Sdim DEBUG(dbgs() << "Can't insert code after " << *I); 282239310Sdim return false; 283239310Sdim } 284239310Sdim 285239310Sdim // Update live regunits. 286239310Sdim for (MIOperands MO(I); MO.isValid(); ++MO) { 287239310Sdim // We're ignoring regmask operands. That is conservatively correct. 288239310Sdim if (!MO->isReg()) 289239310Sdim continue; 290239310Sdim unsigned Reg = MO->getReg(); 291239310Sdim if (!TargetRegisterInfo::isPhysicalRegister(Reg)) 292239310Sdim continue; 293239310Sdim // I clobbers Reg, so it isn't live before I. 294239310Sdim if (MO->isDef()) 295239310Sdim for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 296239310Sdim LiveRegUnits.erase(*Units); 297239310Sdim // Unless I reads Reg. 298239310Sdim if (MO->readsReg()) 299239310Sdim Reads.push_back(Reg); 300239310Sdim } 301239310Sdim // Anything read by I is live before I. 302239310Sdim while (!Reads.empty()) 303239310Sdim for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid(); 304239310Sdim ++Units) 305239310Sdim if (ClobberedRegUnits.test(*Units)) 306239310Sdim LiveRegUnits.insert(*Units); 307239310Sdim 308239310Sdim // We can't insert before a terminator. 309239310Sdim if (I != FirstTerm && I->isTerminator()) 310239310Sdim continue; 311239310Sdim 312239310Sdim // Some of the clobbered registers are live before I, not a valid insertion 313239310Sdim // point. 314239310Sdim if (!LiveRegUnits.empty()) { 315239310Sdim DEBUG({ 316239310Sdim dbgs() << "Would clobber"; 317239310Sdim for (SparseSet<unsigned>::const_iterator 318239310Sdim i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) 319239310Sdim dbgs() << ' ' << PrintRegUnit(*i, TRI); 320239310Sdim dbgs() << " live before " << *I; 321239310Sdim }); 322239310Sdim continue; 323239310Sdim } 324239310Sdim 325239310Sdim // This is a valid insertion point. 326239310Sdim InsertionPoint = I; 327239310Sdim DEBUG(dbgs() << "Can insert before " << *I); 328239310Sdim return true; 329239310Sdim } 330239310Sdim DEBUG(dbgs() << "No legal insertion point found.\n"); 331239310Sdim return false; 332239310Sdim} 333239310Sdim 334239310Sdim 335239310Sdim 336239310Sdim/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is 337239310Sdim/// a potential candidate for if-conversion. Fill out the internal state. 338239310Sdim/// 339239310Sdimbool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { 340239310Sdim Head = MBB; 341239310Sdim TBB = FBB = Tail = 0; 342239310Sdim 343239310Sdim if (Head->succ_size() != 2) 344239310Sdim return false; 345239310Sdim MachineBasicBlock *Succ0 = Head->succ_begin()[0]; 346239310Sdim MachineBasicBlock *Succ1 = Head->succ_begin()[1]; 347239310Sdim 348239310Sdim // Canonicalize so Succ0 has MBB as its single predecessor. 349239310Sdim if (Succ0->pred_size() != 1) 350239310Sdim std::swap(Succ0, Succ1); 351239310Sdim 352239310Sdim if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) 353239310Sdim return false; 354239310Sdim 355239310Sdim Tail = Succ0->succ_begin()[0]; 356239310Sdim 357239310Sdim // This is not a triangle. 358239310Sdim if (Tail != Succ1) { 359239310Sdim // Check for a diamond. We won't deal with any critical edges. 360239310Sdim if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || 361239310Sdim Succ1->succ_begin()[0] != Tail) 362239310Sdim return false; 363239310Sdim DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() 364239310Sdim << " -> BB#" << Succ0->getNumber() 365239310Sdim << "/BB#" << Succ1->getNumber() 366239310Sdim << " -> BB#" << Tail->getNumber() << '\n'); 367239310Sdim 368239310Sdim // Live-in physregs are tricky to get right when speculating code. 369239310Sdim if (!Tail->livein_empty()) { 370239310Sdim DEBUG(dbgs() << "Tail has live-ins.\n"); 371239310Sdim return false; 372239310Sdim } 373239310Sdim } else { 374239310Sdim DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() 375239310Sdim << " -> BB#" << Succ0->getNumber() 376239310Sdim << " -> BB#" << Tail->getNumber() << '\n'); 377239310Sdim } 378239310Sdim 379239310Sdim // This is a triangle or a diamond. 380239310Sdim // If Tail doesn't have any phis, there must be side effects. 381239310Sdim if (Tail->empty() || !Tail->front().isPHI()) { 382239310Sdim DEBUG(dbgs() << "No phis in tail.\n"); 383239310Sdim return false; 384239310Sdim } 385239310Sdim 386239310Sdim // The branch we're looking to eliminate must be analyzable. 387239310Sdim Cond.clear(); 388239310Sdim if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { 389239310Sdim DEBUG(dbgs() << "Branch not analyzable.\n"); 390239310Sdim return false; 391239310Sdim } 392239310Sdim 393239310Sdim // This is weird, probably some sort of degenerate CFG. 394239310Sdim if (!TBB) { 395239310Sdim DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); 396239310Sdim return false; 397239310Sdim } 398239310Sdim 399239310Sdim // AnalyzeBranch doesn't set FBB on a fall-through branch. 400239310Sdim // Make sure it is always set. 401239310Sdim FBB = TBB == Succ0 ? Succ1 : Succ0; 402239310Sdim 403239310Sdim // Any phis in the tail block must be convertible to selects. 404239310Sdim PHIs.clear(); 405239310Sdim MachineBasicBlock *TPred = getTPred(); 406239310Sdim MachineBasicBlock *FPred = getFPred(); 407239310Sdim for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); 408239310Sdim I != E && I->isPHI(); ++I) { 409239310Sdim PHIs.push_back(&*I); 410239310Sdim PHIInfo &PI = PHIs.back(); 411239310Sdim // Find PHI operands corresponding to TPred and FPred. 412239310Sdim for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { 413239310Sdim if (PI.PHI->getOperand(i+1).getMBB() == TPred) 414239310Sdim PI.TReg = PI.PHI->getOperand(i).getReg(); 415239310Sdim if (PI.PHI->getOperand(i+1).getMBB() == FPred) 416239310Sdim PI.FReg = PI.PHI->getOperand(i).getReg(); 417239310Sdim } 418239310Sdim assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); 419239310Sdim assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); 420239310Sdim 421239310Sdim // Get target information. 422239310Sdim if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, 423239310Sdim PI.CondCycles, PI.TCycles, PI.FCycles)) { 424239310Sdim DEBUG(dbgs() << "Can't convert: " << *PI.PHI); 425239310Sdim return false; 426239310Sdim } 427239310Sdim } 428239310Sdim 429239310Sdim // Check that the conditional instructions can be speculated. 430239310Sdim InsertAfter.clear(); 431239310Sdim ClobberedRegUnits.reset(); 432239310Sdim if (TBB != Tail && !canSpeculateInstrs(TBB)) 433239310Sdim return false; 434239310Sdim if (FBB != Tail && !canSpeculateInstrs(FBB)) 435239310Sdim return false; 436239310Sdim 437239310Sdim // Try to find a valid insertion point for the speculated instructions in the 438239310Sdim // head basic block. 439239310Sdim if (!findInsertionPoint()) 440239310Sdim return false; 441239310Sdim 442239310Sdim if (isTriangle()) 443239310Sdim ++NumTrianglesSeen; 444239310Sdim else 445239310Sdim ++NumDiamondsSeen; 446239310Sdim return true; 447239310Sdim} 448239310Sdim 449239310Sdim/// replacePHIInstrs - Completely replace PHI instructions with selects. 450239310Sdim/// This is possible when the only Tail predecessors are the if-converted 451239310Sdim/// blocks. 452239310Sdimvoid SSAIfConv::replacePHIInstrs() { 453239310Sdim assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); 454239310Sdim MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); 455239310Sdim assert(FirstTerm != Head->end() && "No terminators"); 456239310Sdim DebugLoc HeadDL = FirstTerm->getDebugLoc(); 457239310Sdim 458239310Sdim // Convert all PHIs to select instructions inserted before FirstTerm. 459239310Sdim for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { 460239310Sdim PHIInfo &PI = PHIs[i]; 461239310Sdim DEBUG(dbgs() << "If-converting " << *PI.PHI); 462239310Sdim unsigned DstReg = PI.PHI->getOperand(0).getReg(); 463239310Sdim TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); 464239310Sdim DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); 465239310Sdim PI.PHI->eraseFromParent(); 466239310Sdim PI.PHI = 0; 467239310Sdim } 468239310Sdim} 469239310Sdim 470239310Sdim/// rewritePHIOperands - When there are additional Tail predecessors, insert 471239310Sdim/// select instructions in Head and rewrite PHI operands to use the selects. 472239310Sdim/// Keep the PHI instructions in Tail to handle the other predecessors. 473239310Sdimvoid SSAIfConv::rewritePHIOperands() { 474239310Sdim MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); 475239310Sdim assert(FirstTerm != Head->end() && "No terminators"); 476239310Sdim DebugLoc HeadDL = FirstTerm->getDebugLoc(); 477239310Sdim 478239310Sdim // Convert all PHIs to select instructions inserted before FirstTerm. 479239310Sdim for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { 480239310Sdim PHIInfo &PI = PHIs[i]; 481239310Sdim DEBUG(dbgs() << "If-converting " << *PI.PHI); 482239310Sdim unsigned PHIDst = PI.PHI->getOperand(0).getReg(); 483239310Sdim unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); 484239310Sdim TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); 485239310Sdim DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); 486239310Sdim 487239310Sdim // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. 488239310Sdim for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { 489239310Sdim MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); 490239310Sdim if (MBB == getTPred()) { 491239310Sdim PI.PHI->getOperand(i-1).setMBB(Head); 492239310Sdim PI.PHI->getOperand(i-2).setReg(DstReg); 493239310Sdim } else if (MBB == getFPred()) { 494239310Sdim PI.PHI->RemoveOperand(i-1); 495239310Sdim PI.PHI->RemoveOperand(i-2); 496239310Sdim } 497239310Sdim } 498239310Sdim DEBUG(dbgs() << " --> " << *PI.PHI); 499239310Sdim } 500239310Sdim} 501239310Sdim 502239310Sdim/// convertIf - Execute the if conversion after canConvertIf has determined the 503239310Sdim/// feasibility. 504239310Sdim/// 505239310Sdim/// Any basic blocks erased will be added to RemovedBlocks. 506239310Sdim/// 507239310Sdimvoid SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { 508239310Sdim assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); 509239310Sdim 510239310Sdim // Update statistics. 511239310Sdim if (isTriangle()) 512239310Sdim ++NumTrianglesConv; 513239310Sdim else 514239310Sdim ++NumDiamondsConv; 515239310Sdim 516239310Sdim // Move all instructions into Head, except for the terminators. 517239310Sdim if (TBB != Tail) 518239310Sdim Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); 519239310Sdim if (FBB != Tail) 520239310Sdim Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); 521239310Sdim 522239310Sdim // Are there extra Tail predecessors? 523239310Sdim bool ExtraPreds = Tail->pred_size() != 2; 524239310Sdim if (ExtraPreds) 525239310Sdim rewritePHIOperands(); 526239310Sdim else 527239310Sdim replacePHIInstrs(); 528239310Sdim 529239310Sdim // Fix up the CFG, temporarily leave Head without any successors. 530239310Sdim Head->removeSuccessor(TBB); 531239310Sdim Head->removeSuccessor(FBB); 532239310Sdim if (TBB != Tail) 533239310Sdim TBB->removeSuccessor(Tail); 534239310Sdim if (FBB != Tail) 535239310Sdim FBB->removeSuccessor(Tail); 536239310Sdim 537239310Sdim // Fix up Head's terminators. 538239310Sdim // It should become a single branch or a fallthrough. 539239310Sdim DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); 540239310Sdim TII->RemoveBranch(*Head); 541239310Sdim 542239310Sdim // Erase the now empty conditional blocks. It is likely that Head can fall 543239310Sdim // through to Tail, and we can join the two blocks. 544239310Sdim if (TBB != Tail) { 545239310Sdim RemovedBlocks.push_back(TBB); 546239310Sdim TBB->eraseFromParent(); 547239310Sdim } 548239310Sdim if (FBB != Tail) { 549239310Sdim RemovedBlocks.push_back(FBB); 550239310Sdim FBB->eraseFromParent(); 551239310Sdim } 552239310Sdim 553239310Sdim assert(Head->succ_empty() && "Additional head successors?"); 554239310Sdim if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { 555239310Sdim // Splice Tail onto the end of Head. 556239310Sdim DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() 557239310Sdim << " into head BB#" << Head->getNumber() << '\n'); 558239310Sdim Head->splice(Head->end(), Tail, 559239310Sdim Tail->begin(), Tail->end()); 560239310Sdim Head->transferSuccessorsAndUpdatePHIs(Tail); 561239310Sdim RemovedBlocks.push_back(Tail); 562239310Sdim Tail->eraseFromParent(); 563239310Sdim } else { 564239310Sdim // We need a branch to Tail, let code placement work it out later. 565239310Sdim DEBUG(dbgs() << "Converting to unconditional branch.\n"); 566239310Sdim SmallVector<MachineOperand, 0> EmptyCond; 567239310Sdim TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL); 568239310Sdim Head->addSuccessor(Tail); 569239310Sdim } 570239310Sdim DEBUG(dbgs() << *Head); 571239310Sdim} 572239310Sdim 573239310Sdim 574239310Sdim//===----------------------------------------------------------------------===// 575239310Sdim// EarlyIfConverter Pass 576239310Sdim//===----------------------------------------------------------------------===// 577239310Sdim 578239310Sdimnamespace { 579239310Sdimclass EarlyIfConverter : public MachineFunctionPass { 580239310Sdim const TargetInstrInfo *TII; 581239310Sdim const TargetRegisterInfo *TRI; 582239310Sdim const MCSchedModel *SchedModel; 583239310Sdim MachineRegisterInfo *MRI; 584239310Sdim MachineDominatorTree *DomTree; 585239310Sdim MachineLoopInfo *Loops; 586239310Sdim MachineTraceMetrics *Traces; 587239310Sdim MachineTraceMetrics::Ensemble *MinInstr; 588239310Sdim SSAIfConv IfConv; 589239310Sdim 590239310Sdimpublic: 591239310Sdim static char ID; 592239310Sdim EarlyIfConverter() : MachineFunctionPass(ID) {} 593239310Sdim void getAnalysisUsage(AnalysisUsage &AU) const; 594239310Sdim bool runOnMachineFunction(MachineFunction &MF); 595252723Sdim const char *getPassName() const { return "Early If-Conversion"; } 596239310Sdim 597239310Sdimprivate: 598239310Sdim bool tryConvertIf(MachineBasicBlock*); 599239310Sdim void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); 600239310Sdim void updateLoops(ArrayRef<MachineBasicBlock*> Removed); 601239310Sdim void invalidateTraces(); 602239310Sdim bool shouldConvertIf(); 603239310Sdim}; 604239310Sdim} // end anonymous namespace 605239310Sdim 606239310Sdimchar EarlyIfConverter::ID = 0; 607239310Sdimchar &llvm::EarlyIfConverterID = EarlyIfConverter::ID; 608239310Sdim 609239310SdimINITIALIZE_PASS_BEGIN(EarlyIfConverter, 610239310Sdim "early-ifcvt", "Early If Converter", false, false) 611239310SdimINITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 612239310SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 613239310SdimINITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) 614239310SdimINITIALIZE_PASS_END(EarlyIfConverter, 615239310Sdim "early-ifcvt", "Early If Converter", false, false) 616239310Sdim 617239310Sdimvoid EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { 618239310Sdim AU.addRequired<MachineBranchProbabilityInfo>(); 619239310Sdim AU.addRequired<MachineDominatorTree>(); 620239310Sdim AU.addPreserved<MachineDominatorTree>(); 621239310Sdim AU.addRequired<MachineLoopInfo>(); 622239310Sdim AU.addPreserved<MachineLoopInfo>(); 623239310Sdim AU.addRequired<MachineTraceMetrics>(); 624239310Sdim AU.addPreserved<MachineTraceMetrics>(); 625239310Sdim MachineFunctionPass::getAnalysisUsage(AU); 626239310Sdim} 627239310Sdim 628239310Sdim/// Update the dominator tree after if-conversion erased some blocks. 629239310Sdimvoid EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { 630239310Sdim // convertIf can remove TBB, FBB, and Tail can be merged into Head. 631239310Sdim // TBB and FBB should not dominate any blocks. 632239310Sdim // Tail children should be transferred to Head. 633239310Sdim MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); 634239310Sdim for (unsigned i = 0, e = Removed.size(); i != e; ++i) { 635239310Sdim MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); 636239310Sdim assert(Node != HeadNode && "Cannot erase the head node"); 637239310Sdim while (Node->getNumChildren()) { 638239310Sdim assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); 639239310Sdim DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); 640239310Sdim } 641239310Sdim DomTree->eraseNode(Removed[i]); 642239310Sdim } 643239310Sdim} 644239310Sdim 645239310Sdim/// Update LoopInfo after if-conversion. 646239310Sdimvoid EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { 647239310Sdim if (!Loops) 648239310Sdim return; 649239310Sdim // If-conversion doesn't change loop structure, and it doesn't mess with back 650239310Sdim // edges, so updating LoopInfo is simply removing the dead blocks. 651239310Sdim for (unsigned i = 0, e = Removed.size(); i != e; ++i) 652239310Sdim Loops->removeBlock(Removed[i]); 653239310Sdim} 654239310Sdim 655239310Sdim/// Invalidate MachineTraceMetrics before if-conversion. 656239310Sdimvoid EarlyIfConverter::invalidateTraces() { 657239310Sdim Traces->verifyAnalysis(); 658239310Sdim Traces->invalidate(IfConv.Head); 659239310Sdim Traces->invalidate(IfConv.Tail); 660239310Sdim Traces->invalidate(IfConv.TBB); 661239310Sdim Traces->invalidate(IfConv.FBB); 662239310Sdim Traces->verifyAnalysis(); 663239310Sdim} 664239310Sdim 665239310Sdim// Adjust cycles with downward saturation. 666239310Sdimstatic unsigned adjCycles(unsigned Cyc, int Delta) { 667239310Sdim if (Delta < 0 && Cyc + Delta > Cyc) 668239310Sdim return 0; 669239310Sdim return Cyc + Delta; 670239310Sdim} 671239310Sdim 672239310Sdim/// Apply cost model and heuristics to the if-conversion in IfConv. 673239310Sdim/// Return true if the conversion is a good idea. 674239310Sdim/// 675239310Sdimbool EarlyIfConverter::shouldConvertIf() { 676239310Sdim // Stress testing mode disables all cost considerations. 677239310Sdim if (Stress) 678239310Sdim return true; 679239310Sdim 680239310Sdim if (!MinInstr) 681239310Sdim MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 682239310Sdim 683239310Sdim MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); 684239310Sdim MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); 685239310Sdim DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); 686239310Sdim unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), 687239310Sdim FBBTrace.getCriticalPath()); 688239310Sdim 689239310Sdim // Set a somewhat arbitrary limit on the critical path extension we accept. 690239310Sdim unsigned CritLimit = SchedModel->MispredictPenalty/2; 691239310Sdim 692239310Sdim // If-conversion only makes sense when there is unexploited ILP. Compute the 693239310Sdim // maximum-ILP resource length of the trace after if-conversion. Compare it 694239310Sdim // to the shortest critical path. 695239310Sdim SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; 696239310Sdim if (IfConv.TBB != IfConv.Tail) 697239310Sdim ExtraBlocks.push_back(IfConv.TBB); 698239310Sdim unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); 699239310Sdim DEBUG(dbgs() << "Resource length " << ResLength 700239310Sdim << ", minimal critical path " << MinCrit << '\n'); 701239310Sdim if (ResLength > MinCrit + CritLimit) { 702239310Sdim DEBUG(dbgs() << "Not enough available ILP.\n"); 703239310Sdim return false; 704239310Sdim } 705239310Sdim 706239310Sdim // Assume that the depth of the first head terminator will also be the depth 707239310Sdim // of the select instruction inserted, as determined by the flag dependency. 708239310Sdim // TBB / FBB data dependencies may delay the select even more. 709239310Sdim MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); 710239310Sdim unsigned BranchDepth = 711239310Sdim HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; 712239310Sdim DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); 713239310Sdim 714239310Sdim // Look at all the tail phis, and compute the critical path extension caused 715239310Sdim // by inserting select instructions. 716239310Sdim MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); 717239310Sdim for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { 718239310Sdim SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; 719239310Sdim unsigned Slack = TailTrace.getInstrSlack(PI.PHI); 720239310Sdim unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; 721239310Sdim DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); 722239310Sdim 723239310Sdim // The condition is pulled into the critical path. 724239310Sdim unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); 725239310Sdim if (CondDepth > MaxDepth) { 726239310Sdim unsigned Extra = CondDepth - MaxDepth; 727239310Sdim DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); 728239310Sdim if (Extra > CritLimit) { 729239310Sdim DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); 730239310Sdim return false; 731239310Sdim } 732239310Sdim } 733239310Sdim 734239310Sdim // The TBB value is pulled into the critical path. 735239310Sdim unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); 736239310Sdim if (TDepth > MaxDepth) { 737239310Sdim unsigned Extra = TDepth - MaxDepth; 738239310Sdim DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); 739239310Sdim if (Extra > CritLimit) { 740239310Sdim DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); 741239310Sdim return false; 742239310Sdim } 743239310Sdim } 744239310Sdim 745239310Sdim // The FBB value is pulled into the critical path. 746239310Sdim unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); 747239310Sdim if (FDepth > MaxDepth) { 748239310Sdim unsigned Extra = FDepth - MaxDepth; 749239310Sdim DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); 750239310Sdim if (Extra > CritLimit) { 751239310Sdim DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); 752239310Sdim return false; 753239310Sdim } 754239310Sdim } 755239310Sdim } 756239310Sdim return true; 757239310Sdim} 758239310Sdim 759239310Sdim/// Attempt repeated if-conversion on MBB, return true if successful. 760239310Sdim/// 761239310Sdimbool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { 762239310Sdim bool Changed = false; 763239310Sdim while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { 764239310Sdim // If-convert MBB and update analyses. 765239310Sdim invalidateTraces(); 766239310Sdim SmallVector<MachineBasicBlock*, 4> RemovedBlocks; 767239310Sdim IfConv.convertIf(RemovedBlocks); 768239310Sdim Changed = true; 769239310Sdim updateDomTree(RemovedBlocks); 770239310Sdim updateLoops(RemovedBlocks); 771239310Sdim } 772239310Sdim return Changed; 773239310Sdim} 774239310Sdim 775239310Sdimbool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { 776239310Sdim DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" 777245431Sdim << "********** Function: " << MF.getName() << '\n'); 778239310Sdim TII = MF.getTarget().getInstrInfo(); 779239310Sdim TRI = MF.getTarget().getRegisterInfo(); 780245431Sdim SchedModel = 781245431Sdim MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); 782239310Sdim MRI = &MF.getRegInfo(); 783239310Sdim DomTree = &getAnalysis<MachineDominatorTree>(); 784239310Sdim Loops = getAnalysisIfAvailable<MachineLoopInfo>(); 785239310Sdim Traces = &getAnalysis<MachineTraceMetrics>(); 786239310Sdim MinInstr = 0; 787239310Sdim 788239310Sdim bool Changed = false; 789239310Sdim IfConv.runOnMachineFunction(MF); 790239310Sdim 791239310Sdim // Visit blocks in dominator tree post-order. The post-order enables nested 792239310Sdim // if-conversion in a single pass. The tryConvertIf() function may erase 793239310Sdim // blocks, but only blocks dominated by the head block. This makes it safe to 794239310Sdim // update the dominator tree while the post-order iterator is still active. 795239310Sdim for (po_iterator<MachineDominatorTree*> 796239310Sdim I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) 797239310Sdim if (tryConvertIf(I->getBlock())) 798239310Sdim Changed = true; 799239310Sdim 800239310Sdim return Changed; 801239310Sdim} 802