TailDuplicator.cpp revision 353358
1321369Sdim//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// 2303231Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6303231Sdim// 7303231Sdim//===----------------------------------------------------------------------===// 8303231Sdim// 9303231Sdim// This utility class duplicates basic blocks ending in unconditional branches 10303231Sdim// into the tails of their predecessors. 11303231Sdim// 12303231Sdim//===----------------------------------------------------------------------===// 13303231Sdim 14327952Sdim#include "llvm/CodeGen/TailDuplicator.h" 15321369Sdim#include "llvm/ADT/DenseMap.h" 16303231Sdim#include "llvm/ADT/DenseSet.h" 17327952Sdim#include "llvm/ADT/STLExtras.h" 18303231Sdim#include "llvm/ADT/SetVector.h" 19321369Sdim#include "llvm/ADT/SmallPtrSet.h" 20321369Sdim#include "llvm/ADT/SmallVector.h" 21303231Sdim#include "llvm/ADT/Statistic.h" 22321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 23303231Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 24321369Sdim#include "llvm/CodeGen/MachineFunction.h" 25321369Sdim#include "llvm/CodeGen/MachineInstr.h" 26303231Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 27321369Sdim#include "llvm/CodeGen/MachineOperand.h" 28321369Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 29321369Sdim#include "llvm/CodeGen/MachineSSAUpdater.h" 30327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 31327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 32327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 33321369Sdim#include "llvm/IR/DebugLoc.h" 34303231Sdim#include "llvm/IR/Function.h" 35303231Sdim#include "llvm/Support/CommandLine.h" 36303231Sdim#include "llvm/Support/Debug.h" 37303231Sdim#include "llvm/Support/ErrorHandling.h" 38303231Sdim#include "llvm/Support/raw_ostream.h" 39341825Sdim#include "llvm/Target/TargetMachine.h" 40321369Sdim#include <algorithm> 41321369Sdim#include <cassert> 42321369Sdim#include <iterator> 43321369Sdim#include <utility> 44321369Sdim 45303231Sdimusing namespace llvm; 46303231Sdim 47303231Sdim#define DEBUG_TYPE "tailduplication" 48303231Sdim 49303231SdimSTATISTIC(NumTails, "Number of tails duplicated"); 50303231SdimSTATISTIC(NumTailDups, "Number of tail duplicated blocks"); 51303231SdimSTATISTIC(NumTailDupAdded, 52303231Sdim "Number of instructions added due to tail duplication"); 53303231SdimSTATISTIC(NumTailDupRemoved, 54303231Sdim "Number of instructions removed due to tail duplication"); 55303231SdimSTATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 56303231SdimSTATISTIC(NumAddedPHIs, "Number of phis added"); 57303231Sdim 58303231Sdim// Heuristic for tail duplication. 59303231Sdimstatic cl::opt<unsigned> TailDuplicateSize( 60303231Sdim "tail-dup-size", 61303231Sdim cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 62303231Sdim cl::Hidden); 63303231Sdim 64321369Sdimstatic cl::opt<unsigned> TailDupIndirectBranchSize( 65314564Sdim "tail-dup-indirect-size", 66314564Sdim cl::desc("Maximum instructions to consider tail duplicating blocks that " 67314564Sdim "end with indirect branches."), cl::init(20), 68314564Sdim cl::Hidden); 69314564Sdim 70303231Sdimstatic cl::opt<bool> 71303231Sdim TailDupVerify("tail-dup-verify", 72303231Sdim cl::desc("Verify sanity of PHI instructions during taildup"), 73303231Sdim cl::init(false), cl::Hidden); 74303231Sdim 75303231Sdimstatic cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 76303231Sdim cl::Hidden); 77303231Sdim 78327952Sdimvoid TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, 79314564Sdim const MachineBranchProbabilityInfo *MBPIin, 80314564Sdim bool LayoutModeIn, unsigned TailDupSizeIn) { 81314564Sdim MF = &MFin; 82314564Sdim TII = MF->getSubtarget().getInstrInfo(); 83314564Sdim TRI = MF->getSubtarget().getRegisterInfo(); 84314564Sdim MRI = &MF->getRegInfo(); 85314564Sdim MMI = &MF->getMMI(); 86303231Sdim MBPI = MBPIin; 87314564Sdim TailDupSize = TailDupSizeIn; 88303231Sdim 89303231Sdim assert(MBPI != nullptr && "Machine Branch Probability Info required"); 90303231Sdim 91314564Sdim LayoutMode = LayoutModeIn; 92327952Sdim this->PreRegAlloc = PreRegAlloc; 93303231Sdim} 94303231Sdim 95303231Sdimstatic void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 96303231Sdim for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 97303231Sdim MachineBasicBlock *MBB = &*I; 98303231Sdim SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 99303231Sdim MBB->pred_end()); 100303231Sdim MachineBasicBlock::iterator MI = MBB->begin(); 101303231Sdim while (MI != MBB->end()) { 102303231Sdim if (!MI->isPHI()) 103303231Sdim break; 104314564Sdim for (MachineBasicBlock *PredBB : Preds) { 105303231Sdim bool Found = false; 106303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 107303231Sdim MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 108303231Sdim if (PHIBB == PredBB) { 109303231Sdim Found = true; 110303231Sdim break; 111303231Sdim } 112303231Sdim } 113303231Sdim if (!Found) { 114327952Sdim dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " 115327952Sdim << *MI; 116327952Sdim dbgs() << " missing input from predecessor " 117327952Sdim << printMBBReference(*PredBB) << '\n'; 118303231Sdim llvm_unreachable(nullptr); 119303231Sdim } 120303231Sdim } 121303231Sdim 122303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 123303231Sdim MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 124303231Sdim if (CheckExtra && !Preds.count(PHIBB)) { 125327952Sdim dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) 126327952Sdim << ": " << *MI; 127327952Sdim dbgs() << " extra input from predecessor " 128327952Sdim << printMBBReference(*PHIBB) << '\n'; 129303231Sdim llvm_unreachable(nullptr); 130303231Sdim } 131303231Sdim if (PHIBB->getNumber() < 0) { 132327952Sdim dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " 133327952Sdim << *MI; 134327952Sdim dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n'; 135303231Sdim llvm_unreachable(nullptr); 136303231Sdim } 137303231Sdim } 138303231Sdim ++MI; 139303231Sdim } 140303231Sdim } 141303231Sdim} 142303231Sdim 143303231Sdim/// Tail duplicate the block and cleanup. 144314564Sdim/// \p IsSimple - return value of isSimpleBB 145314564Sdim/// \p MBB - block to be duplicated 146314564Sdim/// \p ForcedLayoutPred - If non-null, treat this block as the layout 147314564Sdim/// predecessor, instead of using the ordering in MF 148314564Sdim/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of 149314564Sdim/// all Preds that received a copy of \p MBB. 150314564Sdim/// \p RemovalCallback - if non-null, called just before MBB is deleted. 151314564Sdimbool TailDuplicator::tailDuplicateAndUpdate( 152314564Sdim bool IsSimple, MachineBasicBlock *MBB, 153314564Sdim MachineBasicBlock *ForcedLayoutPred, 154314564Sdim SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, 155321369Sdim function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 156303231Sdim // Save the successors list. 157303231Sdim SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 158303231Sdim MBB->succ_end()); 159303231Sdim 160303231Sdim SmallVector<MachineBasicBlock *, 8> TDBBs; 161303231Sdim SmallVector<MachineInstr *, 16> Copies; 162314564Sdim if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) 163303231Sdim return false; 164303231Sdim 165303231Sdim ++NumTails; 166303231Sdim 167303231Sdim SmallVector<MachineInstr *, 8> NewPHIs; 168314564Sdim MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); 169303231Sdim 170303231Sdim // TailBB's immediate successors are now successors of those predecessors 171303231Sdim // which duplicated TailBB. Add the predecessors as sources to the PHI 172303231Sdim // instructions. 173303231Sdim bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 174303231Sdim if (PreRegAlloc) 175303231Sdim updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 176303231Sdim 177303231Sdim // If it is dead, remove it. 178303231Sdim if (isDead) { 179303231Sdim NumTailDupRemoved += MBB->size(); 180314564Sdim removeDeadBlock(MBB, RemovalCallback); 181303231Sdim ++NumDeadBlocks; 182303231Sdim } 183303231Sdim 184303231Sdim // Update SSA form. 185303231Sdim if (!SSAUpdateVRs.empty()) { 186303231Sdim for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 187303231Sdim unsigned VReg = SSAUpdateVRs[i]; 188303231Sdim SSAUpdate.Initialize(VReg); 189303231Sdim 190303231Sdim // If the original definition is still around, add it as an available 191303231Sdim // value. 192303231Sdim MachineInstr *DefMI = MRI->getVRegDef(VReg); 193303231Sdim MachineBasicBlock *DefBB = nullptr; 194303231Sdim if (DefMI) { 195303231Sdim DefBB = DefMI->getParent(); 196303231Sdim SSAUpdate.AddAvailableValue(DefBB, VReg); 197303231Sdim } 198303231Sdim 199303231Sdim // Add the new vregs as available values. 200303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 201303231Sdim SSAUpdateVals.find(VReg); 202303231Sdim for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 203303231Sdim MachineBasicBlock *SrcBB = LI->second[j].first; 204303231Sdim unsigned SrcReg = LI->second[j].second; 205303231Sdim SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 206303231Sdim } 207303231Sdim 208303231Sdim // Rewrite uses that are outside of the original def's block. 209303231Sdim MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 210303231Sdim while (UI != MRI->use_end()) { 211303231Sdim MachineOperand &UseMO = *UI; 212303231Sdim MachineInstr *UseMI = UseMO.getParent(); 213303231Sdim ++UI; 214303231Sdim if (UseMI->isDebugValue()) { 215303231Sdim // SSAUpdate can replace the use with an undef. That creates 216303231Sdim // a debug instruction that is a kill. 217303231Sdim // FIXME: Should it SSAUpdate job to delete debug instructions 218303231Sdim // instead of replacing the use with undef? 219303231Sdim UseMI->eraseFromParent(); 220303231Sdim continue; 221303231Sdim } 222303231Sdim if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 223303231Sdim continue; 224303231Sdim SSAUpdate.RewriteUse(UseMO); 225303231Sdim } 226303231Sdim } 227303231Sdim 228303231Sdim SSAUpdateVRs.clear(); 229303231Sdim SSAUpdateVals.clear(); 230303231Sdim } 231303231Sdim 232303231Sdim // Eliminate some of the copies inserted by tail duplication to maintain 233303231Sdim // SSA form. 234303231Sdim for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 235303231Sdim MachineInstr *Copy = Copies[i]; 236303231Sdim if (!Copy->isCopy()) 237303231Sdim continue; 238303231Sdim unsigned Dst = Copy->getOperand(0).getReg(); 239303231Sdim unsigned Src = Copy->getOperand(1).getReg(); 240303231Sdim if (MRI->hasOneNonDBGUse(Src) && 241303231Sdim MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 242303231Sdim // Copy is the only use. Do trivial copy propagation here. 243303231Sdim MRI->replaceRegWith(Dst, Src); 244303231Sdim Copy->eraseFromParent(); 245303231Sdim } 246303231Sdim } 247303231Sdim 248303231Sdim if (NewPHIs.size()) 249303231Sdim NumAddedPHIs += NewPHIs.size(); 250303231Sdim 251314564Sdim if (DuplicatedPreds) 252314564Sdim *DuplicatedPreds = std::move(TDBBs); 253314564Sdim 254303231Sdim return true; 255303231Sdim} 256303231Sdim 257303231Sdim/// Look for small blocks that are unconditionally branched to and do not fall 258303231Sdim/// through. Tail-duplicate their instructions into their predecessors to 259303231Sdim/// eliminate (dynamic) branches. 260314564Sdimbool TailDuplicator::tailDuplicateBlocks() { 261303231Sdim bool MadeChange = false; 262303231Sdim 263303231Sdim if (PreRegAlloc && TailDupVerify) { 264341825Sdim LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 265314564Sdim VerifyPHIs(*MF, true); 266303231Sdim } 267303231Sdim 268314564Sdim for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { 269303231Sdim MachineBasicBlock *MBB = &*I++; 270303231Sdim 271303231Sdim if (NumTails == TailDupLimit) 272303231Sdim break; 273303231Sdim 274303231Sdim bool IsSimple = isSimpleBB(MBB); 275303231Sdim 276314564Sdim if (!shouldTailDuplicate(IsSimple, *MBB)) 277303231Sdim continue; 278303231Sdim 279314564Sdim MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); 280303231Sdim } 281303231Sdim 282303231Sdim if (PreRegAlloc && TailDupVerify) 283314564Sdim VerifyPHIs(*MF, false); 284303231Sdim 285303231Sdim return MadeChange; 286303231Sdim} 287303231Sdim 288303231Sdimstatic bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 289303231Sdim const MachineRegisterInfo *MRI) { 290303231Sdim for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 291303231Sdim if (UseMI.isDebugValue()) 292303231Sdim continue; 293303231Sdim if (UseMI.getParent() != BB) 294303231Sdim return true; 295303231Sdim } 296303231Sdim return false; 297303231Sdim} 298303231Sdim 299303231Sdimstatic unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 300303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 301303231Sdim if (MI->getOperand(i + 1).getMBB() == SrcBB) 302303231Sdim return i; 303303231Sdim return 0; 304303231Sdim} 305303231Sdim 306303231Sdim// Remember which registers are used by phis in this block. This is 307303231Sdim// used to determine which registers are liveout while modifying the 308303231Sdim// block (which is why we need to copy the information). 309303231Sdimstatic void getRegsUsedByPHIs(const MachineBasicBlock &BB, 310303231Sdim DenseSet<unsigned> *UsedByPhi) { 311303231Sdim for (const auto &MI : BB) { 312303231Sdim if (!MI.isPHI()) 313303231Sdim break; 314303231Sdim for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 315303231Sdim unsigned SrcReg = MI.getOperand(i).getReg(); 316303231Sdim UsedByPhi->insert(SrcReg); 317303231Sdim } 318303231Sdim } 319303231Sdim} 320303231Sdim 321303231Sdim/// Add a definition and source virtual registers pair for SSA update. 322303231Sdimvoid TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 323303231Sdim MachineBasicBlock *BB) { 324303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 325303231Sdim SSAUpdateVals.find(OrigReg); 326303231Sdim if (LI != SSAUpdateVals.end()) 327303231Sdim LI->second.push_back(std::make_pair(BB, NewReg)); 328303231Sdim else { 329303231Sdim AvailableValsTy Vals; 330303231Sdim Vals.push_back(std::make_pair(BB, NewReg)); 331303231Sdim SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 332303231Sdim SSAUpdateVRs.push_back(OrigReg); 333303231Sdim } 334303231Sdim} 335303231Sdim 336303231Sdim/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 337303231Sdim/// source register that's contributed by PredBB and update SSA update map. 338303231Sdimvoid TailDuplicator::processPHI( 339303231Sdim MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 340303231Sdim DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 341303231Sdim SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, 342303231Sdim const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 343303231Sdim unsigned DefReg = MI->getOperand(0).getReg(); 344303231Sdim unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 345303231Sdim assert(SrcOpIdx && "Unable to find matching PHI source?"); 346303231Sdim unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 347303231Sdim unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); 348303231Sdim const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 349303231Sdim LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); 350303231Sdim 351303231Sdim // Insert a copy from source to the end of the block. The def register is the 352303231Sdim // available value liveout of the block. 353303231Sdim unsigned NewDef = MRI->createVirtualRegister(RC); 354303231Sdim Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); 355303231Sdim if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 356303231Sdim addSSAUpdateEntry(DefReg, NewDef, PredBB); 357303231Sdim 358303231Sdim if (!Remove) 359303231Sdim return; 360303231Sdim 361303231Sdim // Remove PredBB from the PHI node. 362303231Sdim MI->RemoveOperand(SrcOpIdx + 1); 363303231Sdim MI->RemoveOperand(SrcOpIdx); 364303231Sdim if (MI->getNumOperands() == 1) 365303231Sdim MI->eraseFromParent(); 366303231Sdim} 367303231Sdim 368303231Sdim/// Duplicate a TailBB instruction to PredBB and update 369303231Sdim/// the source operands due to earlier PHI translation. 370303231Sdimvoid TailDuplicator::duplicateInstruction( 371303231Sdim MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 372303231Sdim DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 373303231Sdim const DenseSet<unsigned> &UsedByPhi) { 374341825Sdim // Allow duplication of CFI instructions. 375341825Sdim if (MI->isCFIInstruction()) { 376341825Sdim BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), 377341825Sdim TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( 378341825Sdim MI->getOperand(0).getCFIIndex()); 379341825Sdim return; 380341825Sdim } 381327952Sdim MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); 382303231Sdim if (PreRegAlloc) { 383327952Sdim for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { 384327952Sdim MachineOperand &MO = NewMI.getOperand(i); 385303231Sdim if (!MO.isReg()) 386303231Sdim continue; 387303231Sdim unsigned Reg = MO.getReg(); 388303231Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 389303231Sdim continue; 390303231Sdim if (MO.isDef()) { 391303231Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 392303231Sdim unsigned NewReg = MRI->createVirtualRegister(RC); 393303231Sdim MO.setReg(NewReg); 394303231Sdim LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 395303231Sdim if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 396303231Sdim addSSAUpdateEntry(Reg, NewReg, PredBB); 397303231Sdim } else { 398303231Sdim auto VI = LocalVRMap.find(Reg); 399303231Sdim if (VI != LocalVRMap.end()) { 400303231Sdim // Need to make sure that the register class of the mapped register 401303231Sdim // will satisfy the constraints of the class of the register being 402303231Sdim // replaced. 403303231Sdim auto *OrigRC = MRI->getRegClass(Reg); 404303231Sdim auto *MappedRC = MRI->getRegClass(VI->second.Reg); 405303231Sdim const TargetRegisterClass *ConstrRC; 406303231Sdim if (VI->second.SubReg != 0) { 407303231Sdim ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, 408303231Sdim VI->second.SubReg); 409303231Sdim if (ConstrRC) { 410303231Sdim // The actual constraining (as in "find appropriate new class") 411303231Sdim // is done by getMatchingSuperRegClass, so now we only need to 412303231Sdim // change the class of the mapped register. 413303231Sdim MRI->setRegClass(VI->second.Reg, ConstrRC); 414303231Sdim } 415303231Sdim } else { 416303231Sdim // For mapped registers that do not have sub-registers, simply 417303231Sdim // restrict their class to match the original one. 418303231Sdim ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); 419303231Sdim } 420303231Sdim 421303231Sdim if (ConstrRC) { 422303231Sdim // If the class constraining succeeded, we can simply replace 423303231Sdim // the old register with the mapped one. 424303231Sdim MO.setReg(VI->second.Reg); 425303231Sdim // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a 426303231Sdim // sub-register, we need to compose the sub-register indices. 427303231Sdim MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), 428303231Sdim VI->second.SubReg)); 429303231Sdim } else { 430303231Sdim // The direct replacement is not possible, due to failing register 431303231Sdim // class constraints. An explicit COPY is necessary. Create one 432303231Sdim // that can be reused 433303231Sdim auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); 434303231Sdim if (NewRC == nullptr) 435303231Sdim NewRC = OrigRC; 436303231Sdim unsigned NewReg = MRI->createVirtualRegister(NewRC); 437353358Sdim BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), 438303231Sdim TII->get(TargetOpcode::COPY), NewReg) 439303231Sdim .addReg(VI->second.Reg, 0, VI->second.SubReg); 440303231Sdim LocalVRMap.erase(VI); 441303231Sdim LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 442303231Sdim MO.setReg(NewReg); 443303231Sdim // The composed VI.Reg:VI.SubReg is replaced with NewReg, which 444303231Sdim // is equivalent to the whole register Reg. Hence, Reg:subreg 445303231Sdim // is same as NewReg:subreg, so keep the sub-register index 446303231Sdim // unchanged. 447303231Sdim } 448303231Sdim // Clear any kill flags from this operand. The new register could 449303231Sdim // have uses after this one, so kills are not valid here. 450303231Sdim MO.setIsKill(false); 451303231Sdim } 452303231Sdim } 453303231Sdim } 454303231Sdim } 455303231Sdim} 456303231Sdim 457303231Sdim/// After FromBB is tail duplicated into its predecessor blocks, the successors 458303231Sdim/// have gained new predecessors. Update the PHI instructions in them 459303231Sdim/// accordingly. 460303231Sdimvoid TailDuplicator::updateSuccessorsPHIs( 461303231Sdim MachineBasicBlock *FromBB, bool isDead, 462303231Sdim SmallVectorImpl<MachineBasicBlock *> &TDBBs, 463303231Sdim SmallSetVector<MachineBasicBlock *, 8> &Succs) { 464314564Sdim for (MachineBasicBlock *SuccBB : Succs) { 465314564Sdim for (MachineInstr &MI : *SuccBB) { 466314564Sdim if (!MI.isPHI()) 467303231Sdim break; 468314564Sdim MachineInstrBuilder MIB(*FromBB->getParent(), MI); 469303231Sdim unsigned Idx = 0; 470314564Sdim for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 471314564Sdim MachineOperand &MO = MI.getOperand(i + 1); 472303231Sdim if (MO.getMBB() == FromBB) { 473303231Sdim Idx = i; 474303231Sdim break; 475303231Sdim } 476303231Sdim } 477303231Sdim 478303231Sdim assert(Idx != 0); 479314564Sdim MachineOperand &MO0 = MI.getOperand(Idx); 480303231Sdim unsigned Reg = MO0.getReg(); 481303231Sdim if (isDead) { 482303231Sdim // Folded into the previous BB. 483303231Sdim // There could be duplicate phi source entries. FIXME: Should sdisel 484303231Sdim // or earlier pass fixed this? 485314564Sdim for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { 486314564Sdim MachineOperand &MO = MI.getOperand(i + 1); 487303231Sdim if (MO.getMBB() == FromBB) { 488314564Sdim MI.RemoveOperand(i + 1); 489314564Sdim MI.RemoveOperand(i); 490303231Sdim } 491303231Sdim } 492303231Sdim } else 493303231Sdim Idx = 0; 494303231Sdim 495303231Sdim // If Idx is set, the operands at Idx and Idx+1 must be removed. 496303231Sdim // We reuse the location to avoid expensive RemoveOperand calls. 497303231Sdim 498303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 499303231Sdim SSAUpdateVals.find(Reg); 500303231Sdim if (LI != SSAUpdateVals.end()) { 501303231Sdim // This register is defined in the tail block. 502303231Sdim for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 503303231Sdim MachineBasicBlock *SrcBB = LI->second[j].first; 504303231Sdim // If we didn't duplicate a bb into a particular predecessor, we 505303231Sdim // might still have added an entry to SSAUpdateVals to correcly 506303231Sdim // recompute SSA. If that case, avoid adding a dummy extra argument 507303231Sdim // this PHI. 508303231Sdim if (!SrcBB->isSuccessor(SuccBB)) 509303231Sdim continue; 510303231Sdim 511303231Sdim unsigned SrcReg = LI->second[j].second; 512303231Sdim if (Idx != 0) { 513314564Sdim MI.getOperand(Idx).setReg(SrcReg); 514314564Sdim MI.getOperand(Idx + 1).setMBB(SrcBB); 515303231Sdim Idx = 0; 516303231Sdim } else { 517303231Sdim MIB.addReg(SrcReg).addMBB(SrcBB); 518303231Sdim } 519303231Sdim } 520303231Sdim } else { 521303231Sdim // Live in tail block, must also be live in predecessors. 522303231Sdim for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 523303231Sdim MachineBasicBlock *SrcBB = TDBBs[j]; 524303231Sdim if (Idx != 0) { 525314564Sdim MI.getOperand(Idx).setReg(Reg); 526314564Sdim MI.getOperand(Idx + 1).setMBB(SrcBB); 527303231Sdim Idx = 0; 528303231Sdim } else { 529303231Sdim MIB.addReg(Reg).addMBB(SrcBB); 530303231Sdim } 531303231Sdim } 532303231Sdim } 533303231Sdim if (Idx != 0) { 534314564Sdim MI.RemoveOperand(Idx + 1); 535314564Sdim MI.RemoveOperand(Idx); 536303231Sdim } 537303231Sdim } 538303231Sdim } 539303231Sdim} 540303231Sdim 541303231Sdim/// Determine if it is profitable to duplicate this block. 542314564Sdimbool TailDuplicator::shouldTailDuplicate(bool IsSimple, 543303231Sdim MachineBasicBlock &TailBB) { 544314564Sdim // When doing tail-duplication during layout, the block ordering is in flux, 545314564Sdim // so canFallThrough returns a result based on incorrect information and 546314564Sdim // should just be ignored. 547314564Sdim if (!LayoutMode && TailBB.canFallThrough()) 548303231Sdim return false; 549303231Sdim 550303231Sdim // Don't try to tail-duplicate single-block loops. 551303231Sdim if (TailBB.isSuccessor(&TailBB)) 552303231Sdim return false; 553303231Sdim 554303231Sdim // Set the limit on the cost to duplicate. When optimizing for size, 555303231Sdim // duplicate only one, because one branch instruction can be eliminated to 556303231Sdim // compensate for the duplication. 557303231Sdim unsigned MaxDuplicateCount; 558314564Sdim if (TailDupSize == 0 && 559314564Sdim TailDuplicateSize.getNumOccurrences() == 0 && 560353358Sdim MF->getFunction().hasOptSize()) 561303231Sdim MaxDuplicateCount = 1; 562314564Sdim else if (TailDupSize == 0) 563314564Sdim MaxDuplicateCount = TailDuplicateSize; 564303231Sdim else 565314564Sdim MaxDuplicateCount = TailDupSize; 566303231Sdim 567314564Sdim // If the block to be duplicated ends in an unanalyzable fallthrough, don't 568314564Sdim // duplicate it. 569314564Sdim // A similar check is necessary in MachineBlockPlacement to make sure pairs of 570314564Sdim // blocks with unanalyzable fallthrough get layed out contiguously. 571314564Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 572314564Sdim SmallVector<MachineOperand, 4> PredCond; 573314564Sdim if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && 574314564Sdim TailBB.canFallThrough()) 575314564Sdim return false; 576314564Sdim 577303231Sdim // If the target has hardware branch prediction that can handle indirect 578303231Sdim // branches, duplicating them can often make them predictable when there 579303231Sdim // are common paths through the code. The limit needs to be high enough 580303231Sdim // to allow undoing the effects of tail merging and other optimizations 581303231Sdim // that rearrange the predecessors of the indirect branch. 582303231Sdim 583303231Sdim bool HasIndirectbr = false; 584303231Sdim if (!TailBB.empty()) 585303231Sdim HasIndirectbr = TailBB.back().isIndirectBranch(); 586303231Sdim 587303231Sdim if (HasIndirectbr && PreRegAlloc) 588314564Sdim MaxDuplicateCount = TailDupIndirectBranchSize; 589303231Sdim 590303231Sdim // Check the instructions in the block to determine whether tail-duplication 591303231Sdim // is invalid or unlikely to be profitable. 592303231Sdim unsigned InstrCount = 0; 593303231Sdim for (MachineInstr &MI : TailBB) { 594303231Sdim // Non-duplicable things shouldn't be tail-duplicated. 595341825Sdim // CFI instructions are marked as non-duplicable, because Darwin compact 596341825Sdim // unwind info emission can't handle multiple prologue setups. In case of 597341825Sdim // DWARF, allow them be duplicated, so that their existence doesn't prevent 598341825Sdim // tail duplication of some basic blocks, that would be duplicated otherwise. 599341825Sdim if (MI.isNotDuplicable() && 600341825Sdim (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() || 601341825Sdim !MI.isCFIInstruction())) 602303231Sdim return false; 603303231Sdim 604303231Sdim // Convergent instructions can be duplicated only if doing so doesn't add 605303231Sdim // new control dependencies, which is what we're going to do here. 606303231Sdim if (MI.isConvergent()) 607303231Sdim return false; 608303231Sdim 609303231Sdim // Do not duplicate 'return' instructions if this is a pre-regalloc run. 610303231Sdim // A return may expand into a lot more instructions (e.g. reload of callee 611303231Sdim // saved registers) after PEI. 612303231Sdim if (PreRegAlloc && MI.isReturn()) 613303231Sdim return false; 614303231Sdim 615303231Sdim // Avoid duplicating calls before register allocation. Calls presents a 616303231Sdim // barrier to register allocation so duplicating them may end up increasing 617303231Sdim // spills. 618303231Sdim if (PreRegAlloc && MI.isCall()) 619303231Sdim return false; 620303231Sdim 621341825Sdim if (!MI.isPHI() && !MI.isMetaInstruction()) 622303231Sdim InstrCount += 1; 623303231Sdim 624303231Sdim if (InstrCount > MaxDuplicateCount) 625303231Sdim return false; 626303231Sdim } 627303231Sdim 628303231Sdim // Check if any of the successors of TailBB has a PHI node in which the 629303231Sdim // value corresponding to TailBB uses a subregister. 630303231Sdim // If a phi node uses a register paired with a subregister, the actual 631303231Sdim // "value type" of the phi may differ from the type of the register without 632303231Sdim // any subregisters. Due to a bug, tail duplication may add a new operand 633303231Sdim // without a necessary subregister, producing an invalid code. This is 634303231Sdim // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 635303231Sdim // Disable tail duplication for this case for now, until the problem is 636303231Sdim // fixed. 637303231Sdim for (auto SB : TailBB.successors()) { 638303231Sdim for (auto &I : *SB) { 639303231Sdim if (!I.isPHI()) 640303231Sdim break; 641303231Sdim unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 642303231Sdim assert(Idx != 0); 643303231Sdim MachineOperand &PU = I.getOperand(Idx); 644303231Sdim if (PU.getSubReg() != 0) 645303231Sdim return false; 646303231Sdim } 647303231Sdim } 648303231Sdim 649303231Sdim if (HasIndirectbr && PreRegAlloc) 650303231Sdim return true; 651303231Sdim 652303231Sdim if (IsSimple) 653303231Sdim return true; 654303231Sdim 655303231Sdim if (!PreRegAlloc) 656303231Sdim return true; 657303231Sdim 658303231Sdim return canCompletelyDuplicateBB(TailBB); 659303231Sdim} 660303231Sdim 661303231Sdim/// True if this BB has only one unconditional jump. 662303231Sdimbool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 663303231Sdim if (TailBB->succ_size() != 1) 664303231Sdim return false; 665303231Sdim if (TailBB->pred_empty()) 666303231Sdim return false; 667303231Sdim MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 668303231Sdim if (I == TailBB->end()) 669303231Sdim return true; 670303231Sdim return I->isUnconditionalBranch(); 671303231Sdim} 672303231Sdim 673303231Sdimstatic bool bothUsedInPHI(const MachineBasicBlock &A, 674303231Sdim const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { 675303231Sdim for (MachineBasicBlock *BB : A.successors()) 676303231Sdim if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 677303231Sdim return true; 678303231Sdim 679303231Sdim return false; 680303231Sdim} 681303231Sdim 682303231Sdimbool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 683303231Sdim for (MachineBasicBlock *PredBB : BB.predecessors()) { 684303231Sdim if (PredBB->succ_size() > 1) 685303231Sdim return false; 686303231Sdim 687303231Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 688303231Sdim SmallVector<MachineOperand, 4> PredCond; 689314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 690303231Sdim return false; 691303231Sdim 692303231Sdim if (!PredCond.empty()) 693303231Sdim return false; 694303231Sdim } 695303231Sdim return true; 696303231Sdim} 697303231Sdim 698303231Sdimbool TailDuplicator::duplicateSimpleBB( 699303231Sdim MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 700303231Sdim const DenseSet<unsigned> &UsedByPhi, 701303231Sdim SmallVectorImpl<MachineInstr *> &Copies) { 702303231Sdim SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 703303231Sdim TailBB->succ_end()); 704303231Sdim SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 705303231Sdim TailBB->pred_end()); 706303231Sdim bool Changed = false; 707314564Sdim for (MachineBasicBlock *PredBB : Preds) { 708303231Sdim if (PredBB->hasEHPadSuccessor()) 709303231Sdim continue; 710303231Sdim 711303231Sdim if (bothUsedInPHI(*PredBB, Succs)) 712303231Sdim continue; 713303231Sdim 714303231Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 715303231Sdim SmallVector<MachineOperand, 4> PredCond; 716314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 717303231Sdim continue; 718303231Sdim 719303231Sdim Changed = true; 720341825Sdim LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 721341825Sdim << "From simple Succ: " << *TailBB); 722303231Sdim 723303231Sdim MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 724314564Sdim MachineBasicBlock *NextBB = PredBB->getNextNode(); 725303231Sdim 726303231Sdim // Make PredFBB explicit. 727303231Sdim if (PredCond.empty()) 728303231Sdim PredFBB = PredTBB; 729303231Sdim 730303231Sdim // Make fall through explicit. 731303231Sdim if (!PredTBB) 732303231Sdim PredTBB = NextBB; 733303231Sdim if (!PredFBB) 734303231Sdim PredFBB = NextBB; 735303231Sdim 736303231Sdim // Redirect 737303231Sdim if (PredFBB == TailBB) 738303231Sdim PredFBB = NewTarget; 739303231Sdim if (PredTBB == TailBB) 740303231Sdim PredTBB = NewTarget; 741303231Sdim 742303231Sdim // Make the branch unconditional if possible 743303231Sdim if (PredTBB == PredFBB) { 744303231Sdim PredCond.clear(); 745303231Sdim PredFBB = nullptr; 746303231Sdim } 747303231Sdim 748303231Sdim // Avoid adding fall through branches. 749303231Sdim if (PredFBB == NextBB) 750303231Sdim PredFBB = nullptr; 751303231Sdim if (PredTBB == NextBB && PredFBB == nullptr) 752303231Sdim PredTBB = nullptr; 753303231Sdim 754321369Sdim auto DL = PredBB->findBranchDebugLoc(); 755314564Sdim TII->removeBranch(*PredBB); 756303231Sdim 757303231Sdim if (!PredBB->isSuccessor(NewTarget)) 758303231Sdim PredBB->replaceSuccessor(TailBB, NewTarget); 759303231Sdim else { 760303231Sdim PredBB->removeSuccessor(TailBB, true); 761303231Sdim assert(PredBB->succ_size() <= 1); 762303231Sdim } 763303231Sdim 764303231Sdim if (PredTBB) 765321369Sdim TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL); 766303231Sdim 767303231Sdim TDBBs.push_back(PredBB); 768303231Sdim } 769303231Sdim return Changed; 770303231Sdim} 771303231Sdim 772314564Sdimbool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, 773314564Sdim MachineBasicBlock *PredBB) { 774314564Sdim // EH edges are ignored by analyzeBranch. 775314564Sdim if (PredBB->succ_size() > 1) 776314564Sdim return false; 777314564Sdim 778321369Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 779314564Sdim SmallVector<MachineOperand, 4> PredCond; 780314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 781314564Sdim return false; 782314564Sdim if (!PredCond.empty()) 783314564Sdim return false; 784314564Sdim return true; 785314564Sdim} 786314564Sdim 787303231Sdim/// If it is profitable, duplicate TailBB's contents in each 788303231Sdim/// of its predecessors. 789314564Sdim/// \p IsSimple result of isSimpleBB 790314564Sdim/// \p TailBB Block to be duplicated. 791314564Sdim/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor 792314564Sdim/// instead of the previous block in MF's order. 793314564Sdim/// \p TDBBs A vector to keep track of all blocks tail-duplicated 794314564Sdim/// into. 795314564Sdim/// \p Copies A vector of copy instructions inserted. Used later to 796314564Sdim/// walk all the inserted copies and remove redundant ones. 797314564Sdimbool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, 798314564Sdim MachineBasicBlock *ForcedLayoutPred, 799303231Sdim SmallVectorImpl<MachineBasicBlock *> &TDBBs, 800303231Sdim SmallVectorImpl<MachineInstr *> &Copies) { 801341825Sdim LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) 802341825Sdim << '\n'); 803303231Sdim 804303231Sdim DenseSet<unsigned> UsedByPhi; 805303231Sdim getRegsUsedByPHIs(*TailBB, &UsedByPhi); 806303231Sdim 807303231Sdim if (IsSimple) 808303231Sdim return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 809303231Sdim 810303231Sdim // Iterate through all the unique predecessors and tail-duplicate this 811303231Sdim // block into them, if possible. Copying the list ahead of time also 812303231Sdim // avoids trouble with the predecessor list reallocating. 813303231Sdim bool Changed = false; 814303231Sdim SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 815303231Sdim TailBB->pred_end()); 816314564Sdim for (MachineBasicBlock *PredBB : Preds) { 817303231Sdim assert(TailBB != PredBB && 818303231Sdim "Single-block loop should have been rejected earlier!"); 819314564Sdim 820314564Sdim if (!canTailDuplicate(TailBB, PredBB)) 821303231Sdim continue; 822303231Sdim 823303231Sdim // Don't duplicate into a fall-through predecessor (at least for now). 824314564Sdim bool IsLayoutSuccessor = false; 825314564Sdim if (ForcedLayoutPred) 826314564Sdim IsLayoutSuccessor = (ForcedLayoutPred == PredBB); 827314564Sdim else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 828314564Sdim IsLayoutSuccessor = true; 829314564Sdim if (IsLayoutSuccessor) 830303231Sdim continue; 831303231Sdim 832341825Sdim LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 833341825Sdim << "From Succ: " << *TailBB); 834303231Sdim 835303231Sdim TDBBs.push_back(PredBB); 836303231Sdim 837303231Sdim // Remove PredBB's unconditional branch. 838314564Sdim TII->removeBranch(*PredBB); 839303231Sdim 840303231Sdim // Clone the contents of TailBB into PredBB. 841303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 842303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 843327952Sdim for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); 844327952Sdim I != E; /* empty */) { 845303231Sdim MachineInstr *MI = &*I; 846303231Sdim ++I; 847303231Sdim if (MI->isPHI()) { 848303231Sdim // Replace the uses of the def of the PHI with the register coming 849303231Sdim // from PredBB. 850303231Sdim processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 851303231Sdim } else { 852303231Sdim // Replace def of virtual registers with new registers, and update 853303231Sdim // uses with PHI source register or the new registers. 854314564Sdim duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); 855303231Sdim } 856303231Sdim } 857303231Sdim appendCopies(PredBB, CopyInfos, Copies); 858303231Sdim 859303231Sdim NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch 860303231Sdim 861303231Sdim // Update the CFG. 862303231Sdim PredBB->removeSuccessor(PredBB->succ_begin()); 863303231Sdim assert(PredBB->succ_empty() && 864303231Sdim "TailDuplicate called on block with multiple successors!"); 865314564Sdim for (MachineBasicBlock *Succ : TailBB->successors()) 866314564Sdim PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); 867303231Sdim 868303231Sdim Changed = true; 869303231Sdim ++NumTailDups; 870303231Sdim } 871303231Sdim 872303231Sdim // If TailBB was duplicated into all its predecessors except for the prior 873303231Sdim // block, which falls through unconditionally, move the contents of this 874303231Sdim // block into the prior block. 875314564Sdim MachineBasicBlock *PrevBB = ForcedLayoutPred; 876314564Sdim if (!PrevBB) 877314564Sdim PrevBB = &*std::prev(TailBB->getIterator()); 878303231Sdim MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 879303231Sdim SmallVector<MachineOperand, 4> PriorCond; 880303231Sdim // This has to check PrevBB->succ_size() because EH edges are ignored by 881314564Sdim // analyzeBranch. 882303231Sdim if (PrevBB->succ_size() == 1 && 883314564Sdim // Layout preds are not always CFG preds. Check. 884314564Sdim *PrevBB->succ_begin() == TailBB && 885314564Sdim !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && 886314564Sdim PriorCond.empty() && 887314564Sdim (!PriorTBB || PriorTBB == TailBB) && 888314564Sdim TailBB->pred_size() == 1 && 889303231Sdim !TailBB->hasAddressTaken()) { 890341825Sdim LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 891341825Sdim << "From MBB: " << *TailBB); 892314564Sdim // There may be a branch to the layout successor. This is unlikely but it 893314564Sdim // happens. The correct thing to do is to remove the branch before 894314564Sdim // duplicating the instructions in all cases. 895314564Sdim TII->removeBranch(*PrevBB); 896303231Sdim if (PreRegAlloc) { 897303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 898303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 899303231Sdim MachineBasicBlock::iterator I = TailBB->begin(); 900303231Sdim // Process PHI instructions first. 901303231Sdim while (I != TailBB->end() && I->isPHI()) { 902303231Sdim // Replace the uses of the def of the PHI with the register coming 903303231Sdim // from PredBB. 904303231Sdim MachineInstr *MI = &*I++; 905303231Sdim processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 906303231Sdim } 907303231Sdim 908303231Sdim // Now copy the non-PHI instructions. 909303231Sdim while (I != TailBB->end()) { 910303231Sdim // Replace def of virtual registers with new registers, and update 911303231Sdim // uses with PHI source register or the new registers. 912303231Sdim MachineInstr *MI = &*I++; 913303231Sdim assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 914314564Sdim duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); 915303231Sdim MI->eraseFromParent(); 916303231Sdim } 917303231Sdim appendCopies(PrevBB, CopyInfos, Copies); 918303231Sdim } else { 919314564Sdim TII->removeBranch(*PrevBB); 920303231Sdim // No PHIs to worry about, just splice the instructions over. 921303231Sdim PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 922303231Sdim } 923303231Sdim PrevBB->removeSuccessor(PrevBB->succ_begin()); 924303231Sdim assert(PrevBB->succ_empty()); 925303231Sdim PrevBB->transferSuccessors(TailBB); 926303231Sdim TDBBs.push_back(PrevBB); 927303231Sdim Changed = true; 928303231Sdim } 929303231Sdim 930303231Sdim // If this is after register allocation, there are no phis to fix. 931303231Sdim if (!PreRegAlloc) 932303231Sdim return Changed; 933303231Sdim 934303231Sdim // If we made no changes so far, we are safe. 935303231Sdim if (!Changed) 936303231Sdim return Changed; 937303231Sdim 938303231Sdim // Handle the nasty case in that we duplicated a block that is part of a loop 939303231Sdim // into some but not all of its predecessors. For example: 940303231Sdim // 1 -> 2 <-> 3 | 941303231Sdim // \ | 942303231Sdim // \---> rest | 943303231Sdim // if we duplicate 2 into 1 but not into 3, we end up with 944303231Sdim // 12 -> 3 <-> 2 -> rest | 945303231Sdim // \ / | 946303231Sdim // \----->-----/ | 947303231Sdim // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 948303231Sdim // with a phi in 3 (which now dominates 2). 949303231Sdim // What we do here is introduce a copy in 3 of the register defined by the 950303231Sdim // phi, just like when we are duplicating 2 into 3, but we don't copy any 951303231Sdim // real instructions or remove the 3 -> 2 edge from the phi in 2. 952314564Sdim for (MachineBasicBlock *PredBB : Preds) { 953314564Sdim if (is_contained(TDBBs, PredBB)) 954303231Sdim continue; 955303231Sdim 956303231Sdim // EH edges 957303231Sdim if (PredBB->succ_size() != 1) 958303231Sdim continue; 959303231Sdim 960303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 961303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 962303231Sdim MachineBasicBlock::iterator I = TailBB->begin(); 963303231Sdim // Process PHI instructions first. 964303231Sdim while (I != TailBB->end() && I->isPHI()) { 965303231Sdim // Replace the uses of the def of the PHI with the register coming 966303231Sdim // from PredBB. 967303231Sdim MachineInstr *MI = &*I++; 968303231Sdim processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 969303231Sdim } 970303231Sdim appendCopies(PredBB, CopyInfos, Copies); 971303231Sdim } 972303231Sdim 973303231Sdim return Changed; 974303231Sdim} 975303231Sdim 976303231Sdim/// At the end of the block \p MBB generate COPY instructions between registers 977303231Sdim/// described by \p CopyInfos. Append resulting instructions to \p Copies. 978303231Sdimvoid TailDuplicator::appendCopies(MachineBasicBlock *MBB, 979303231Sdim SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, 980303231Sdim SmallVectorImpl<MachineInstr*> &Copies) { 981303231Sdim MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); 982303231Sdim const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); 983303231Sdim for (auto &CI : CopyInfos) { 984303231Sdim auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) 985303231Sdim .addReg(CI.second.Reg, 0, CI.second.SubReg); 986303231Sdim Copies.push_back(C); 987303231Sdim } 988303231Sdim} 989303231Sdim 990303231Sdim/// Remove the specified dead machine basic block from the function, updating 991303231Sdim/// the CFG. 992314564Sdimvoid TailDuplicator::removeDeadBlock( 993314564Sdim MachineBasicBlock *MBB, 994321369Sdim function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 995303231Sdim assert(MBB->pred_empty() && "MBB must be dead!"); 996341825Sdim LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 997303231Sdim 998314564Sdim if (RemovalCallback) 999314564Sdim (*RemovalCallback)(MBB); 1000314564Sdim 1001303231Sdim // Remove all successors. 1002303231Sdim while (!MBB->succ_empty()) 1003303231Sdim MBB->removeSuccessor(MBB->succ_end() - 1); 1004303231Sdim 1005303231Sdim // Remove the block. 1006303231Sdim MBB->eraseFromParent(); 1007303231Sdim} 1008